Context Navigation

← Previous Changeset
Next Changeset →

Changeset 17320

Timestamp:

Apr 4, 2008, 12:44:56 PM (18 years ago)

Author:

Paul Price

Message:

Optimising psImageConvolveFFT. Made a couple of optimisations: 1. Moved code into psImageFFT.c, so I can use FFTW directly. FFTW, through its 'advanced interface' allows two FFTs to be performed at the same time and 'the resulting plans can often be faster than calling FFTW multiple times for the individual transforms' (FFTW 3.1.2 manual, p30). 2. The convolved image needs to be normalised because FFTW doesn't take out the sqrt(N) factors in the FFT. Instead of multiplying the entire convolved image, we can multiply the much smaller number of pixels in the kernel. Tested this code with tap_psImageConvolve2 and it seems to work. Running convolutionBench unoptimised on mithrandir produces times that are comparable to what I got a while back running optimised. Running it optimized on alala produces speed-ups ranging from 25 to 350%.

Location:

trunk/psLib

Files:

: 6 edited

src/fft/psImageFFT.c (modified) (4 diffs)
src/fft/psImageFFT.h (modified) (3 diffs)
src/imageops/psImageConvolve.c (modified) (3 diffs)
src/imageops/psImageConvolve.h (modified) (2 diffs)
test/imageops/convolutionBench.c (modified) (1 diff)
test/imageops/tap_psImageConvolve2.c (modified) (1 diff)

Legend:

: Unmodified
: Added
: Removed

trunk/psLib/src/fft/psImageFFT.c

-              r11716
+              r17320
 /// @author Robert DeSonia, MHPCC
 ///
 /// @version $Revision: 1.23 $ $Name: not supported by cvs2svn $
 /// @date $Date: 2007-02-09 00:22:55 $
+/// @version $Revision: 1.24 $ $Name: not supported by cvs2svn $
+/// @date $Date: 2008-04-04 22:44:56 $
 ///
 /// Copyright 2004-2005 Maui High Performance Computing Center, University of Hawaii
 …
 #include "psConstants.h"
 #include "psImageStructManip.h"
+#include "psImageConvolve.h"
 #include "psImageFFT.h"
 …
     return real;
+}
 …
     return true;
+}
+psImage *psImageConvolveFFT(psImage *out, const psImage *in, const psImage *mask, psMaskType maskVal,
+                            const psKernel *kernel)
+{
+    PS_ASSERT_IMAGE_NON_NULL(in, NULL);
+    PS_ASSERT_IMAGE_TYPE(in, PS_TYPE_F32, NULL);
+    PS_ASSERT_KERNEL_NON_NULL(kernel, NULL);
+    if (mask) {
+        PS_ASSERT_IMAGE_NON_NULL(mask, NULL);
+        PS_ASSERT_IMAGE_TYPE(mask, PS_TYPE_MASK, NULL);
+        PS_ASSERT_IMAGES_SIZE_EQUAL(mask, in, NULL);
+    }
+    int numCols = in->numCols, numRows = in->numRows; // Size of image
+    int xMin = kernel->xMin, xMax = kernel->xMax, yMin = kernel->yMin, yMax = kernel->yMax; // Kernel sizes
+    // Need to pad the input image to protect from wrap-around effects
+    if (xMax - xMin > numCols || yMax - yMin > numRows) {
+        // Cannot pad the image if the kernel is larger.
+        psError(PS_ERR_BAD_PARAMETER_SIZE, true,
+                _("Kernel cannot extend further than input image size (%dx%d vs %dx%d)."),
+                xMax, yMax, numCols, numRows);
+        return NULL;
+    }
+    int paddedCols = numCols + PS_MAX(-xMin, xMax); // Number of columns in padded image
+    int paddedRows = numRows + PS_MAX(-yMin, yMax); // Number of rows in padded image
+    int numPadded = paddedCols * paddedRows; // Number of pixels in padded image
+    // Create data array containing the padded image and padded kernel
+    psF32 *data = fftwf_malloc(2 * numPadded * PSELEMTYPE_SIZEOF(PS_TYPE_F32)); // Data for FFTW
+    psF32 *dataPtr = data;              // Pointer into FFTW data
+    psF32 **imageData = in->data.F32;   // Pointer into image data
+    // Image part of data array
+    size_t goodBytes = numCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32); // Number of bytes per image row
+    size_t padBytes = (paddedCols - numCols) * PSELEMTYPE_SIZEOF(PS_TYPE_F32); // Number of bytes to pad
+    for (int y = 0; y < numRows; y++, dataPtr += paddedCols, imageData++) {
+        memcpy(dataPtr, *imageData, goodBytes);
+        memset(dataPtr + numCols, 0, padBytes);
+    }
+    memset(dataPtr, 0, (paddedRows - numRows) * paddedCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
+#if 0
+    {
+        // Use this for inspecting the result of copying the image
+        psImage *test = psImageAlloc(paddedCols, paddedRows, PS_TYPE_F32);
+        psFree(test->p_rawDataBuffer);
+        test->p_rawDataBuffer = data;
+        test->data.V[0] = test->p_rawDataBuffer;
+        for (int y = 1; y < paddedRows; y++) {
+            test->data.V[y] = (psPtr)((int8_t *)test->data.V[y - 1] +
+                                      paddedCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
+        }
+        // View image here
+        test->p_rawDataBuffer = NULL;
+        psFree(test);
+    }
+#endif
+    // Kernel part of data array
+    dataPtr = data + numPadded;         // Reset to kernel image location
+    float norm = 1.0 / (float)(paddedRows * paddedCols); // Normalisation to correct for FFT
+    // We could generate the padded kernel image using memcpy, but by going pixel by pixel we can apply the
+    // normalisation that corrects for the FFT renormalisation.  By applying it to the kernel here, we save
+    // applying it to the entire output image.
+    int xNegMin = PS_MIN(-1, xMin), xNegMax = PS_MIN(-1, xMax); // Min and max for x when negative
+    int xPosMin = PS_MAX(0, xMin), xPosMax = PS_MAX(0, xMax); // Min and max for x when positive
+    int yNegMin = PS_MIN(-1, yMin), yNegMax = PS_MIN(-1, yMax); // Min and max for x when negative
+    int yPosMin = PS_MAX(0, yMin), yPosMax = PS_MAX(0, yMax); // Min and max for x when positive
+    int blankCols = xNegMin + paddedCols - xPosMax - 1; // Number of columns between kernel extrema
+    int blankRows = (yNegMin + paddedRows - yPosMax - 1) * paddedCols; // Rows between kernel extrema
+    size_t blankColBytes = blankCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32); // Number of bytes in blankCols
+    for (int y = yPosMin; y <= yPosMax; y++) {
+        // y is positive
+        for (int x = xPosMin; x <= xPosMax; x++, dataPtr++) {
+            // x is positive
+            *dataPtr = kernel->kernel[y][x] * norm;
+        }
+        // Columns between kernel extrema
+        memset(dataPtr, 0, blankColBytes);
+        dataPtr += blankCols;
+        for (int x = xNegMin; x <= xNegMax; x++, dataPtr++) {
+            // x is negative
+            *dataPtr = kernel->kernel[y][x] * norm;
+        }
+    }
+    // Rows between kernel extrema
+    memset(dataPtr, 0, blankRows * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
+    dataPtr += blankRows;
+    for (int y = yNegMin; y <= yNegMax; y++) {
+        // y is negative
+        for (int x = xPosMin; x <= xPosMax; x++, dataPtr++) {
+            // x is positive
+            *dataPtr = kernel->kernel[y][x] * norm;
+        }
+        // Columns between kernel extrema
+        memset(dataPtr, 0, blankColBytes);
+        dataPtr += blankCols;
+        for (int x = xNegMin; x <= xNegMax; x++, dataPtr++) {
+            // x is negative
+            *dataPtr = kernel->kernel[y][x] * norm;
+        }
+    }
+#if 0
+    {
+        // Use this for inspecting the result of copying the kernel
+        psImage *test = psImageAlloc(paddedCols, paddedRows, PS_TYPE_F32);
+        psFree(test->p_rawDataBuffer);
+        test->p_rawDataBuffer = &data[numPadded];
+        test->data.V[0] = test->p_rawDataBuffer;
+        for (int y = 1; y < paddedRows; y++) {
+            test->data.V[y] = (psPtr)((int8_t *)test->data.V[y - 1] +
+                                      paddedCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
+        }
+        // View image here
+        test->p_rawDataBuffer = NULL;
+        psFree(test);
+    }
+#endif
+    // Mask bad pixels (which may be NANs), lest they infect everything
+    if (mask && maskVal) {
+        for (int y = 0; y < numRows; y++) {
+            for (int x = 0; x < numCols; x++) {
+                if (mask->data.PS_TYPE_MASK_DATA[y][x] & maskVal) {
+                    data[x + paddedCols * y] = 0;
+                }
+            }
+        }
+    }
+    // Do the forward FFT
+    // Note that the FFT images have different size from the input
+    fftwf_complex *fft = fftwf_malloc(2 * (paddedCols/2 + 1) * paddedRows * sizeof(fftwf_complex)); // FFT
+    int size[] = { paddedCols, paddedRows }; // Size of transforms
+    int fftCols = paddedCols/2 + 1, fftRows = paddedRows; // Size of FFT images
+    int fftPixels = fftCols * fftRows;  // Number of pixels in FFT image
+    fftwf_plan forward = fftwf_plan_many_dft_r2c(2, size, 2, data, NULL, 1, paddedCols * paddedRows,
+                                                 fft, NULL, 1, fftPixels, FFTW_PLAN_RIGOR);
+    fftwf_execute(forward);
+    fftwf_destroy_plan(forward);
+    // Multiply the two transforms
+    for (int i = 0, j = fftPixels; i < fftPixels; i++, j++) {
+        // (a + bi) * (c + di) = (ac - bd) + (bc + ad)i
+#if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I)
+        // C99 complex support
+        fft[i] *= fft[j];
+#else
+        // FFTW's backup complex support
+        float imageReal = fft[i][0], imageImag = fft[i][1];
+        float kernelReal = fft[j][0], kernelImag = fft[j][1];
+        fft[i][0] = imageReal * kernelReal - imageImag * kernelImag;
+        fft[i][1] = imageImag * kernelReal + imageReal * kernelImag;
+#endif
+    }
+    // Do the backward FFT
+    fftwf_plan backward = fftwf_plan_dft_c2r_2d(paddedRows, paddedCols, fft, data, FFTW_PLAN_RIGOR);
+    fftwf_execute(backward);
+    fftwf_destroy_plan(backward);
+    fftwf_free(fft);
+    // Copy into the target, without the padding
+    out = psImageRecycle(out, numCols, numRows, PS_TYPE_F32);
+    psF32 **outData = out->data.F32;    // Pointer into output
+    dataPtr = data;                     // Reset to start
+    for (int y = 0; y < numRows; y++, outData++, dataPtr += paddedCols) {
+        memcpy(*outData, dataPtr, goodBytes);
+    }
+    //    fftwf_free(data);
+    return out;
+}

trunk/psLib/src/fft/psImageFFT.h

-              r11704
+              r17320
 /// @author Robert DeSonia, MHPCC
 ///
 /// @version $Revision: 1.9 $ $Name: not supported by cvs2svn $
 /// @date $Date: 2007-02-08 04:23:57 $
+/// @version $Revision: 1.10 $ $Name: not supported by cvs2svn $
+/// @date $Date: 2008-04-04 22:44:56 $
 /// Copyright 2004-2005 Maui High Performance Computing Center, University of Hawaii
 ///
 …
 #include "psImage.h"
+#include "psImageConvolve.h"
 /// @addtogroup MathOps Mathematical Operations
 …
     );
+/// Convolve an image with a kernel, using the FFT
+///
+/// This is appropriate for larger kernels, where the direct convolution is slow.  The input image and kernel
+/// are suitably padded to avoid wrap-around effects.  The input must be of type F32.  If a mask and a
+/// non-zero mask value are supplied, the masked input pixels are set to zero before transforming.
+///
+/// @return the convolved image (same size as the input), or NULL on error (e.g., the kernel extends
+/// further than the input image).
+psImage *psImageConvolveFFT(
+    psImage *out,                       ///< Output image to recycle, or NULL
+    const psImage *in,                  ///< Image to convolve
+    const psImage *mask,                ///< Corresponding mask, or NULL
+    psMaskType maskVal,                 ///< Value to mask
+    const psKernel *kernel              ///< Kernel to convolve with
+);
 /// @}
 #endif // #ifndef PS_IMAGE_FFT_H

trunk/psLib/src/imageops/psImageConvolve.c

-              r17302
+              r17320
 /// @author Eugene Magnier, IfA
 ///
 /// @version $Revision: 1.62 $ $Name: not supported by cvs2svn $
 /// @date $Date: 2008-04-03 03:00:25 $
+/// @version $Revision: 1.63 $ $Name: not supported by cvs2svn $
+/// @date $Date: 2008-04-04 22:44:56 $
 ///
 /// Copyright 2004-2007 Institute for Astronomy, University of Hawaii
 …
-psImage *psImageConvolveFFT(const psImage *in,
-                            const psImage *mask,
-                            psMaskType maskVal,
-                            const psKernel *kernel,
-                            float pad)
+{
-    PS_ASSERT_IMAGE_NON_NULL(in, NULL);
-    PS_ASSERT_IMAGE_TYPE(in, PS_TYPE_F32, NULL);
-    PS_ASSERT_PTR_NON_NULL(kernel, NULL);
-    PS_ASSERT_IMAGE_NON_NULL(kernel->image, NULL);
-    // Pull out kernel parameters, for convenience
-    int xMin = kernel->xMin;
-    int xMax = kernel->xMax;
-    int yMin = kernel->yMin;
-    int yMax = kernel->yMax;
-    int numRows = in->numRows;          // Number of rows in input image
-    int numCols = in->numCols;          // Number of columns in input image
-    // Need to pad the input image to protect from wrap-around effects
-    if (xMax - xMin > numCols || yMax - yMin > numRows) {
-        // Cannot pad the image if the kernel is larger.
-        psError(PS_ERR_BAD_PARAMETER_SIZE, true,
-                _("Kernel cannot extend further than input image size (%dx%d vs %dx%d)."),
-                xMax, yMax, numCols, numRows);
-        return NULL;
+    }
-    int paddedCols = numCols + PS_MAX(-xMin, xMax); // Number of columns in padded image
-    int paddedRows = numRows + PS_MAX(-yMin, yMax); // Number of rows in padded image
-    // Generate padded image
-    psImage *paddedImage = psImageAlloc(paddedCols,paddedRows,in->type.type); // Padded input image
-    if (mask && maskVal) {
-        // Need to replace non-finite (assumed masked) pixels, since they propagate everywhere during FFT
-        for (int y = 0; y < numRows; y++) {
-            for (int x = 0; x < numCols; x++) {
-                paddedImage->data.F32[y][x] = (mask->data.PS_TYPE_MASK_DATA[y][x] & maskVal) ? pad :
-                    in->data.F32[y][x];
+            }
+        }
-    } else {
-        psImageOverlaySection(paddedImage, in, 0, 0, "=");
+    }
-    for (int y = 0; y < numRows; y++) {
-        for (int x = numCols; x < paddedCols; x++) {
-            paddedImage->data.F32[y][x] = pad;
+        }
+    }
-    for (int y = numRows; y < paddedRows; y++) {
-        for (int x = 0; x < paddedCols; x++) {
-            paddedImage->data.F32[y][x] = pad;
+        }
+    }
-    // Result of FFT
-    psImage *inRealFFT = NULL, *inImagFFT = NULL;
-    if (!psImageForwardFFT(&inRealFFT, &inImagFFT, paddedImage)) {
-        psError(PS_ERR_UNKNOWN, false, _("Failed to fourier transform input image."));
-        psFree(paddedImage);
-        return NULL;
+    }
-    psFree(paddedImage);
-    // Generate padded kernel image
-    psImage *paddedKernel = psImageAlloc(paddedCols, paddedRows, PS_TYPE_F32);
-    psImageInit(paddedKernel, 0.0);
-    for (int y = PS_MIN(-1, yMin); y <= PS_MIN(-1, yMax); y++) {
-        // y is negative
-        if (xMin < 0) {
-            // x is negative
-            memcpy(&paddedKernel->data.F32[paddedRows + y][paddedCols + xMin], &kernel->kernel[y][xMin],
-                   (PS_MIN(0, xMax) - xMin) * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
+        }
-        if (xMax >= 0) {
-            // x is positive
-            int min = PS_MAX(0, xMin);  // Minimum value of x when positive
-            memcpy(&paddedKernel->data.F32[paddedRows + y][min], &kernel->kernel[y][min],
-                   (xMax - min + 1) * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
+        }
+    }
-    for (int y = PS_MAX(0, yMin); y <= PS_MAX(0, yMax); y++) {
-        // y is positive
-        if (xMin < 0) {
-            // x is negative
-            memcpy(&paddedKernel->data.F32[y][paddedCols + xMin], &kernel->kernel[y][xMin],
-                   (PS_MIN(0, xMax) - xMin) * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
+        }
-        if (xMax >= 0) {
-            // x is positive
-            int min = PS_MAX(0, xMin);  // Minimum value of x when positive
-            memcpy(&paddedKernel->data.F32[y][min], &kernel->kernel[y][min],
-                   (xMax - min + 1) * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
+        }
+    }
-    psImage *kernelRealFFT = NULL, *kernelImagFFT = NULL;
-    if (!psImageForwardFFT(&kernelRealFFT, &kernelImagFFT, paddedKernel)) {
-        psError(PS_ERR_UNKNOWN, false, _("Failed to fourier transform kernel."));
-        psFree(inRealFFT);
-        psFree(inImagFFT);
-        psFree(paddedKernel);
-        return NULL;
+    }
-    psFree(paddedKernel);
-    // Convolution in fourier domain is just a pixel-wise multiplication
-    if (!psImageComplexMultiply(&inRealFFT, &inImagFFT, inRealFFT, inImagFFT, kernelRealFFT, kernelImagFFT)) {
-        psError(PS_ERR_UNKNOWN, false, _("Unable to multiply fourier transformts."));
-        psFree(inRealFFT);
-        psFree(inImagFFT);
-        psFree(kernelRealFFT);
-        psFree(kernelImagFFT);
-        return NULL;
+    }
-    psFree(kernelRealFFT);
-    psFree(kernelImagFFT);
-    psImage *paddedConvolved = NULL; // Padded convolved image
-    if (!psImageBackwardFFT(&paddedConvolved, inRealFFT, inImagFFT, paddedCols)) {
-        psError(PS_ERR_UNKNOWN, false, _("Failed to invert fourier transform of convolution image."));
-        psFree(inRealFFT);
-        psFree(inImagFFT);
-        return NULL;
+    }
-    psFree(inRealFFT);
-    psFree(inImagFFT);
-    // Trim off the padding, then renormalise (which also does a copy, so there's no parent for the output)
-    psImage *convolved = psImageSubset(paddedConvolved, psRegionSet(0, numCols, 0, numRows));
-    psImage *out = (psImage*)psBinaryOp(NULL, convolved, "*",
-                                        psScalarAlloc(1.0 / paddedCols / paddedRows, PS_TYPE_F32));
-    psFree(convolved);
-    psFree(paddedConvolved);
-    return out;
+}
 psImage *psImageConvolveMaskFFT(psImage *out, const psImage *mask, psMaskType maskVal,
                                 psMaskType setVal, int xMin, int xMax, int yMin, int yMax, float thresh)
 …
     psKernel *kernel = psKernelAlloc(xMin, xMax, yMin, yMax);
     psImageInit(kernel->image, 1.0);
     psImage *convolved = psImageConvolveFFT(onoff, NULL, 0, kernel, 0.0);
+    psImage *convolved = psImageConvolveFFT(NULL, onoff, NULL, 0, kernel);
     psFree(onoff);
     psFree(kernel);

trunk/psLib/src/imageops/psImageConvolve.h

-              r15321
+              r17320
  * @author Robert DeSonia, MHPCC
+ *
  * @version $Revision: 1.31 $ $Name: not supported by cvs2svn $
  * @date $Date: 2007-10-17 01:49:12 $
+ * @version $Revision: 1.32 $ $Name: not supported by cvs2svn $
+ * @date $Date: 2008-04-04 22:44:56 $
  * Copyright 2004-2005 Maui High Performance Computing Center, University of Hawaii
  */
 …
 );
-/// Convolve an image with a kernel, using the FFT
-///
-/// This is appropriate for larger kernels, where the direct convolution is slow.  The input image and kernel
-/// are suitably padded to avoid wrap-around effects.
-psImage *psImageConvolveFFT(
-    const psImage *in,                  ///< Image to convolve
-    const psImage *mask,                ///< Corresponding mask
-    psMaskType maskVal,                 ///< Value to mask
-    const psKernel *kernel,             ///< kernel to colvolve with
-    float pad                           ///< Value to use to pad the input image
-);
 /// Convolve a mask image with a kernel, using direct convolution
 ///

trunk/psLib/test/imageops/convolutionBench.c

r14866	r17320
47	47	psKernel *kernel = generateKernel(kernelCols, kernelRows);
48	48	psTimerStart("fft");
49		psImage *convolved = psImageConvolveFFT(~~image, NULL, 0, kernel, 0.0~~);
	49	psImage *convolved = psImageConvolveFFT(NULL, image, NULL, 0, kernel);
50	50	fft += psTimerMark("fft");
51	51	psFree(convolved);

trunk/psLib/test/imageops/tap_psImageConvolve2.c

r17301	r17320
95	95	psKernel *kernel = generateKernel();
96	96
97		psImage *convolved = psImageConvolveFFT(~~image, NULL, 0, kernel, 0.0~~);
	97	psImage *convolved = psImageConvolveFFT(NULL, image, NULL, 0, kernel);
98	98	ok(convolved, "convolution result");
99	99	skip_start(!convolved, 3, "convolution failed");

Note: See TracChangeset for help on using the changeset viewer.