Index: trunk/psLib/src/fft/psImageFFT.c
===================================================================
--- trunk/psLib/src/fft/psImageFFT.c	(revision 11716)
+++ trunk/psLib/src/fft/psImageFFT.c	(revision 17320)
@@ -6,6 +6,6 @@
 /// @author Robert DeSonia, MHPCC
 ///
-/// @version $Revision: 1.23 $ $Name: not supported by cvs2svn $
-/// @date $Date: 2007-02-09 00:22:55 $
+/// @version $Revision: 1.24 $ $Name: not supported by cvs2svn $
+/// @date $Date: 2008-04-04 22:44:56 $
 ///
 /// Copyright 2004-2005 Maui High Performance Computing Center, University of Hawaii
@@ -27,4 +27,5 @@
 #include "psConstants.h"
 #include "psImageStructManip.h"
+#include "psImageConvolve.h"
 #include "psImageFFT.h"
 
@@ -191,5 +192,4 @@
     return real;
 }
-
 
 
@@ -274,2 +274,178 @@
     return true;
 }
+
+
+psImage *psImageConvolveFFT(psImage *out, const psImage *in, const psImage *mask, psMaskType maskVal,
+                            const psKernel *kernel)
+{
+    PS_ASSERT_IMAGE_NON_NULL(in, NULL);
+    PS_ASSERT_IMAGE_TYPE(in, PS_TYPE_F32, NULL);
+    PS_ASSERT_KERNEL_NON_NULL(kernel, NULL);
+    if (mask) {
+        PS_ASSERT_IMAGE_NON_NULL(mask, NULL);
+        PS_ASSERT_IMAGE_TYPE(mask, PS_TYPE_MASK, NULL);
+        PS_ASSERT_IMAGES_SIZE_EQUAL(mask, in, NULL);
+    }
+
+    int numCols = in->numCols, numRows = in->numRows; // Size of image
+    int xMin = kernel->xMin, xMax = kernel->xMax, yMin = kernel->yMin, yMax = kernel->yMax; // Kernel sizes
+
+    // Need to pad the input image to protect from wrap-around effects
+    if (xMax - xMin > numCols || yMax - yMin > numRows) {
+        // Cannot pad the image if the kernel is larger.
+        psError(PS_ERR_BAD_PARAMETER_SIZE, true,
+                _("Kernel cannot extend further than input image size (%dx%d vs %dx%d)."),
+                xMax, yMax, numCols, numRows);
+        return NULL;
+    }
+    int paddedCols = numCols + PS_MAX(-xMin, xMax); // Number of columns in padded image
+    int paddedRows = numRows + PS_MAX(-yMin, yMax); // Number of rows in padded image
+    int numPadded = paddedCols * paddedRows; // Number of pixels in padded image
+
+    // Create data array containing the padded image and padded kernel
+    psF32 *data = fftwf_malloc(2 * numPadded * PSELEMTYPE_SIZEOF(PS_TYPE_F32)); // Data for FFTW
+    psF32 *dataPtr = data;              // Pointer into FFTW data
+    psF32 **imageData = in->data.F32;   // Pointer into image data
+
+    // Image part of data array
+    size_t goodBytes = numCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32); // Number of bytes per image row
+    size_t padBytes = (paddedCols - numCols) * PSELEMTYPE_SIZEOF(PS_TYPE_F32); // Number of bytes to pad
+    for (int y = 0; y < numRows; y++, dataPtr += paddedCols, imageData++) {
+        memcpy(dataPtr, *imageData, goodBytes);
+        memset(dataPtr + numCols, 0, padBytes);
+    }
+    memset(dataPtr, 0, (paddedRows - numRows) * paddedCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
+
+#if 0
+    {
+        // Use this for inspecting the result of copying the image
+        psImage *test = psImageAlloc(paddedCols, paddedRows, PS_TYPE_F32);
+        psFree(test->p_rawDataBuffer);
+        test->p_rawDataBuffer = data;
+        test->data.V[0] = test->p_rawDataBuffer;
+        for (int y = 1; y < paddedRows; y++) {
+            test->data.V[y] = (psPtr)((int8_t *)test->data.V[y - 1] +
+                                      paddedCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
+        }
+        // View image here
+        test->p_rawDataBuffer = NULL;
+        psFree(test);
+    }
+#endif
+
+    // Kernel part of data array
+    dataPtr = data + numPadded;         // Reset to kernel image location
+    float norm = 1.0 / (float)(paddedRows * paddedCols); // Normalisation to correct for FFT
+    // We could generate the padded kernel image using memcpy, but by going pixel by pixel we can apply the
+    // normalisation that corrects for the FFT renormalisation.  By applying it to the kernel here, we save
+    // applying it to the entire output image.
+    int xNegMin = PS_MIN(-1, xMin), xNegMax = PS_MIN(-1, xMax); // Min and max for x when negative
+    int xPosMin = PS_MAX(0, xMin), xPosMax = PS_MAX(0, xMax); // Min and max for x when positive
+    int yNegMin = PS_MIN(-1, yMin), yNegMax = PS_MIN(-1, yMax); // Min and max for x when negative
+    int yPosMin = PS_MAX(0, yMin), yPosMax = PS_MAX(0, yMax); // Min and max for x when positive
+    int blankCols = xNegMin + paddedCols - xPosMax - 1; // Number of columns between kernel extrema
+    int blankRows = (yNegMin + paddedRows - yPosMax - 1) * paddedCols; // Rows between kernel extrema
+    size_t blankColBytes = blankCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32); // Number of bytes in blankCols
+    for (int y = yPosMin; y <= yPosMax; y++) {
+        // y is positive
+        for (int x = xPosMin; x <= xPosMax; x++, dataPtr++) {
+            // x is positive
+            *dataPtr = kernel->kernel[y][x] * norm;
+        }
+        // Columns between kernel extrema
+        memset(dataPtr, 0, blankColBytes);
+        dataPtr += blankCols;
+        for (int x = xNegMin; x <= xNegMax; x++, dataPtr++) {
+            // x is negative
+            *dataPtr = kernel->kernel[y][x] * norm;
+        }
+    }
+    // Rows between kernel extrema
+    memset(dataPtr, 0, blankRows * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
+    dataPtr += blankRows;
+    for (int y = yNegMin; y <= yNegMax; y++) {
+        // y is negative
+        for (int x = xPosMin; x <= xPosMax; x++, dataPtr++) {
+            // x is positive
+            *dataPtr = kernel->kernel[y][x] * norm;
+        }
+        // Columns between kernel extrema
+        memset(dataPtr, 0, blankColBytes);
+        dataPtr += blankCols;
+        for (int x = xNegMin; x <= xNegMax; x++, dataPtr++) {
+            // x is negative
+            *dataPtr = kernel->kernel[y][x] * norm;
+        }
+    }
+
+#if 0
+    {
+        // Use this for inspecting the result of copying the kernel
+        psImage *test = psImageAlloc(paddedCols, paddedRows, PS_TYPE_F32);
+        psFree(test->p_rawDataBuffer);
+        test->p_rawDataBuffer = &data[numPadded];
+        test->data.V[0] = test->p_rawDataBuffer;
+        for (int y = 1; y < paddedRows; y++) {
+            test->data.V[y] = (psPtr)((int8_t *)test->data.V[y - 1] +
+                                      paddedCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
+        }
+        // View image here
+        test->p_rawDataBuffer = NULL;
+        psFree(test);
+    }
+#endif
+
+    // Mask bad pixels (which may be NANs), lest they infect everything
+    if (mask && maskVal) {
+        for (int y = 0; y < numRows; y++) {
+            for (int x = 0; x < numCols; x++) {
+                if (mask->data.PS_TYPE_MASK_DATA[y][x] & maskVal) {
+                    data[x + paddedCols * y] = 0;
+                }
+            }
+        }
+    }
+
+    // Do the forward FFT
+    // Note that the FFT images have different size from the input
+    fftwf_complex *fft = fftwf_malloc(2 * (paddedCols/2 + 1) * paddedRows * sizeof(fftwf_complex)); // FFT
+    int size[] = { paddedCols, paddedRows }; // Size of transforms
+    int fftCols = paddedCols/2 + 1, fftRows = paddedRows; // Size of FFT images
+    int fftPixels = fftCols * fftRows;  // Number of pixels in FFT image
+    fftwf_plan forward = fftwf_plan_many_dft_r2c(2, size, 2, data, NULL, 1, paddedCols * paddedRows,
+                                                 fft, NULL, 1, fftPixels, FFTW_PLAN_RIGOR);
+    fftwf_execute(forward);
+    fftwf_destroy_plan(forward);
+
+    // Multiply the two transforms
+    for (int i = 0, j = fftPixels; i < fftPixels; i++, j++) {
+        // (a + bi) * (c + di) = (ac - bd) + (bc + ad)i
+#if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I)
+        // C99 complex support
+        fft[i] *= fft[j];
+#else
+        // FFTW's backup complex support
+        float imageReal = fft[i][0], imageImag = fft[i][1];
+        float kernelReal = fft[j][0], kernelImag = fft[j][1];
+        fft[i][0] = imageReal * kernelReal - imageImag * kernelImag;
+        fft[i][1] = imageImag * kernelReal + imageReal * kernelImag;
+#endif
+    }
+
+    // Do the backward FFT
+    fftwf_plan backward = fftwf_plan_dft_c2r_2d(paddedRows, paddedCols, fft, data, FFTW_PLAN_RIGOR);
+    fftwf_execute(backward);
+    fftwf_destroy_plan(backward);
+    fftwf_free(fft);
+
+    // Copy into the target, without the padding
+    out = psImageRecycle(out, numCols, numRows, PS_TYPE_F32);
+    psF32 **outData = out->data.F32;    // Pointer into output
+    dataPtr = data;                     // Reset to start
+    for (int y = 0; y < numRows; y++, outData++, dataPtr += paddedCols) {
+        memcpy(*outData, dataPtr, goodBytes);
+    }
+    //    fftwf_free(data);
+
+    return out;
+}
