IPP Software Navigation Tools IPP Links Communication Pan-STARRS Links

Changeset 17320


Ignore:
Timestamp:
Apr 4, 2008, 12:44:56 PM (18 years ago)
Author:
Paul Price
Message:

Optimising psImageConvolveFFT. Made a couple of optimisations: 1. Moved code into psImageFFT.c, so I can use FFTW directly. FFTW, through its 'advanced interface' allows two FFTs to be performed at the same time and 'the resulting plans can often be faster than calling FFTW multiple times for the individual transforms' (FFTW 3.1.2 manual, p30). 2. The convolved image needs to be normalised because FFTW doesn't take out the sqrt(N) factors in the FFT. Instead of multiplying the entire convolved image, we can multiply the much smaller number of pixels in the kernel. Tested this code with tap_psImageConvolve2 and it seems to work. Running convolutionBench unoptimised on mithrandir produces times that are comparable to what I got a while back running optimised. Running it optimized on alala produces speed-ups ranging from 25 to 350%.

Location:
trunk/psLib
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • trunk/psLib/src/fft/psImageFFT.c

    r11716 r17320  
    66/// @author Robert DeSonia, MHPCC
    77///
    8 /// @version $Revision: 1.23 $ $Name: not supported by cvs2svn $
    9 /// @date $Date: 2007-02-09 00:22:55 $
     8/// @version $Revision: 1.24 $ $Name: not supported by cvs2svn $
     9/// @date $Date: 2008-04-04 22:44:56 $
    1010///
    1111/// Copyright 2004-2005 Maui High Performance Computing Center, University of Hawaii
     
    2727#include "psConstants.h"
    2828#include "psImageStructManip.h"
     29#include "psImageConvolve.h"
    2930#include "psImageFFT.h"
    3031
     
    191192    return real;
    192193}
    193 
    194194
    195195
     
    274274    return true;
    275275}
     276
     277
     278psImage *psImageConvolveFFT(psImage *out, const psImage *in, const psImage *mask, psMaskType maskVal,
     279                            const psKernel *kernel)
     280{
     281    PS_ASSERT_IMAGE_NON_NULL(in, NULL);
     282    PS_ASSERT_IMAGE_TYPE(in, PS_TYPE_F32, NULL);
     283    PS_ASSERT_KERNEL_NON_NULL(kernel, NULL);
     284    if (mask) {
     285        PS_ASSERT_IMAGE_NON_NULL(mask, NULL);
     286        PS_ASSERT_IMAGE_TYPE(mask, PS_TYPE_MASK, NULL);
     287        PS_ASSERT_IMAGES_SIZE_EQUAL(mask, in, NULL);
     288    }
     289
     290    int numCols = in->numCols, numRows = in->numRows; // Size of image
     291    int xMin = kernel->xMin, xMax = kernel->xMax, yMin = kernel->yMin, yMax = kernel->yMax; // Kernel sizes
     292
     293    // Need to pad the input image to protect from wrap-around effects
     294    if (xMax - xMin > numCols || yMax - yMin > numRows) {
     295        // Cannot pad the image if the kernel is larger.
     296        psError(PS_ERR_BAD_PARAMETER_SIZE, true,
     297                _("Kernel cannot extend further than input image size (%dx%d vs %dx%d)."),
     298                xMax, yMax, numCols, numRows);
     299        return NULL;
     300    }
     301    int paddedCols = numCols + PS_MAX(-xMin, xMax); // Number of columns in padded image
     302    int paddedRows = numRows + PS_MAX(-yMin, yMax); // Number of rows in padded image
     303    int numPadded = paddedCols * paddedRows; // Number of pixels in padded image
     304
     305    // Create data array containing the padded image and padded kernel
     306    psF32 *data = fftwf_malloc(2 * numPadded * PSELEMTYPE_SIZEOF(PS_TYPE_F32)); // Data for FFTW
     307    psF32 *dataPtr = data;              // Pointer into FFTW data
     308    psF32 **imageData = in->data.F32;   // Pointer into image data
     309
     310    // Image part of data array
     311    size_t goodBytes = numCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32); // Number of bytes per image row
     312    size_t padBytes = (paddedCols - numCols) * PSELEMTYPE_SIZEOF(PS_TYPE_F32); // Number of bytes to pad
     313    for (int y = 0; y < numRows; y++, dataPtr += paddedCols, imageData++) {
     314        memcpy(dataPtr, *imageData, goodBytes);
     315        memset(dataPtr + numCols, 0, padBytes);
     316    }
     317    memset(dataPtr, 0, (paddedRows - numRows) * paddedCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
     318
     319#if 0
     320    {
     321        // Use this for inspecting the result of copying the image
     322        psImage *test = psImageAlloc(paddedCols, paddedRows, PS_TYPE_F32);
     323        psFree(test->p_rawDataBuffer);
     324        test->p_rawDataBuffer = data;
     325        test->data.V[0] = test->p_rawDataBuffer;
     326        for (int y = 1; y < paddedRows; y++) {
     327            test->data.V[y] = (psPtr)((int8_t *)test->data.V[y - 1] +
     328                                      paddedCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
     329        }
     330        // View image here
     331        test->p_rawDataBuffer = NULL;
     332        psFree(test);
     333    }
     334#endif
     335
     336    // Kernel part of data array
     337    dataPtr = data + numPadded;         // Reset to kernel image location
     338    float norm = 1.0 / (float)(paddedRows * paddedCols); // Normalisation to correct for FFT
     339    // We could generate the padded kernel image using memcpy, but by going pixel by pixel we can apply the
     340    // normalisation that corrects for the FFT renormalisation.  By applying it to the kernel here, we save
     341    // applying it to the entire output image.
     342    int xNegMin = PS_MIN(-1, xMin), xNegMax = PS_MIN(-1, xMax); // Min and max for x when negative
     343    int xPosMin = PS_MAX(0, xMin), xPosMax = PS_MAX(0, xMax); // Min and max for x when positive
     344    int yNegMin = PS_MIN(-1, yMin), yNegMax = PS_MIN(-1, yMax); // Min and max for x when negative
     345    int yPosMin = PS_MAX(0, yMin), yPosMax = PS_MAX(0, yMax); // Min and max for x when positive
     346    int blankCols = xNegMin + paddedCols - xPosMax - 1; // Number of columns between kernel extrema
     347    int blankRows = (yNegMin + paddedRows - yPosMax - 1) * paddedCols; // Rows between kernel extrema
     348    size_t blankColBytes = blankCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32); // Number of bytes in blankCols
     349    for (int y = yPosMin; y <= yPosMax; y++) {
     350        // y is positive
     351        for (int x = xPosMin; x <= xPosMax; x++, dataPtr++) {
     352            // x is positive
     353            *dataPtr = kernel->kernel[y][x] * norm;
     354        }
     355        // Columns between kernel extrema
     356        memset(dataPtr, 0, blankColBytes);
     357        dataPtr += blankCols;
     358        for (int x = xNegMin; x <= xNegMax; x++, dataPtr++) {
     359            // x is negative
     360            *dataPtr = kernel->kernel[y][x] * norm;
     361        }
     362    }
     363    // Rows between kernel extrema
     364    memset(dataPtr, 0, blankRows * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
     365    dataPtr += blankRows;
     366    for (int y = yNegMin; y <= yNegMax; y++) {
     367        // y is negative
     368        for (int x = xPosMin; x <= xPosMax; x++, dataPtr++) {
     369            // x is positive
     370            *dataPtr = kernel->kernel[y][x] * norm;
     371        }
     372        // Columns between kernel extrema
     373        memset(dataPtr, 0, blankColBytes);
     374        dataPtr += blankCols;
     375        for (int x = xNegMin; x <= xNegMax; x++, dataPtr++) {
     376            // x is negative
     377            *dataPtr = kernel->kernel[y][x] * norm;
     378        }
     379    }
     380
     381#if 0
     382    {
     383        // Use this for inspecting the result of copying the kernel
     384        psImage *test = psImageAlloc(paddedCols, paddedRows, PS_TYPE_F32);
     385        psFree(test->p_rawDataBuffer);
     386        test->p_rawDataBuffer = &data[numPadded];
     387        test->data.V[0] = test->p_rawDataBuffer;
     388        for (int y = 1; y < paddedRows; y++) {
     389            test->data.V[y] = (psPtr)((int8_t *)test->data.V[y - 1] +
     390                                      paddedCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
     391        }
     392        // View image here
     393        test->p_rawDataBuffer = NULL;
     394        psFree(test);
     395    }
     396#endif
     397
     398    // Mask bad pixels (which may be NANs), lest they infect everything
     399    if (mask && maskVal) {
     400        for (int y = 0; y < numRows; y++) {
     401            for (int x = 0; x < numCols; x++) {
     402                if (mask->data.PS_TYPE_MASK_DATA[y][x] & maskVal) {
     403                    data[x + paddedCols * y] = 0;
     404                }
     405            }
     406        }
     407    }
     408
     409    // Do the forward FFT
     410    // Note that the FFT images have different size from the input
     411    fftwf_complex *fft = fftwf_malloc(2 * (paddedCols/2 + 1) * paddedRows * sizeof(fftwf_complex)); // FFT
     412    int size[] = { paddedCols, paddedRows }; // Size of transforms
     413    int fftCols = paddedCols/2 + 1, fftRows = paddedRows; // Size of FFT images
     414    int fftPixels = fftCols * fftRows;  // Number of pixels in FFT image
     415    fftwf_plan forward = fftwf_plan_many_dft_r2c(2, size, 2, data, NULL, 1, paddedCols * paddedRows,
     416                                                 fft, NULL, 1, fftPixels, FFTW_PLAN_RIGOR);
     417    fftwf_execute(forward);
     418    fftwf_destroy_plan(forward);
     419
     420    // Multiply the two transforms
     421    for (int i = 0, j = fftPixels; i < fftPixels; i++, j++) {
     422        // (a + bi) * (c + di) = (ac - bd) + (bc + ad)i
     423#if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I)
     424        // C99 complex support
     425        fft[i] *= fft[j];
     426#else
     427        // FFTW's backup complex support
     428        float imageReal = fft[i][0], imageImag = fft[i][1];
     429        float kernelReal = fft[j][0], kernelImag = fft[j][1];
     430        fft[i][0] = imageReal * kernelReal - imageImag * kernelImag;
     431        fft[i][1] = imageImag * kernelReal + imageReal * kernelImag;
     432#endif
     433    }
     434
     435    // Do the backward FFT
     436    fftwf_plan backward = fftwf_plan_dft_c2r_2d(paddedRows, paddedCols, fft, data, FFTW_PLAN_RIGOR);
     437    fftwf_execute(backward);
     438    fftwf_destroy_plan(backward);
     439    fftwf_free(fft);
     440
     441    // Copy into the target, without the padding
     442    out = psImageRecycle(out, numCols, numRows, PS_TYPE_F32);
     443    psF32 **outData = out->data.F32;    // Pointer into output
     444    dataPtr = data;                     // Reset to start
     445    for (int y = 0; y < numRows; y++, outData++, dataPtr += paddedCols) {
     446        memcpy(*outData, dataPtr, goodBytes);
     447    }
     448    //    fftwf_free(data);
     449
     450    return out;
     451}
  • trunk/psLib/src/fft/psImageFFT.h

    r11704 r17320  
    55/// @author Robert DeSonia, MHPCC
    66///
    7 /// @version $Revision: 1.9 $ $Name: not supported by cvs2svn $
    8 /// @date $Date: 2007-02-08 04:23:57 $
     7/// @version $Revision: 1.10 $ $Name: not supported by cvs2svn $
     8/// @date $Date: 2008-04-04 22:44:56 $
    99/// Copyright 2004-2005 Maui High Performance Computing Center, University of Hawaii
    1010///
     
    1414
    1515#include "psImage.h"
     16#include "psImageConvolve.h"
    1617
    1718/// @addtogroup MathOps Mathematical Operations
     
    6162    );
    6263
     64/// Convolve an image with a kernel, using the FFT
     65///
     66/// This is appropriate for larger kernels, where the direct convolution is slow.  The input image and kernel
     67/// are suitably padded to avoid wrap-around effects.
     68psImage *psImageConvolveFFT(
     69    psImage *out,                       ///< Output image, or NULL
     70    const psImage *in,                  ///< Image to convolve
     71    const psImage *mask,                ///< Corresponding mask
     72    psMaskType maskVal,                 ///< Value to mask
     73    const psKernel *kernel              ///< kernel to colvolve with
     74);
     75
    6376/// @}
    6477#endif // #ifndef PS_IMAGE_FFT_H
  • trunk/psLib/src/imageops/psImageConvolve.c

    r17302 r17320  
    77/// @author Eugene Magnier, IfA
    88///
    9 /// @version $Revision: 1.62 $ $Name: not supported by cvs2svn $
    10 /// @date $Date: 2008-04-03 03:00:25 $
     9/// @version $Revision: 1.63 $ $Name: not supported by cvs2svn $
     10/// @date $Date: 2008-04-04 22:44:56 $
    1111///
    1212/// Copyright 2004-2007 Institute for Astronomy, University of Hawaii
     
    386386
    387387
    388 psImage *psImageConvolveFFT(const psImage *in,
    389                             const psImage *mask,
    390                             psMaskType maskVal,
    391                             const psKernel *kernel,
    392                             float pad)
    393 {
    394     PS_ASSERT_IMAGE_NON_NULL(in, NULL);
    395     PS_ASSERT_IMAGE_TYPE(in, PS_TYPE_F32, NULL);
    396     PS_ASSERT_PTR_NON_NULL(kernel, NULL);
    397     PS_ASSERT_IMAGE_NON_NULL(kernel->image, NULL);
    398 
    399     // Pull out kernel parameters, for convenience
    400     int xMin = kernel->xMin;
    401     int xMax = kernel->xMax;
    402     int yMin = kernel->yMin;
    403     int yMax = kernel->yMax;
    404 
    405     int numRows = in->numRows;          // Number of rows in input image
    406     int numCols = in->numCols;          // Number of columns in input image
    407 
    408     // Need to pad the input image to protect from wrap-around effects
    409     if (xMax - xMin > numCols || yMax - yMin > numRows) {
    410         // Cannot pad the image if the kernel is larger.
    411         psError(PS_ERR_BAD_PARAMETER_SIZE, true,
    412                 _("Kernel cannot extend further than input image size (%dx%d vs %dx%d)."),
    413                 xMax, yMax, numCols, numRows);
    414         return NULL;
    415     }
    416     int paddedCols = numCols + PS_MAX(-xMin, xMax); // Number of columns in padded image
    417     int paddedRows = numRows + PS_MAX(-yMin, yMax); // Number of rows in padded image
    418 
    419     // Generate padded image
    420     psImage *paddedImage = psImageAlloc(paddedCols,paddedRows,in->type.type); // Padded input image
    421     if (mask && maskVal) {
    422         // Need to replace non-finite (assumed masked) pixels, since they propagate everywhere during FFT
    423         for (int y = 0; y < numRows; y++) {
    424             for (int x = 0; x < numCols; x++) {
    425                 paddedImage->data.F32[y][x] = (mask->data.PS_TYPE_MASK_DATA[y][x] & maskVal) ? pad :
    426                     in->data.F32[y][x];
    427             }
    428         }
    429     } else {
    430         psImageOverlaySection(paddedImage, in, 0, 0, "=");
    431     }
    432     for (int y = 0; y < numRows; y++) {
    433         for (int x = numCols; x < paddedCols; x++) {
    434             paddedImage->data.F32[y][x] = pad;
    435         }
    436     }
    437     for (int y = numRows; y < paddedRows; y++) {
    438         for (int x = 0; x < paddedCols; x++) {
    439             paddedImage->data.F32[y][x] = pad;
    440         }
    441     }
    442 
    443     // Result of FFT
    444     psImage *inRealFFT = NULL, *inImagFFT = NULL;
    445     if (!psImageForwardFFT(&inRealFFT, &inImagFFT, paddedImage)) {
    446         psError(PS_ERR_UNKNOWN, false, _("Failed to fourier transform input image."));
    447         psFree(paddedImage);
    448         return NULL;
    449     }
    450     psFree(paddedImage);
    451 
    452     // Generate padded kernel image
    453     psImage *paddedKernel = psImageAlloc(paddedCols, paddedRows, PS_TYPE_F32);
    454     psImageInit(paddedKernel, 0.0);
    455     for (int y = PS_MIN(-1, yMin); y <= PS_MIN(-1, yMax); y++) {
    456         // y is negative
    457         if (xMin < 0) {
    458             // x is negative
    459             memcpy(&paddedKernel->data.F32[paddedRows + y][paddedCols + xMin], &kernel->kernel[y][xMin],
    460                    (PS_MIN(0, xMax) - xMin) * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
    461         }
    462         if (xMax >= 0) {
    463             // x is positive
    464             int min = PS_MAX(0, xMin);  // Minimum value of x when positive
    465             memcpy(&paddedKernel->data.F32[paddedRows + y][min], &kernel->kernel[y][min],
    466                    (xMax - min + 1) * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
    467         }
    468     }
    469     for (int y = PS_MAX(0, yMin); y <= PS_MAX(0, yMax); y++) {
    470         // y is positive
    471         if (xMin < 0) {
    472             // x is negative
    473             memcpy(&paddedKernel->data.F32[y][paddedCols + xMin], &kernel->kernel[y][xMin],
    474                    (PS_MIN(0, xMax) - xMin) * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
    475         }
    476         if (xMax >= 0) {
    477             // x is positive
    478             int min = PS_MAX(0, xMin);  // Minimum value of x when positive
    479             memcpy(&paddedKernel->data.F32[y][min], &kernel->kernel[y][min],
    480                    (xMax - min + 1) * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
    481         }
    482     }
    483 
    484     psImage *kernelRealFFT = NULL, *kernelImagFFT = NULL;
    485     if (!psImageForwardFFT(&kernelRealFFT, &kernelImagFFT, paddedKernel)) {
    486         psError(PS_ERR_UNKNOWN, false, _("Failed to fourier transform kernel."));
    487         psFree(inRealFFT);
    488         psFree(inImagFFT);
    489         psFree(paddedKernel);
    490         return NULL;
    491     }
    492     psFree(paddedKernel);
    493 
    494     // Convolution in fourier domain is just a pixel-wise multiplication
    495     if (!psImageComplexMultiply(&inRealFFT, &inImagFFT, inRealFFT, inImagFFT, kernelRealFFT, kernelImagFFT)) {
    496         psError(PS_ERR_UNKNOWN, false, _("Unable to multiply fourier transformts."));
    497         psFree(inRealFFT);
    498         psFree(inImagFFT);
    499         psFree(kernelRealFFT);
    500         psFree(kernelImagFFT);
    501         return NULL;
    502     }
    503     psFree(kernelRealFFT);
    504     psFree(kernelImagFFT);
    505 
    506     psImage *paddedConvolved = NULL; // Padded convolved image
    507     if (!psImageBackwardFFT(&paddedConvolved, inRealFFT, inImagFFT, paddedCols)) {
    508         psError(PS_ERR_UNKNOWN, false, _("Failed to invert fourier transform of convolution image."));
    509         psFree(inRealFFT);
    510         psFree(inImagFFT);
    511         return NULL;
    512     }
    513     psFree(inRealFFT);
    514     psFree(inImagFFT);
    515 
    516     // Trim off the padding, then renormalise (which also does a copy, so there's no parent for the output)
    517     psImage *convolved = psImageSubset(paddedConvolved, psRegionSet(0, numCols, 0, numRows));
    518     psImage *out = (psImage*)psBinaryOp(NULL, convolved, "*",
    519                                         psScalarAlloc(1.0 / paddedCols / paddedRows, PS_TYPE_F32));
    520     psFree(convolved);
    521     psFree(paddedConvolved);
    522 
    523     return out;
    524 }
    525 
    526 
    527388psImage *psImageConvolveMaskFFT(psImage *out, const psImage *mask, psMaskType maskVal,
    528389                                psMaskType setVal, int xMin, int xMax, int yMin, int yMax, float thresh)
     
    573434    psKernel *kernel = psKernelAlloc(xMin, xMax, yMin, yMax);
    574435    psImageInit(kernel->image, 1.0);
    575     psImage *convolved = psImageConvolveFFT(onoff, NULL, 0, kernel, 0.0);
     436    psImage *convolved = psImageConvolveFFT(NULL, onoff, NULL, 0, kernel);
    576437    psFree(onoff);
    577438    psFree(kernel);
  • trunk/psLib/src/imageops/psImageConvolve.h

    r15321 r17320  
    55 * @author Robert DeSonia, MHPCC
    66 *
    7  * @version $Revision: 1.31 $ $Name: not supported by cvs2svn $
    8  * @date $Date: 2007-10-17 01:49:12 $
     7 * @version $Revision: 1.32 $ $Name: not supported by cvs2svn $
     8 * @date $Date: 2008-04-04 22:44:56 $
    99 * Copyright 2004-2005 Maui High Performance Computing Center, University of Hawaii
    1010 */
     
    141141);
    142142
    143 /// Convolve an image with a kernel, using the FFT
    144 ///
    145 /// This is appropriate for larger kernels, where the direct convolution is slow.  The input image and kernel
    146 /// are suitably padded to avoid wrap-around effects.
    147 psImage *psImageConvolveFFT(
    148     const psImage *in,                  ///< Image to convolve
    149     const psImage *mask,                ///< Corresponding mask
    150     psMaskType maskVal,                 ///< Value to mask
    151     const psKernel *kernel,             ///< kernel to colvolve with
    152     float pad                           ///< Value to use to pad the input image
    153 );
    154 
    155143/// Convolve a mask image with a kernel, using direct convolution
    156144///
  • trunk/psLib/test/imageops/convolutionBench.c

    r14866 r17320  
    4747            psKernel *kernel = generateKernel(kernelCols, kernelRows);
    4848            psTimerStart("fft");
    49             psImage *convolved = psImageConvolveFFT(image, NULL, 0, kernel, 0.0);
     49            psImage *convolved = psImageConvolveFFT(NULL, image, NULL, 0, kernel);
    5050            fft += psTimerMark("fft");
    5151            psFree(convolved);
  • trunk/psLib/test/imageops/tap_psImageConvolve2.c

    r17301 r17320  
    9595        psKernel *kernel = generateKernel();
    9696
    97         psImage *convolved = psImageConvolveFFT(image, NULL, 0, kernel, 0.0);
     97        psImage *convolved = psImageConvolveFFT(NULL, image, NULL, 0, kernel);
    9898        ok(convolved, "convolution result");
    9999        skip_start(!convolved, 3, "convolution failed");
Note: See TracChangeset for help on using the changeset viewer.