IPP Software Navigation Tools IPP Links Communication Pan-STARRS Links

Ignore:
Timestamp:
Apr 4, 2008, 12:44:56 PM (18 years ago)
Author:
Paul Price
Message:

Optimising psImageConvolveFFT. Made a couple of optimisations: 1. Moved code into psImageFFT.c, so I can use FFTW directly. FFTW, through its 'advanced interface' allows two FFTs to be performed at the same time and 'the resulting plans can often be faster than calling FFTW multiple times for the individual transforms' (FFTW 3.1.2 manual, p30). 2. The convolved image needs to be normalised because FFTW doesn't take out the sqrt(N) factors in the FFT. Instead of multiplying the entire convolved image, we can multiply the much smaller number of pixels in the kernel. Tested this code with tap_psImageConvolve2 and it seems to work. Running convolutionBench unoptimised on mithrandir produces times that are comparable to what I got a while back running optimised. Running it optimized on alala produces speed-ups ranging from 25 to 350%.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/psLib/src/imageops/psImageConvolve.c

    r17302 r17320  
    77/// @author Eugene Magnier, IfA
    88///
    9 /// @version $Revision: 1.62 $ $Name: not supported by cvs2svn $
    10 /// @date $Date: 2008-04-03 03:00:25 $
     9/// @version $Revision: 1.63 $ $Name: not supported by cvs2svn $
     10/// @date $Date: 2008-04-04 22:44:56 $
    1111///
    1212/// Copyright 2004-2007 Institute for Astronomy, University of Hawaii
     
    386386
    387387
    388 psImage *psImageConvolveFFT(const psImage *in,
    389                             const psImage *mask,
    390                             psMaskType maskVal,
    391                             const psKernel *kernel,
    392                             float pad)
    393 {
    394     PS_ASSERT_IMAGE_NON_NULL(in, NULL);
    395     PS_ASSERT_IMAGE_TYPE(in, PS_TYPE_F32, NULL);
    396     PS_ASSERT_PTR_NON_NULL(kernel, NULL);
    397     PS_ASSERT_IMAGE_NON_NULL(kernel->image, NULL);
    398 
    399     // Pull out kernel parameters, for convenience
    400     int xMin = kernel->xMin;
    401     int xMax = kernel->xMax;
    402     int yMin = kernel->yMin;
    403     int yMax = kernel->yMax;
    404 
    405     int numRows = in->numRows;          // Number of rows in input image
    406     int numCols = in->numCols;          // Number of columns in input image
    407 
    408     // Need to pad the input image to protect from wrap-around effects
    409     if (xMax - xMin > numCols || yMax - yMin > numRows) {
    410         // Cannot pad the image if the kernel is larger.
    411         psError(PS_ERR_BAD_PARAMETER_SIZE, true,
    412                 _("Kernel cannot extend further than input image size (%dx%d vs %dx%d)."),
    413                 xMax, yMax, numCols, numRows);
    414         return NULL;
    415     }
    416     int paddedCols = numCols + PS_MAX(-xMin, xMax); // Number of columns in padded image
    417     int paddedRows = numRows + PS_MAX(-yMin, yMax); // Number of rows in padded image
    418 
    419     // Generate padded image
    420     psImage *paddedImage = psImageAlloc(paddedCols,paddedRows,in->type.type); // Padded input image
    421     if (mask && maskVal) {
    422         // Need to replace non-finite (assumed masked) pixels, since they propagate everywhere during FFT
    423         for (int y = 0; y < numRows; y++) {
    424             for (int x = 0; x < numCols; x++) {
    425                 paddedImage->data.F32[y][x] = (mask->data.PS_TYPE_MASK_DATA[y][x] & maskVal) ? pad :
    426                     in->data.F32[y][x];
    427             }
    428         }
    429     } else {
    430         psImageOverlaySection(paddedImage, in, 0, 0, "=");
    431     }
    432     for (int y = 0; y < numRows; y++) {
    433         for (int x = numCols; x < paddedCols; x++) {
    434             paddedImage->data.F32[y][x] = pad;
    435         }
    436     }
    437     for (int y = numRows; y < paddedRows; y++) {
    438         for (int x = 0; x < paddedCols; x++) {
    439             paddedImage->data.F32[y][x] = pad;
    440         }
    441     }
    442 
    443     // Result of FFT
    444     psImage *inRealFFT = NULL, *inImagFFT = NULL;
    445     if (!psImageForwardFFT(&inRealFFT, &inImagFFT, paddedImage)) {
    446         psError(PS_ERR_UNKNOWN, false, _("Failed to fourier transform input image."));
    447         psFree(paddedImage);
    448         return NULL;
    449     }
    450     psFree(paddedImage);
    451 
    452     // Generate padded kernel image
    453     psImage *paddedKernel = psImageAlloc(paddedCols, paddedRows, PS_TYPE_F32);
    454     psImageInit(paddedKernel, 0.0);
    455     for (int y = PS_MIN(-1, yMin); y <= PS_MIN(-1, yMax); y++) {
    456         // y is negative
    457         if (xMin < 0) {
    458             // x is negative
    459             memcpy(&paddedKernel->data.F32[paddedRows + y][paddedCols + xMin], &kernel->kernel[y][xMin],
    460                    (PS_MIN(0, xMax) - xMin) * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
    461         }
    462         if (xMax >= 0) {
    463             // x is positive
    464             int min = PS_MAX(0, xMin);  // Minimum value of x when positive
    465             memcpy(&paddedKernel->data.F32[paddedRows + y][min], &kernel->kernel[y][min],
    466                    (xMax - min + 1) * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
    467         }
    468     }
    469     for (int y = PS_MAX(0, yMin); y <= PS_MAX(0, yMax); y++) {
    470         // y is positive
    471         if (xMin < 0) {
    472             // x is negative
    473             memcpy(&paddedKernel->data.F32[y][paddedCols + xMin], &kernel->kernel[y][xMin],
    474                    (PS_MIN(0, xMax) - xMin) * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
    475         }
    476         if (xMax >= 0) {
    477             // x is positive
    478             int min = PS_MAX(0, xMin);  // Minimum value of x when positive
    479             memcpy(&paddedKernel->data.F32[y][min], &kernel->kernel[y][min],
    480                    (xMax - min + 1) * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
    481         }
    482     }
    483 
    484     psImage *kernelRealFFT = NULL, *kernelImagFFT = NULL;
    485     if (!psImageForwardFFT(&kernelRealFFT, &kernelImagFFT, paddedKernel)) {
    486         psError(PS_ERR_UNKNOWN, false, _("Failed to fourier transform kernel."));
    487         psFree(inRealFFT);
    488         psFree(inImagFFT);
    489         psFree(paddedKernel);
    490         return NULL;
    491     }
    492     psFree(paddedKernel);
    493 
    494     // Convolution in fourier domain is just a pixel-wise multiplication
    495     if (!psImageComplexMultiply(&inRealFFT, &inImagFFT, inRealFFT, inImagFFT, kernelRealFFT, kernelImagFFT)) {
    496         psError(PS_ERR_UNKNOWN, false, _("Unable to multiply fourier transformts."));
    497         psFree(inRealFFT);
    498         psFree(inImagFFT);
    499         psFree(kernelRealFFT);
    500         psFree(kernelImagFFT);
    501         return NULL;
    502     }
    503     psFree(kernelRealFFT);
    504     psFree(kernelImagFFT);
    505 
    506     psImage *paddedConvolved = NULL; // Padded convolved image
    507     if (!psImageBackwardFFT(&paddedConvolved, inRealFFT, inImagFFT, paddedCols)) {
    508         psError(PS_ERR_UNKNOWN, false, _("Failed to invert fourier transform of convolution image."));
    509         psFree(inRealFFT);
    510         psFree(inImagFFT);
    511         return NULL;
    512     }
    513     psFree(inRealFFT);
    514     psFree(inImagFFT);
    515 
    516     // Trim off the padding, then renormalise (which also does a copy, so there's no parent for the output)
    517     psImage *convolved = psImageSubset(paddedConvolved, psRegionSet(0, numCols, 0, numRows));
    518     psImage *out = (psImage*)psBinaryOp(NULL, convolved, "*",
    519                                         psScalarAlloc(1.0 / paddedCols / paddedRows, PS_TYPE_F32));
    520     psFree(convolved);
    521     psFree(paddedConvolved);
    522 
    523     return out;
    524 }
    525 
    526 
    527388psImage *psImageConvolveMaskFFT(psImage *out, const psImage *mask, psMaskType maskVal,
    528389                                psMaskType setVal, int xMin, int xMax, int yMin, int yMax, float thresh)
     
    573434    psKernel *kernel = psKernelAlloc(xMin, xMax, yMin, yMax);
    574435    psImageInit(kernel->image, 1.0);
    575     psImage *convolved = psImageConvolveFFT(onoff, NULL, 0, kernel, 0.0);
     436    psImage *convolved = psImageConvolveFFT(NULL, onoff, NULL, 0, kernel);
    576437    psFree(onoff);
    577438    psFree(kernel);
Note: See TracChangeset for help on using the changeset viewer.