IPP Software Navigation Tools IPP Links Communication Pan-STARRS Links

Changeset 28910


Ignore:
Timestamp:
Aug 13, 2010, 12:19:38 PM (16 years ago)
Author:
eugene
Message:

add functions for convolutions with kernels using pre-computed FFTs

Location:
branches/eam_branches/ipp-20100621/psLib/src/fft
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/eam_branches/ipp-20100621/psLib/src/fft/psImageFFT.c

    r21183 r28910  
    532532    return out;
    533533}
     534
     535void psKernelFFTFree(psKernelFFT *kernel) {
     536
     537    if (!kernel->fft) return;
     538   
     539    bool threaded = psThreadPoolSize(); // Are we running threaded?
     540
     541    FFTW_LOCK;
     542    fftwf_free(kernel->fft);
     543    FFTW_UNLOCK;
     544
     545    return;
     546}
     547
     548psKernelFFT *psKernelFFTAlloc(const psKernel *input) {
     549
     550    psKernelFFT *kernel = psAlloc(sizeof(psKernelFFT));
     551    psMemSetDeallocator(kernel, (psFreeFunc)psKernelFFTFree);
     552
     553    kernel->fft = NULL;
     554
     555    kernel->xMin = input->xMin;
     556    kernel->xMax = input->xMax;
     557    kernel->yMin = input->yMin;
     558    kernel->yMax = input->yMax;
     559
     560    kernel->paddedCols = 0;
     561    kernel->paddedRows = 0;
     562
     563    return kernel;
     564}
     565
     566// generate the FFTed kernel image appropriate to be used for convolution of the given input image
     567psKernelFFT *psImageConvolveKernelInit(const psImage *in, const psKernel *kernel)
     568{
     569    PS_ASSERT_IMAGE_NON_NULL(in, NULL);
     570    PS_ASSERT_IMAGE_TYPE(in, PS_TYPE_F32, NULL);
     571    PS_ASSERT_KERNEL_NON_NULL(kernel, NULL);
     572
     573    bool threaded = psThreadPoolSize(); // Are we running threaded?
     574
     575    int numCols = in->numCols, numRows = in->numRows; // Size of image
     576    int xMin = kernel->xMin, xMax = kernel->xMax, yMin = kernel->yMin, yMax = kernel->yMax; // Kernel sizes
     577
     578    // Need to pad the kernel (and later the input image) to protect from wrap-around effects
     579    if (xMax - xMin > numCols || yMax - yMin > numRows) {
     580        // Cannot pad the image if the kernel is larger.
     581        psError(PS_ERR_BAD_PARAMETER_SIZE, true,
     582                _("Kernel cannot extend further than input image size (%dx%d vs %dx%d)."),
     583                xMax, yMax, numCols, numRows);
     584        return NULL;
     585    }
     586
     587    int paddedCols = numCols + PS_MAX(-xMin, xMax); // Number of columns in padded image
     588    int paddedRows = numRows + PS_MAX(-yMin, yMax); // Number of rows in padded image
     589
     590#if CONVOLVE_FFT_BINARY_SIZE
     591    // Make the size an integer power of two
     592    {
     593        int twoCols, twoRows;           // Size that is a factor of two
     594        for (twoCols = 1; twoCols <= paddedCols && twoCols < INT_MAX - 1; twoCols <<= 1); // No action
     595        for (twoRows = 1; twoRows <= paddedRows && twoRows < INT_MAX - 1; twoRows <<= 1); // No action
     596        if (paddedCols > twoCols || paddedRows > twoRows) {
     597            psError(PS_ERR_BAD_PARAMETER_SIZE, true, "Unable to scale size (%dx%d) up to factor of two",
     598                    paddedCols, paddedRows);
     599            return NULL;
     600        }
     601        paddedCols = twoCols;
     602        paddedRows = twoRows;
     603    }
     604#endif
     605
     606    int numPadded = paddedCols * paddedRows; // Number of pixels in padded image
     607
     608    // Create data array containing the padded kernel
     609    FFTW_LOCK;
     610    psF32 *data = fftwf_malloc(numPadded * PSELEMTYPE_SIZEOF(PS_TYPE_F32)); // Data for FFTW
     611    FFTW_UNLOCK;
     612
     613    // Generate the padded kernel image.  We could generate the padded kernel image using
     614    // memcpy, but by going pixel by pixel we can apply the normalisation that corrects for the
     615    // FFT renormalisation.  By applying it to the kernel here, we save applying it to the
     616    // output image(s).
     617
     618    // copy kernel into data array
     619    psF32 *dataPtr = data;              // Pointer into FFTW data
     620    float norm = 1.0 / (float)(paddedRows * paddedCols); // Normalisation to correct for FFT
     621
     622    int xNegMin = PS_MIN(-1, xMin), xNegMax = PS_MIN(-1, xMax); // Min and max for x when negative
     623    int xPosMin = PS_MAX(0, xMin), xPosMax = PS_MAX(0, xMax); // Min and max for x when positive
     624    int yNegMin = PS_MIN(-1, yMin), yNegMax = PS_MIN(-1, yMax); // Min and max for x when negative
     625    int yPosMin = PS_MAX(0, yMin), yPosMax = PS_MAX(0, yMax); // Min and max for x when positive
     626    int blankCols = xNegMin + paddedCols - xPosMax - 1; // Number of columns between kernel extrema
     627    int blankRows = (yNegMin + paddedRows - yPosMax - 1) * paddedCols; // Rows between kernel extrema
     628    size_t blankColBytes = blankCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32); // Number of bytes in blankCols
     629    for (int y = yPosMin; y <= yPosMax; y++) {
     630        // y is positive
     631        for (int x = xPosMin; x <= xPosMax; x++, dataPtr++) {
     632            // x is positive
     633            *dataPtr = kernel->kernel[y][x] * norm;
     634        }
     635        // Columns between kernel extrema
     636        memset(dataPtr, 0, blankColBytes);
     637        dataPtr += blankCols;
     638        for (int x = xNegMin; x <= xNegMax; x++, dataPtr++) {
     639            // x is negative
     640            *dataPtr = kernel->kernel[y][x] * norm;
     641        }
     642    }
     643    // Rows between kernel extrema
     644    memset(dataPtr, 0, blankRows * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
     645    dataPtr += blankRows;
     646    for (int y = yNegMin; y <= yNegMax; y++) {
     647        // y is negative
     648        for (int x = xPosMin; x <= xPosMax; x++, dataPtr++) {
     649            // x is positive
     650            *dataPtr = kernel->kernel[y][x] * norm;
     651        }
     652        // Columns between kernel extrema
     653        memset(dataPtr, 0, blankColBytes);
     654        dataPtr += blankCols;
     655        for (int x = xNegMin; x <= xNegMax; x++, dataPtr++) {
     656            // x is negative
     657            *dataPtr = kernel->kernel[y][x] * norm;
     658        }
     659    }
     660
     661#if 0
     662    {
     663        // Use this for inspecting the result of copying the kernel
     664        psImage *test = psImageAlloc(paddedCols, paddedRows, PS_TYPE_F32);
     665        psFree(test->p_rawDataBuffer);
     666        test->p_rawDataBuffer = data;
     667        test->data.V[0] = test->p_rawDataBuffer;
     668        for (int y = 1; y < paddedRows; y++) {
     669            test->data.V[y] = (psPtr)((int8_t *)test->data.V[y - 1] + paddedCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
     670        }
     671        // View image here or write to disk
     672        test->p_rawDataBuffer = NULL;
     673        psFree(test);
     674    }
     675#endif
     676
     677    // Do the forward FFT
     678    // Note that the FFT images have different size from the input
     679    FFTW_LOCK;
     680    fftwf_complex *fft = fftwf_malloc((paddedCols/2 + 1) * paddedRows * sizeof(fftwf_complex)); // FFT
     681    FFTW_UNLOCK;
     682
     683    FFTW_LOCK;
     684    fftwf_plan plan = fftwf_plan_dft_r2c_2d(paddedRows, paddedCols, data, fft, FFTW_PLAN_RIGOR);
     685    FFTW_UNLOCK;
     686
     687    fftwf_execute(plan);
     688
     689    FFTW_LOCK;
     690    fftwf_destroy_plan(plan);
     691    fftwf_free(data);
     692    FFTW_UNLOCK;
     693
     694    psKernelFFT *output = psKernelFFTAlloc(kernel);
     695    output->fft = fft;
     696    output->paddedCols = paddedCols;
     697    output->paddedRows = paddedRows;
     698
     699    return output;
     700}
     701
     702psImage *psImageConvolveKernel(psImage *out, const psImage *in, const psImage *mask, psImageMaskType maskVal, const psKernelFFT *kernel)
     703{
     704    PS_ASSERT_IMAGE_NON_NULL(in, NULL);
     705    PS_ASSERT_IMAGE_TYPE(in, PS_TYPE_F32, NULL);
     706    PS_ASSERT_PTR_NON_NULL(kernel, NULL);
     707    PS_ASSERT_PTR_NON_NULL(kernel->fft, NULL);
     708    if (mask) {
     709        PS_ASSERT_IMAGE_NON_NULL(mask, NULL);
     710        PS_ASSERT_IMAGE_TYPE(mask, PS_TYPE_IMAGE_MASK, NULL);
     711        PS_ASSERT_IMAGES_SIZE_EQUAL(mask, in, NULL);
     712    }
     713
     714    bool threaded = psThreadPoolSize(); // Are we running threaded?
     715
     716    int numCols = in->numCols, numRows = in->numRows; // Size of image
     717    int xMin = kernel->xMin, xMax = kernel->xMax, yMin = kernel->yMin, yMax = kernel->yMax; // Kernel sizes
     718
     719    // Need to pad the input image to protect from wrap-around effects
     720    if (xMax - xMin > numCols || yMax - yMin > numRows) {
     721        // Cannot pad the image if the kernel is larger.
     722        psError(PS_ERR_BAD_PARAMETER_SIZE, true, _("Kernel cannot extend further than input image size (%dx%d vs %dx%d)."), xMax, yMax, numCols, numRows);
     723        return NULL;
     724    }
     725
     726    int paddedCols = numCols + PS_MAX(-xMin, xMax); // Number of columns in padded image
     727    int paddedRows = numRows + PS_MAX(-yMin, yMax); // Number of rows in padded image
     728
     729#if CONVOLVE_FFT_BINARY_SIZE
     730    // Make the size an integer power of two
     731    {
     732        int twoCols, twoRows;           // Size that is a factor of two
     733        for (twoCols = 1; twoCols <= paddedCols && twoCols < INT_MAX - 1; twoCols <<= 1); // No action
     734        for (twoRows = 1; twoRows <= paddedRows && twoRows < INT_MAX - 1; twoRows <<= 1); // No action
     735        if (paddedCols > twoCols || paddedRows > twoRows) {
     736            psError(PS_ERR_BAD_PARAMETER_SIZE, true, "Unable to scale size (%dx%d) up to factor of two",
     737                    paddedCols, paddedRows);
     738            return NULL;
     739        }
     740        paddedCols = twoCols;
     741        paddedRows = twoRows;
     742    }
     743#endif
     744
     745    if (paddedCols != kernel->paddedCols) {
     746        psError(PS_ERR_BAD_PARAMETER_SIZE, true, _("Image is inconsistent with allocated kernel"));
     747        return NULL;
     748    }   
     749    if (paddedRows != kernel->paddedRows) {
     750        psError(PS_ERR_BAD_PARAMETER_SIZE, true, _("Image is inconsistent with allocated kernel"));
     751        return NULL;
     752    }   
     753
     754    int numPadded = paddedCols * paddedRows; // Number of pixels in padded image
     755
     756    // Create data array containing the padded image
     757    FFTW_LOCK;
     758    psF32 *data = fftwf_malloc(numPadded * PSELEMTYPE_SIZEOF(PS_TYPE_F32)); // Data for FFTW
     759    FFTW_UNLOCK;
     760    psF32 *dataPtr = data;              // Pointer into FFTW data
     761    psF32 **imageData = in->data.F32;   // Pointer into image data
     762
     763    // Copy image into data array
     764    size_t goodBytes = numCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32); // Number of bytes per image row
     765    size_t padBytes = (paddedCols - numCols) * PSELEMTYPE_SIZEOF(PS_TYPE_F32); // Number of bytes to pad
     766    for (int y = 0; y < numRows; y++, dataPtr += paddedCols, imageData++) {
     767        memcpy(dataPtr, *imageData, goodBytes);
     768        memset(dataPtr + numCols, 0, padBytes);
     769    }
     770    memset(dataPtr, 0, (paddedRows - numRows) * paddedCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
     771
     772#if 0
     773    {
     774        // Use this for inspecting the result of copying the image
     775        psImage *test = psImageAlloc(paddedCols, paddedRows, PS_TYPE_F32);
     776        psFree(test->p_rawDataBuffer);
     777        test->p_rawDataBuffer = data;
     778        test->data.V[0] = test->p_rawDataBuffer;
     779        for (int y = 1; y < paddedRows; y++) {
     780            test->data.V[y] = (psPtr)((int8_t *)test->data.V[y - 1] +
     781                                      paddedCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
     782        }
     783        // View image here
     784        test->p_rawDataBuffer = NULL;
     785        psFree(test);
     786    }
     787#endif
     788
     789    // Mask bad pixels (which may be NANs), lest they infect everything
     790    if (mask && maskVal) {
     791        for (int y = 0; y < numRows; y++) {
     792            for (int x = 0; x < numCols; x++) {
     793                if (mask->data.PS_TYPE_IMAGE_MASK_DATA[y][x] & maskVal) {
     794                    data[x + paddedCols * y] = 0;
     795                }
     796            }
     797        }
     798    }
     799
     800    // Do the forward FFT
     801    // Note that the FFT images have different size from the input
     802    FFTW_LOCK;
     803    fftwf_complex *fft = fftwf_malloc((paddedCols/2 + 1) * paddedRows * sizeof(fftwf_complex)); // FFT
     804    FFTW_UNLOCK;
     805
     806    int fftCols = paddedCols/2 + 1, fftRows = paddedRows; // Size of FFT images
     807    int fftPixels = fftCols * fftRows;  // Number of pixels in FFT image
     808
     809    FFTW_LOCK;
     810    fftwf_plan forward = fftwf_plan_dft_r2c_2d(paddedRows, paddedCols, data, fft, FFTW_PLAN_RIGOR);
     811    FFTW_UNLOCK;
     812
     813    fftwf_execute(forward);
     814
     815    FFTW_LOCK;
     816    fftwf_destroy_plan(forward);
     817    FFTW_UNLOCK;
     818
     819    fftwf_complex *kernelFFT = kernel->fft;
     820
     821    // Multiply the two transforms
     822    for (int i = 0; i < fftPixels; i++) {
     823        // (a + bi) * (c + di) = (ac - bd) + (bc + ad)i
     824#if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I)
     825        // C99 complex support
     826        fft[i] *= kernelFFT[i];
     827#else
     828        // FFTW's backup complex support
     829        float imageReal = fft[i][0], imageImag = fft[i][1];
     830        float kernelReal = kernelFFT[i][0], kernelImag = kernelFFT[i][1];
     831        fft[i][0] = imageReal * kernelReal - imageImag * kernelImag;
     832        fft[i][1] = imageImag * kernelReal + imageReal * kernelImag;
     833#endif
     834    }
     835
     836    // Do the backward FFT
     837    FFTW_LOCK;
     838    fftwf_plan backward = fftwf_plan_dft_c2r_2d(paddedRows, paddedCols, fft, data, FFTW_PLAN_RIGOR);
     839    FFTW_UNLOCK;
     840
     841    fftwf_execute(backward);
     842
     843    FFTW_LOCK;
     844    fftwf_destroy_plan(backward);
     845    fftwf_free(fft);
     846    FFTW_UNLOCK;
     847
     848    // Copy into the target, without the padding
     849    out = psImageRecycle(out, numCols, numRows, PS_TYPE_F32);
     850    psF32 **outData = out->data.F32;    // Pointer into output
     851    dataPtr = data;                     // Reset to start
     852    for (int y = 0; y < numRows; y++, outData++, dataPtr += paddedCols) {
     853        memcpy(*outData, dataPtr, goodBytes);
     854    }
     855
     856    FFTW_LOCK;
     857    fftwf_free(data);
     858    FFTW_UNLOCK;
     859
     860    return out;
     861}
  • branches/eam_branches/ipp-20100621/psLib/src/fft/psImageFFT.h

    r21183 r28910  
    1515#include "psImage.h"
    1616#include "psImageConvolve.h"
     17
     18typedef struct {
     19    void *fft;
     20    int xMin;                          ///< Most negative x index
     21    int yMin;                          ///< Most negative y index
     22    int xMax;                          ///< Most positive x index
     23    int yMax;                          ///< Most positive y index
     24    int paddedCols;
     25    int paddedRows;
     26} psKernelFFT;
    1727
    1828/// @addtogroup MathOps Mathematical Operations
     
    7181    const psImage *mask,                ///< Corresponding mask
    7282    psImageMaskType maskVal,            ///< Value to mask
    73     const psKernel *kernel              ///< kernel to colvolve with
     83    const psKernel *kernel              ///< kernel to convolve with
     84    );
     85
     86/// Allocate an the psKernelFFT structure
     87psKernelFFT *psKernelFFTAlloc(
     88    const psKernel *input               ///< kernel to convolve with
    7489);
     90
     91/// Generate an FFT'ed kernel suitable for convolution with the given image
     92psKernelFFT *psImageConvolveKernelInit(
     93    const psImage *in,                  ///< representative image to convolve
     94    const psKernel *kernel              ///< kernel to convolve with
     95    );
     96
     97/// Convolve the given image with the pre-FFT'ed kernel
     98psImage *psImageConvolveKernel(
     99    psImage *out,                       ///< Output image, or NULL
     100    const psImage *in,                  ///< Image to convolve
     101    const psImage *mask,                ///< Corresponding mask
     102    psImageMaskType maskVal,            ///< Value to mask
     103    const psKernelFFT *kernel           ///< kernel to convolve with
     104    );
    75105
    76106/// @}
Note: See TracChangeset for help on using the changeset viewer.