IPP Software Navigation Tools IPP Links Communication Pan-STARRS Links

Changeset 19058


Ignore:
Timestamp:
Aug 13, 2008, 5:23:13 PM (18 years ago)
Author:
Paul Price
Message:

Learned that of all the fftw_ calls, only fftw_execute is thread-safe: all other calls have to be wrapped in a mutex for thread safety.

Location:
trunk/psLib/src/fft
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/psLib/src/fft/psFFT.c

    r19004 r19058  
    44
    55#include <fftw3.h>
     6#include <pthread.h>
    67
    78#include "psAssert.h"
    89#include "psLogMsg.h"
    910#include "psFFT.h"
     11
     12static pthread_mutex_t fftwLock = PTHREAD_MUTEX_INITIALIZER; // Lock for FFTW
     13
     14void psFFTLock(void)
     15{
     16    pthread_mutex_lock(&fftwLock);
     17}
     18
     19void psFFTUnlock(void)
     20{
     21    pthread_mutex_unlock(&fftwLock);
     22}
    1023
    1124bool psFFTThreads(int threads)
  • trunk/psLib/src/fft/psFFT.h

    r18956 r19058  
    22#define PS_FFT_H
    33
    4 /// Set number of threads to use with all FFTs
     4// Only fftw_execute is thread-safe: all other calls have to be wrapped in a mutex
     5
     6/// Lock FFT mutex
     7void psFFTLock(void);
     8
     9/// Unlock FFT mutex
     10void psFFTUnlock(void);
     11
     12/// Set number of threads for FFTW to use
    513///
    6 /// Pass zero to turn off threading
     14/// Pass zero to turn off threading.  Note that the FFTW threads are *independent* of any other threads set up
     15/// throuhg, e.g., psThread.
    716bool psFFTThreads(int threads           // Number of threads
    817    );
  • trunk/psLib/src/fft/psImageFFT.c

    r17377 r19058  
    66/// @author Robert DeSonia, MHPCC
    77///
    8 /// @version $Revision: 1.26 $ $Name: not supported by cvs2svn $
    9 /// @date $Date: 2008-04-08 01:25:42 $
     8/// @version $Revision: 1.27 $ $Name: not supported by cvs2svn $
     9/// @date $Date: 2008-08-14 03:23:13 $
    1010///
    1111/// Copyright 2004-2005 Maui High Performance Computing Center, University of Hawaii
     
    2020#include <complex.h>
    2121#include <fftw3.h>
     22#include <pthread.h>
    2223
    2324#include "psAssert.h"
     
    2829#include "psImageStructManip.h"
    2930#include "psImageConvolve.h"
     31#include "psThread.h"
     32#include "psFFT.h"
    3033#include "psImageFFT.h"
    3134
     35// Lock FFTW access
     36#define FFTW_LOCK \
     37if (threaded) { \
     38    psFFTLock(); \
     39}
     40// Unlock FFTW access
     41#define FFTW_UNLOCK \
     42if (threaded) { \
     43    psFFTUnlock(); \
     44}
    3245
    3346#define FFTW_PLAN_RIGOR FFTW_ESTIMATE   // How rigorous the FFTW planning is
     
    4255    PS_ASSERT_PTR_NON_NULL(imag, false);
    4356
     57    bool threaded = psThreadPoolSize(); // Are we running threaded?
     58
    4459    // Make sure the system-level wisdom information is imported.
     60    FFTW_LOCK;
    4561    if (!fftwWisdomImported) {
    4662        fftwf_import_system_wisdom();
    4763        fftwWisdomImported = true;
    4864    }
     65    FFTW_UNLOCK;
    4966
    5067    int numCols = in->numCols;          // Number of columns
     
    6481
    6582    // Do the FFT
     83
     84    FFTW_LOCK;
    6685    fftwf_complex *out = fftwf_malloc((numCols/2 + 1) * numRows * sizeof(fftwf_complex)); // Output data
    6786    fftwf_plan plan = fftwf_plan_dft_r2c_2d(numRows, numCols, input, out, FFTW_PLAN_RIGOR);
     87    FFTW_UNLOCK;
     88
    6889    fftwf_execute(plan);
     90
     91    FFTW_LOCK;
    6992    fftwf_destroy_plan(plan);
     93    FFTW_UNLOCK;
     94
    7095    psFree(input);
    7196
     
    87112        }
    88113    }
     114
     115    FFTW_LOCK;
    89116    fftwf_free(out);
     117    FFTW_UNLOCK;
    90118
    91119    return true;
     
    101129    PS_ASSERT_PTR_NON_NULL(out, false);
    102130
     131    bool threaded = psThreadPoolSize(); // Are we running threaded?
     132
    103133    int numCols = real->numCols;        // Number of columns
    104134    int numRows = real->numRows;        // Number of rows
     
    120150    }
    121151
    122 
    123152    // Make sure the system-level wisdom information is imported.
     153    FFTW_LOCK;
    124154    if (!fftwWisdomImported) {
    125155        fftwf_import_system_wisdom();
    126156        fftwWisdomImported = true;
    127157    }
     158    FFTW_UNLOCK;
    128159
    129160    // Stuff the real and imaginary parts in
    130161    psF32 *target = psAlloc(2 * numCols * numRows * PSELEMTYPE_SIZEOF(PS_TYPE_F32)); // Target for FFTW
     162    FFTW_LOCK;
    131163    fftwf_complex *in = fftwf_malloc(numCols * numRows * sizeof(fftwf_complex)); // Input data
     164    FFTW_UNLOCK;
    132165
    133166    for (int y = 0, index = 0; y < numRows; y++) {
     
    145178
    146179    // Do the FFT
     180    FFTW_LOCK;
    147181    fftwf_plan plan = fftwf_plan_dft_c2r_2d(numRows, origCols, in, target, FFTW_PLAN_RIGOR);
     182    FFTW_UNLOCK;
     183
    148184    fftwf_execute(plan);
     185
     186    FFTW_LOCK;
    149187    fftwf_destroy_plan(plan);
    150188    fftwf_free(in);
     189    FFTW_UNLOCK;
    151190
    152191    // Copy the target pixels into the output
     
    288327    }
    289328
     329    bool threaded = psThreadPoolSize(); // Are we running threaded?
     330
    290331    int numCols = in->numCols, numRows = in->numRows; // Size of image
    291332    int xMin = kernel->xMin, xMax = kernel->xMax, yMin = kernel->yMin, yMax = kernel->yMax; // Kernel sizes
     
    304345
    305346    // Create data array containing the padded image and padded kernel
     347    FFTW_LOCK;
    306348    psF32 *data = fftwf_malloc(2 * numPadded * PSELEMTYPE_SIZEOF(PS_TYPE_F32)); // Data for FFTW
     349    FFTW_UNLOCK;
    307350    psF32 *dataPtr = data;              // Pointer into FFTW data
    308351    psF32 **imageData = in->data.F32;   // Pointer into image data
     
    317360    memset(dataPtr, 0, (paddedRows - numRows) * paddedCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32));
    318361
    319 #if 1
     362#if 0
    320363    {
    321364        // Use this for inspecting the result of copying the image
     
    379422    }
    380423
    381 #if 1
     424#if 0
    382425    {
    383426        // Use this for inspecting the result of copying the kernel
     
    409452    // Do the forward FFT
    410453    // Note that the FFT images have different size from the input
     454    FFTW_LOCK;
    411455    fftwf_complex *fft = fftwf_malloc(2 * (paddedCols/2 + 1) * paddedRows * sizeof(fftwf_complex)); // FFT
     456    FFTW_UNLOCK;
    412457    int size[] = { paddedRows, paddedCols }; // Size of transforms
    413458    int fftCols = paddedCols/2 + 1, fftRows = paddedRows; // Size of FFT images
    414459    int fftPixels = fftCols * fftRows;  // Number of pixels in FFT image
     460
     461    FFTW_LOCK;
    415462    fftwf_plan forward = fftwf_plan_many_dft_r2c(2, size, 2, data, NULL, 1, paddedCols * paddedRows,
    416463                                                 fft, NULL, 1, fftPixels, FFTW_PLAN_RIGOR);
     464    FFTW_UNLOCK;
     465
    417466    fftwf_execute(forward);
     467
     468    FFTW_LOCK;
    418469    fftwf_destroy_plan(forward);
     470    FFTW_UNLOCK;
    419471
    420472    // Multiply the two transforms
     
    434486
    435487    // Do the backward FFT
     488    FFTW_LOCK;
    436489    fftwf_plan backward = fftwf_plan_dft_c2r_2d(paddedRows, paddedCols, fft, data, FFTW_PLAN_RIGOR);
     490    FFTW_UNLOCK;
     491
    437492    fftwf_execute(backward);
     493
     494    FFTW_LOCK;
    438495    fftwf_destroy_plan(backward);
    439496    fftwf_free(fft);
     497    FFTW_UNLOCK;
    440498
    441499    // Copy into the target, without the padding
     
    446504        memcpy(*outData, dataPtr, goodBytes);
    447505    }
     506
     507    FFTW_LOCK;
    448508    fftwf_free(data);
     509    FFTW_UNLOCK;
    449510
    450511    return out;
  • trunk/psLib/src/fft/psVectorFFT.c

    r11719 r19058  
    66 *  @author Robert DeSonia, MHPCC
    77 *
    8  *  @version $Revision: 1.39 $ $Name: not supported by cvs2svn $
    9  *  @date $Date: 2007-02-09 00:45:51 $
     8 *  @version $Revision: 1.40 $ $Name: not supported by cvs2svn $
     9 *  @date $Date: 2008-08-14 03:23:13 $
    1010 *
    1111 *  Copyright 2004-2005 Maui High Performance Computing Center, University of Hawaii
     
    2727#include "psLogMsg.h"
    2828#include "psConstants.h"
     29#include "psThread.h"
     30#include "psFFT.h"
    2931#include "psVectorFFT.h"
     32
     33// Lock FFTW access
     34#define FFTW_LOCK \
     35if (threaded) { \
     36    psFFTLock(); \
     37}
     38// Unlock FFTW access
     39#define FFTW_UNLOCK \
     40if (threaded) { \
     41    psFFTUnlock(); \
     42}
    3043
    3144#define FFTW_PLAN_RIGOR FFTW_ESTIMATE   // How rigorous the FFTW planning is
     
    4053    PS_ASSERT_PTR_NON_NULL(imag, false);
    4154
     55    bool threaded = psThreadPoolSize(); // Are we running threaded?
     56
    4257    // Make sure the system-level wisdom information is imported.
     58    FFTW_LOCK;
    4359    if (!fftwWisdomImported) {
    4460        fftwf_import_system_wisdom();
    4561        fftwWisdomImported = true;
    4662    }
     63    FFTW_UNLOCK;
    4764
    4865    long num = in->n;                   // Number of elements
    4966
    5067    // Do the FFT
     68    FFTW_LOCK;
    5169    fftwf_complex *out = fftwf_malloc((num/2 + 1) * sizeof(fftwf_complex)); // Output data
    5270    fftwf_plan plan = fftwf_plan_dft_r2c_1d(num, in->data.F32, out, FFTW_PLAN_RIGOR);
     71    FFTW_UNLOCK;
     72
    5373    fftwf_execute(plan);
     74
     75    FFTW_LOCK;
    5476    fftwf_destroy_plan(plan);
     77    FFTW_UNLOCK;
    5578
    5679    // Pull the real and imaginary parts out
     
    6992#endif
    7093    }
     94
     95    FFTW_LOCK;
    7196    fftwf_free(out);
     97    FFTW_UNLOCK;
    7298
    7399    return true;
     
    83109    PS_ASSERT_PTR_NON_NULL(out, false);
    84110
     111    bool threaded = psThreadPoolSize(); // Are we running threaded?
     112
    85113    // Make sure the system-level wisdom information is imported.
     114    FFTW_LOCK;
    86115    if (!fftwWisdomImported) {
    87116        fftwf_import_system_wisdom();
    88117        fftwWisdomImported = true;
    89118    }
     119    FFTW_UNLOCK;
    90120
    91121    long num = real->n;                 // Number of elements
    92122
    93123    // Stuff the real and imaginary parts in
     124    FFTW_LOCK;
    94125    fftwf_complex *in = fftwf_malloc(num * sizeof(fftwf_complex)); // Input data
     126    FFTW_UNLOCK;
    95127    for (int i = 0; i < num; i++) {
    96128#if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I)
     
    107139    // Do the FFT
    108140    *out = psVectorRecycle(*out, origNum, PS_TYPE_F32);
     141    FFTW_LOCK;
    109142    fftwf_plan plan = fftwf_plan_dft_c2r_1d(origNum, in, (*out)->data.F32, FFTW_PLAN_RIGOR);
     143    FFTW_UNLOCK;
     144
    110145    fftwf_execute(plan);
     146
     147    FFTW_LOCK;
    111148    fftwf_destroy_plan(plan);
    112 
    113149    fftwf_free(in);
     150    FFTW_UNLOCK;
    114151
    115152    return true;
Note: See TracChangeset for help on using the changeset viewer.