IPP Software Navigation Tools IPP Links Communication Pan-STARRS Links

Ignore:
Timestamp:
Apr 4, 2011, 12:57:08 PM (15 years ago)
Author:
eugene
Message:

consolidate multiple FITTED stats; updates to psImageBackground based on results from MWV; added memory dump API

Location:
trunk/psLib
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/psLib

    • Property svn:ignore
      •  

        old new  
        3434*.bbg
        3535*.da
         36a.out.dSYM
  • trunk/psLib/src/imageops/psImageBackground.c

    r27069 r31152  
    1313#include "psType.h"
    1414#include "psAssert.h"
     15#include "psAbort.h"
    1516#include "psRandom.h"
    1617#include "psError.h"
     
    2324}
    2425
    25 // XXX allow the user to choose the stats method?
    26 // (SAMPLE_MEAN, CLIPPED_MEAN, ROBUST_MEDIAN, FITTED_MEAN)
    2726bool psImageBackground(psStats *stats, psVector **sample, const psImage *image, const psImage *mask, psImageMaskType maskValue, psRandom *rng)
    2827{
     
    4544    long nx = image->numCols;
    4645    long ny = image->numRows;
    47 
    48     psImage *bad = psImageAlloc(nx, ny, PS_TYPE_U8); // Image with bad pixels
    49     psImageInit(bad, 0);
    50 
    51     int Npixels = 0;                    // Total number of pixels
    52     for (int y = 0; y < ny; y++) {
    53         for (int x = 0; x < nx; x++) {
    54             if (!isfinite(image->data.F32[y][x]) ||
    55                 (mask && mask->data.PS_TYPE_IMAGE_MASK_DATA[y][x] & maskValue)) {
    56                 bad->data.U8[y][x] = 0xFF;
    57             }
    58             Npixels++;
    59         }
    60     }
    61 
    62     const int Nsubset = (stats->nSubsample == 0) ? Npixels : PS_MIN(stats->nSubsample, Npixels); // Number of pixels in subset
    63 
    64     psVector *values;                   // Vector containing subsample
     46    long nPixels = nx * ny;
     47
     48    long nSubset = (stats->nSubsample == 0) ? (nx * ny) : PS_MIN(stats->nSubsample, (nx * ny));
     49
     50    /*** discussion of the sampling and algorithm choices:
     51
     52         We want stats->nSubsample pixels.  How many good pixels do we actually have? 
     53
     54         There are a few domains of interest:
     55
     56         If nGoodPixels < f1 * nSubset, we should fail [1]
     57         If nSubset >= nGoodPixels, we should just loop over all pixels [2]
     58         If (nSubset <  f2 * nGoodPixels) and (nGoodPixels >= f3 * nPixels), we should just use the random sample technique [3]
     59         If (nSubset >= f2 * nGoodPixels) or  (nGoodPixel  <  f3 * nPixels), we should use the Fisher-Yates technique.
     60
     61         to use Fisher-Yates, we need to have a copy of the pixels so we can re-shuffle.  We
     62         should not do that with the whole list unless we need to.
     63
     64         [1] we just have too few samples to be useful; f1 ~ 0.01?
     65
     66         [2] we need to select from all pixels to reach the desired sample size
     67
     68         [3] this avoids a full-image-sized alloc, but only makes sense if nGoodPixels is actually
     69         large.  f2 ~ 0.01, f3 ~ 0.1 (4 tries per success on average)
     70
     71    ***/
     72
     73    // count the number of good pixels
     74    long nGoodPixels = 0;
     75    for (long iy = 0; iy < ny; iy++) {
     76        for (long ix = 0; ix < nx; ix++) {
     77            if (!isfinite(image->data.F32[iy][ix])) continue;
     78            if (mask && mask->data.PS_TYPE_IMAGE_MASK_DATA[iy][ix] & maskValue) continue;
     79            nGoodPixels ++;
     80        }
     81    }
     82
     83    // XXX This value of 1% is somewhat arbitrary
     84    if (nGoodPixels < 0.01*nSubset) {
     85        if ((nFailures < 3) || (nFailures % 100 == 0)) {
     86            psLogMsg("psLib.psImageBackground", PS_LOG_DETAIL, "Unable to measure image background: too few data points (%ld) (%d failures)", nGoodPixels, nFailures);
     87        }
     88        nFailures ++;
     89        psTrace ("psLib.imageops", 4, "case 1: nGoodPixels < 0.01*nSubset (%d x %d : %d : %d : %d)\n", (int) nx, (int) ny, (int) nSubset, (int) nGoodPixels, (int) nPixels);
     90        return false;
     91    }
     92
     93    // alloc or recycle the vector containing subsample pixels
     94    psVector *values;
    6595    if (sample) {
    66         *sample = psVectorRecycle(*sample, Nsubset, PS_TYPE_F32);
     96        *sample = psVectorRecycle(*sample, nSubset, PS_TYPE_F32);
    6797        values = psMemIncrRefCounter(*sample);
    6898        values->n = 0;
    6999    } else {
    70         values = psVectorAllocEmpty(Nsubset, PS_TYPE_F32);
     100        values = psVectorAllocEmpty(nSubset, PS_TYPE_F32);
    71101    }
    72102
     
    75105    float max = -PS_MAX_F32;
    76106
    77     // select a subset of the image pixels to measure the stats
    78     long n = 0;                         // Number of actual pixels in subset
    79     if (Nsubset >= Npixels) {
    80         // if we have an image smaller than Nsubset, just loop over the (good) image pixels
    81         for (int iy = 0; iy < ny; iy++) {
    82             for (int ix = 0; ix < nx; ix++) {
    83                 if (bad->data.U8[iy][ix]) {
    84                     continue;
    85                 }
    86 
    87                 float value = image->data.F32[iy][ix];
    88                 min = PS_MIN(value, min);
    89                 max = PS_MAX(value, max);
    90                 values->data.F32[n] = value;
    91                 n++;
    92             }
    93         }
    94     } else {
    95         // Subsample all pixels
    96         // This is not optimal, since there may be a large masked fraction that leaves us with few good pixels.
    97         // In that case, you should have set Nsubset.......
    98         Npixels = nx * ny;
    99         for (long i = 0; i < Nsubset; i++) {
     107    if ((nSubset < 0.01*nGoodPixels) && (nGoodPixels >= 0.1*nPixels)) {
     108        psTrace ("psLib.imageops", 4, "case 3: nSubset < 0.01*nGoodPixels && nGoodPixels > 0.1*nPixels (%d x %d : %d : %d : %d)\n", (int) nx, (int) ny, (int) nSubset, (int) nGoodPixels, (int) nPixels);
     109        // for small subsets, just use simple random sampling (saves a full-image-sized alloc),
     110        // unless we have lots of masked pixels
     111        for (long i = 0; i < nSubset; i++) {
    100112            double frnd = psRandomUniform(rng);
    101             int pixel = Npixels * frnd;
    102             int ix = pixel % nx;
    103             int iy = pixel / nx;
    104 
    105             if (bad->data.U8[iy][ix]) {
    106                 continue;
    107             }
     113            long pixel = nPixels * frnd;
     114            long ix = pixel % nx;
     115            long iy = pixel / nx;
     116
     117            if (!isfinite(image->data.F32[iy][ix])) continue;
     118            if (mask && mask->data.PS_TYPE_IMAGE_MASK_DATA[iy][ix] & maskValue) continue;
    108119
    109120            float value = image->data.F32[iy][ix];
    110121            min = PS_MIN(value, min);
    111122            max = PS_MAX(value, max);
    112             values->data.F32[n] = value;
    113             n++;
    114         }
    115     }
    116 
    117     psFree(bad);
    118 
    119     if (n < 0.01*Nsubset) {
    120         if ((nFailures < 3) || (nFailures % 100 == 0)) {
    121             psLogMsg("psLib.psImageBackground", PS_LOG_DETAIL, "Unable to measure image background: too few data points (%ld) (%d failures)", n, nFailures);
    122         }
    123         nFailures ++;
    124         psFree(values);
    125         return false;
    126     }
    127 
    128     values->n = n;
     123            psVectorAppend (values, value);
     124        }
     125    } else if (nSubset >= nGoodPixels) {
     126            psTrace ("psLib.imageops", 4, "case 2: nSubset >= nGoodPixels (%d x %d : %d : %d : %d)\n", (int) nx, (int) ny, (int) nSubset, (int) nGoodPixels, (int) nPixels);
     127        // in this case, we have to select from all masked pixels just to get the desired
     128        // sample size
     129        for (long iy = 0; iy < ny; iy++) {
     130            for (long ix = 0; ix < nx; ix++) {
     131                if (!isfinite(image->data.F32[iy][ix])) continue;
     132                if (mask && mask->data.PS_TYPE_IMAGE_MASK_DATA[iy][ix] & maskValue) continue;
     133                float value = image->data.F32[iy][ix];
     134                min = PS_MIN(value, min);
     135                max = PS_MAX(value, max);
     136                psVectorAppend(values, value);
     137            }
     138        }
     139    } else {
     140        psTrace ("psLib.imageops", 4, "case 4: Fisher-Yates (%d x %d : %d : %d : %d)\n", (int) nx, (int) ny, (int) nSubset, (int) nGoodPixels, (int) nPixels);
     141        // use Knuth's version of Fisher-Yates to select a random subset of the pixels, but
     142        // only drawing each value once
     143           
     144        // generate a vector of all pixels which may in theory be selected
     145        psVector *pixelVector = psVectorAllocEmpty(nGoodPixels, PS_TYPE_F32);
     146        for (long iy = 0; iy < ny; iy++) {
     147            for (long ix = 0; ix < nx; ix++) {
     148                if (!isfinite(image->data.F32[iy][ix])) continue;
     149                if (mask && mask->data.PS_TYPE_IMAGE_MASK_DATA[iy][ix] & maskValue) continue;
     150                psVectorAppend(pixelVector, image->data.F32[iy][ix]);
     151            }
     152        }
     153        psAssert (nGoodPixels == pixelVector->n, "we must have mis-counted above");
     154       
     155        // generate the unique random subset
     156        for (int i = 0; i < nSubset; i++) {
     157            double frnd = psRandomUniform(rng);
     158            int pixel = pixelVector->n * frnd;
     159               
     160            psVectorAppend(values, pixelVector->data.F32[pixel]);
     161            pixelVector->data.F32[pixel] = pixelVector->data.F32[pixelVector->n - 1];
     162            pixelVector->n --;
     163        }
     164        psFree (pixelVector);
     165    }
     166    psAssert (values->n >= 0.01*nSubset, "didn't we test this above?");
    129167
    130168    if (stats->options & PS_STAT_ROBUST_QUARTILE) {
     
    132170        // XXX this hack is just for testing, drop when I am happy with the psStats version of the values
    133171
    134         int imin = stats->min * n;
    135         int imax = stats->max * n;
     172        int imin = stats->min * values->n;
     173        int imax = stats->max * values->n;
    136174        int npts = imax - imin + 1;
    137175
     
    147185        // Subtract the median when we add the numbers, so we don't get numerical problems
    148186        float median = npts % 2 ? 0.5 * (values->data.F32[npts/2 - 1] + values->data.F32[npts/2]) :
    149                        values->data.F32[npts/2];
     187            values->data.F32[npts/2];
    150188        double value = 0;
    151         for (long i = imin; (i <= imax) && (i < n); i++) {
     189        for (long i = imin; (i <= imax) && (i < values->n); i++) {
    152190            value += values->data.F32[i] - median;
    153191        }
     
    185223    return true;
    186224}
     225
     226
     227/***  old comments
     228
     229 // Subsample all pixels
     230 // This is not optimal, since there may be a large masked fraction that leaves us with few good pixels.
     231 // In that case, you should have set Nsubset.......
     232
     233 // 2011/03/22 - MWV:  On prompting from Gene, changed the sampling so that it doesn't build and then sort
     234 //     a huge array when Nsubset << Npixel. 
     235 //   O(Npixel) log(Npixel) is unnecessarily painful when Npixel=38 million (one skycell)
     236 //     but we only needed Nsubset pixels and Nsubset << Npixel.
     237
     238 //   It also perhaps partially defeats any gain of taking only subsamples if we're sorting the array.
     239 //   I should do some performance tests on this anyway.
     240
     241 // If our chance of collision is low and the gain from not sorting the array is likely to be high,
     242 // then just pick randomnly
     243 // Note that we're guaranteeing Nsubset pixels returned here (up to the limit of Npixels)
     244
     245 // 2011/03/16 - MWV:  Fixed double-sampling of sky pixels.
     246 //   The pick-a-random-pixel-at-a-time approach  doesn't work well when Npixels is close to Nsubset
     247 //   If Nsubset is 0.8*Npixels, then we will get lots of pixels double-counted
     248 //   This is correct on average, but isn't the optimal way to estimate the sky level
     249 // Instead we take the following approach:
     250 //   1) Calculate a random ordering of the pixels
     251 //   2) Go through this ordering up to Nsubset to select pixels
     252 // This is O(n log(n))
     253
     254 ***/
Note: See TracChangeset for help on using the changeset viewer.