IPP Software Navigation Tools IPP Links Communication Pan-STARRS Links

Changeset 31029


Ignore:
Timestamp:
Mar 23, 2011, 4:22:31 PM (15 years ago)
Author:
eugene
Message:

use Fisher-Yates to select unique pixels in the subsample for appropriate sample sizes

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/eam_branches/ipp-20110213/psLib/src/imageops/psImageBackground.c

    r31014 r31029  
    1313#include "psType.h"
    1414#include "psAssert.h"
     15#include "psAbort.h"
    1516#include "psRandom.h"
    1617#include "psError.h"
     
    2324}
    2425
    25 // XXX allow the user to choose the stats method?
    26 // (SAMPLE_MEAN, CLIPPED_MEAN, ROBUST_MEDIAN, FITTED_MEAN)
    2726bool psImageBackground(psStats *stats, psVector **sample, const psImage *image, const psImage *mask, psImageMaskType maskValue, psRandom *rng)
    2827{
     
    4544    long nx = image->numCols;
    4645    long ny = image->numRows;
    47 
    48     psImage *bad = psImageAlloc(nx, ny, PS_TYPE_U8); // Image with bad pixels
    49     psImageInit(bad, 0);
    50 
    51     int Npixels = 0;                    // Total number of pixels
    52     for (int y = 0; y < ny; y++) {
    53         for (int x = 0; x < nx; x++) {
    54             if (!isfinite(image->data.F32[y][x]) ||
    55                 (mask && mask->data.PS_TYPE_IMAGE_MASK_DATA[y][x] & maskValue)) {
    56                 bad->data.U8[y][x] = 0xFF;
    57             }
    58             Npixels++;
    59         }
    60     }
    61 
    62     const int Nsubset = (stats->nSubsample == 0) ? Npixels : PS_MIN(stats->nSubsample, Npixels); // Number of pixels in subset
    63 
    64     psVector *values;                   // Vector containing subsample
     46    long nPixels = nx * ny;
     47
     48    long nSubset = (stats->nSubsample == 0) ? (nx * ny) : PS_MIN(stats->nSubsample, (nx * ny));
     49
     50    /*** discussion of the sampling and algorithm choices:
     51
     52         We want stats->nSubsample pixels.  How many good pixels do we actually have? 
     53
     54         There are a few domains of interest:
     55
     56         If nGoodPixels < f1 * nSubset, we should fail [1]
     57         If nSubset >= nGoodPixels, we should just loop over all pixels [2]
     58         If (nSubset <  f2 * nGoodPixels) and (nGoodPixels >= f3 * nPixels), we should just use the random sample technique [3]
     59         If (nSubset >= f2 * nGoodPixels) or  (nGoodPixel  <  f3 * nPixels), we should use the Fisher-Yates technique.
     60
     61         to use Fisher-Yates, we need to have a copy of the pixels so we can re-shuffle.  We
     62         should not do that with the whole list unless we need to.
     63
     64         [1] we just have too few samples to be useful; f1 ~ 0.01?
     65
     66         [2] we need to select from all pixels to reach the desired sample size
     67
     68         [3] this avoids a full-image-sized alloc, but only makes sense if nGoodPixels is actually
     69         large.  f2 ~ 0.01, f3 ~ 0.1 (4 tries per success on average)
     70
     71    ***/
     72
     73    // count the number of good pixels
     74    long nGoodPixels = 0;
     75    for (long iy = 0; iy < ny; iy++) {
     76        for (long ix = 0; ix < nx; ix++) {
     77            if (!isfinite(image->data.F32[iy][ix])) continue;
     78            if (mask && mask->data.PS_TYPE_IMAGE_MASK_DATA[iy][ix] & maskValue) continue;
     79            nGoodPixels ++;
     80        }
     81    }
     82
     83    // XXX This value of 1% is somewhat arbitrary
     84    if (nGoodPixels < 0.01*nSubset) {
     85        if ((nFailures < 3) || (nFailures % 100 == 0)) {
     86            psLogMsg("psLib.psImageBackground", PS_LOG_DETAIL, "Unable to measure image background: too few data points (%ld) (%d failures)", nGoodPixels, nFailures);
     87        }
     88        nFailures ++;
     89        psTrace ("psLib.imageops", 4, "case 1: nGoodPixels < 0.01*nSubset (%d x %d : %d : %d : %d)\n", (int) nx, (int) ny, (int) nSubset, (int) nGoodPixels, (int) nPixels);
     90        return false;
     91    }
     92
     93    // alloc or recycle the vector containing subsample pixels
     94    psVector *values;
    6595    if (sample) {
    66         *sample = psVectorRecycle(*sample, Nsubset, PS_TYPE_F32);
     96        *sample = psVectorRecycle(*sample, nSubset, PS_TYPE_F32);
    6797        values = psMemIncrRefCounter(*sample);
    6898        values->n = 0;
    6999    } else {
    70         values = psVectorAllocEmpty(Nsubset, PS_TYPE_F32);
     100        values = psVectorAllocEmpty(nSubset, PS_TYPE_F32);
    71101    }
    72102
     
    75105    float max = -PS_MAX_F32;
    76106
    77     // select a subset of the image pixels to measure the stats
    78     long n = 0;                         // Number of actual pixels in subset
    79     if (Nsubset >= Npixels) {
    80         // if we have an image smaller than Nsubset, just loop over the (good) image pixels
    81         for (int iy = 0; iy < ny; iy++) {
    82             for (int ix = 0; ix < nx; ix++) {
    83                 if (bad->data.U8[iy][ix]) {
    84                     continue;
    85                 }
    86 
    87                 float value = image->data.F32[iy][ix];
    88                 // 2011/03/16 - MWV:  Keep a running overall min and max
    89                 min = PS_MIN(value, min);
    90                 max = PS_MAX(value, max);
    91                 values->data.F32[n] = value;
    92                 n++;
    93             }
    94         }
     107    if ((nSubset < 0.01*nGoodPixels) && (nGoodPixels >= 0.1*nPixels)) {
     108        psTrace ("psLib.imageops", 4, "case 3: nSubset < 0.01*nGoodPixels && nGoodPixels > 0.1*nPixels (%d x %d : %d : %d : %d)\n", (int) nx, (int) ny, (int) nSubset, (int) nGoodPixels, (int) nPixels);
     109        // for small subsets, just use simple random sampling (saves a full-image-sized alloc),
     110        // unless we have lots of masked pixels
     111        for (long i = 0; i < nSubset; i++) {
     112            double frnd = psRandomUniform(rng);
     113            long pixel = nPixels * frnd;
     114            long ix = pixel % nx;
     115            long iy = pixel / nx;
     116
     117            if (!isfinite(image->data.F32[iy][ix])) continue;
     118            if (mask && mask->data.PS_TYPE_IMAGE_MASK_DATA[iy][ix] & maskValue) continue;
     119
     120            float value = image->data.F32[iy][ix];
     121            min = PS_MIN(value, min);
     122            max = PS_MAX(value, max);
     123            psVectorAppend (values, value);
     124        }
     125    } else if (nSubset >= nGoodPixels) {
     126            psTrace ("psLib.imageops", 4, "case 2: nSubset >= nGoodPixels (%d x %d : %d : %d : %d)\n", (int) nx, (int) ny, (int) nSubset, (int) nGoodPixels, (int) nPixels);
     127        // in this case, we have to select from all masked pixels just to get the desired
     128        // sample size
     129        for (long iy = 0; iy < ny; iy++) {
     130            for (long ix = 0; ix < nx; ix++) {
     131                if (!isfinite(image->data.F32[iy][ix])) continue;
     132                if (mask && mask->data.PS_TYPE_IMAGE_MASK_DATA[iy][ix] & maskValue) continue;
     133                float value = image->data.F32[iy][ix];
     134                min = PS_MIN(value, min);
     135                max = PS_MAX(value, max);
     136                psVectorAppend(values, value);
     137            }
     138        }
    95139    } else {
    96         // 2011/03/16 - MWV: Hmmm... this overwites the previously defined Npixels that only counted finite-valued pixels.
    97         Npixels = nx * ny;
    98 
    99         // Subsample all pixels
    100         // This is not optimal, since there may be a large masked fraction that leaves us with few good pixels.
    101         // In that case, you should have set Nsubset.......
    102 
    103         // 2011/03/22 - MWV:  On prompting from Gene, changed the sampling so that it doesn't build and then sort
    104         //     a huge array when Nsubset << Npixel. 
    105         //   O(Npixel) log(Npixel) is unnecessarily painful when Npixel=38 million (one skycell)
    106         //     but we only needed Nsubset pixels and Nsubset << Npixel.
    107 
    108         //   It also perhaps partially defeats any gain of taking only subsamples if we're sorting the array.
    109         //   I should do some performance tests on this anyway.
    110 
    111         // If our chance of collision is low and the gain from not sorting the array is likely to be high,
    112         // then just pick randomnly
    113         // Note that we're guaranteeing Nsubset pixels returned here (up to the limit of Npixels)
    114         if (Nsubset < 0.1 * Npixels) {
    115             for (long i = 0; n < Nsubset && i < Npixels; i++) {
    116                 float frnd = psRandomUniform(rng);
    117                 int pixel = Npixels * frnd;
    118                 int ix = pixel % nx;
    119                 int iy = pixel / nx;
    120 
    121                 if (bad->data.U8[iy][ix]) {
    122                     continue;
    123                 }
    124 
    125                 float value = image->data.F32[iy][ix];
    126                 min = PS_MIN(value, min);
    127                 max = PS_MAX(value, max);
    128                 values->data.F32[n] = value;
    129                 n++;
    130             }
    131         } else {
    132             // 2011/03/16 - MWV:  Fixed double-sampling of sky pixels.
    133             //   The pick-a-random-pixel-at-a-time approach  doesn't work well when Npixels is close to Nsubset
    134             //   If Nsubset is 0.8*Npixels, then we will get lots of pixels double-counted
    135             //   This is correct on average, but isn't the optimal way to estimate the sky level
    136             // Instead we take the following approach:
    137             //   1) Calculate a random ordering of the pixels
    138             //   2) Go through this ordering up to Nsubset to select pixels
    139             // This is O(n log(n))
    140             psVector *frndPixelOrder = psVectorAlloc(Npixels, PS_TYPE_F32);
    141             for (long i = 0; i < Npixels; i++) {
    142                 frndPixelOrder->data.F32[i] = psRandomUniform(rng);
    143             }
    144             // Now sort the array so that we end up with a list of the pixels in random order
    145             psVector *frndSortedPixelOrder = psVectorSortIndex(NULL, frndPixelOrder);
    146             // Now loop in our new sorted order;
    147             // Note that it's the number of good samples found, "n", that will generally terminate the loop
    148             for (long i = 0; n < Nsubset && i < Npixels; i++) {
    149                 int pixel = frndSortedPixelOrder->data.S32[i];
    150                 int ix = pixel % nx;
    151                 int iy = pixel / nx;
    152 
    153                 if (bad->data.U8[iy][ix]) {
    154                     continue;
    155                 }
    156 
    157                 float value = image->data.F32[iy][ix];
    158                 min = PS_MIN(value, min);
    159                 max = PS_MAX(value, max);
    160                 values->data.F32[n] = value;
    161                 n++;
    162             }
    163         }
    164     }
    165 
    166     psFree(bad);
    167 
    168     if (n < 0.01*Nsubset) {
    169         if ((nFailures < 3) || (nFailures % 100 == 0)) {
    170             psLogMsg("psLib.psImageBackground", PS_LOG_DETAIL, "Unable to measure image background: too few data points (%ld) (%d failures)", n, nFailures);
    171         }
    172         nFailures ++;
    173         psFree(values);
    174         return false;
    175     }
    176 
    177     values->n = n;
     140        psTrace ("psLib.imageops", 4, "case 4: Fisher-Yates (%d x %d : %d : %d : %d)\n", (int) nx, (int) ny, (int) nSubset, (int) nGoodPixels, (int) nPixels);
     141        // use Knuth's version of Fisher-Yates to select a random subset of the pixels, but
     142        // only drawing each value once
     143           
     144        // generate a vector of all pixels which may in theory be selected
     145        psVector *pixelVector = psVectorAllocEmpty(nGoodPixels, PS_TYPE_F32);
     146        for (long iy = 0; iy < ny; iy++) {
     147            for (long ix = 0; ix < nx; ix++) {
     148                if (!isfinite(image->data.F32[iy][ix])) continue;
     149                if (mask && mask->data.PS_TYPE_IMAGE_MASK_DATA[iy][ix] & maskValue) continue;
     150                psVectorAppend(pixelVector, image->data.F32[iy][ix]);
     151            }
     152        }
     153        psAssert (nGoodPixels == pixelVector->n, "we must have mis-counted above");
     154       
     155        // generate the unique random subset
     156        for (int i = 0; i < nSubset; i++) {
     157            double frnd = psRandomUniform(rng);
     158            int pixel = pixelVector->n * frnd;
     159               
     160            psVectorAppend(values, pixelVector->data.F32[pixel]);
     161            pixelVector->data.F32[pixel] = pixelVector->data.F32[pixelVector->n - 1];
     162            pixelVector->n --;
     163        }
     164        psFree (pixelVector);
     165    }
     166    psAssert (values->n >= 0.01*nSubset, "didn't we test this above?");
    178167
    179168    if (stats->options & PS_STAT_ROBUST_QUARTILE) {
     
    181170        // XXX this hack is just for testing, drop when I am happy with the psStats version of the values
    182171
    183         int imin = stats->min * n;
    184         int imax = stats->max * n;
     172        int imin = stats->min * values->n;
     173        int imax = stats->max * values->n;
    185174        int npts = imax - imin + 1;
    186175
     
    196185        // Subtract the median when we add the numbers, so we don't get numerical problems
    197186        float median = npts % 2 ? 0.5 * (values->data.F32[npts/2 - 1] + values->data.F32[npts/2]) :
    198                        values->data.F32[npts/2];
     187            values->data.F32[npts/2];
    199188        double value = 0;
    200         for (long i = imin; (i <= imax) && (i < n); i++) {
     189        for (long i = imin; (i <= imax) && (i < values->n); i++) {
    201190            value += values->data.F32[i] - median;
    202191        }
     
    234223    return true;
    235224}
     225
     226
     227/***  old comments
     228
     229 // Subsample all pixels
     230 // This is not optimal, since there may be a large masked fraction that leaves us with few good pixels.
     231 // In that case, you should have set Nsubset.......
     232
     233 // 2011/03/22 - MWV:  On prompting from Gene, changed the sampling so that it doesn't build and then sort
     234 //     a huge array when Nsubset << Npixel. 
     235 //   O(Npixel) log(Npixel) is unnecessarily painful when Npixel=38 million (one skycell)
     236 //     but we only needed Nsubset pixels and Nsubset << Npixel.
     237
     238 //   It also perhaps partially defeats any gain of taking only subsamples if we're sorting the array.
     239 //   I should do some performance tests on this anyway.
     240
     241 // If our chance of collision is low and the gain from not sorting the array is likely to be high,
     242 // then just pick randomnly
     243 // Note that we're guaranteeing Nsubset pixels returned here (up to the limit of Npixels)
     244
     245 // 2011/03/16 - MWV:  Fixed double-sampling of sky pixels.
     246 //   The pick-a-random-pixel-at-a-time approach  doesn't work well when Npixels is close to Nsubset
     247 //   If Nsubset is 0.8*Npixels, then we will get lots of pixels double-counted
     248 //   This is correct on average, but isn't the optimal way to estimate the sky level
     249 // Instead we take the following approach:
     250 //   1) Calculate a random ordering of the pixels
     251 //   2) Go through this ordering up to Nsubset to select pixels
     252 // This is O(n log(n))
     253
     254 ***/
Note: See TracChangeset for help on using the changeset viewer.