IPP Software Navigation Tools IPP Links Communication Pan-STARRS Links

Changeset 36105


Ignore:
Timestamp:
Sep 10, 2013, 5:41:53 PM (13 years ago)
Author:
watersc1
Message:

Fixes to robust median: -CDF should skip empty bins. -the median should be fit with a linear fit, as the CDF isn't quadratic there. -+/- 2sigma values in the Gaussian CDF were incorrect. -Update quadratic fit to use five points, as that is less sensitive to noise. -Various debug information ifdef-ed out.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/psLib/src/math/psStats.c

    r34703 r36105  
    140140}
    141141
     142// Debug information
     143#define CZW 0
    142144
    143145/*****************************************************************************/
     
    793795        } else {
    794796            // Determine the bin size of the robust histogram, using the pre-defined number of bins
    795             binSize = (max - min) / INITIAL_NUM_BINS;
     797            binSize = (max - min) / INITIAL_NUM_BINS;
    796798        }
    797799        psTrace(TRACE, 6, "Initial robust bin size is %.2f\n", binSize);
     
    876878        cumulative = psHistogramAlloc(min, max, numBins);
    877879        cumulative->nums->data.F32[0] = histogram->nums->data.F32[0];
    878         for (long i = 1; i < histogram->nums->n; i++) {
    879             cumulative->nums->data.F32[i] = cumulative->nums->data.F32[i-1] + histogram->nums->data.F32[i];
    880             cumulative->bounds->data.F32[i-1] = histogram->bounds->data.F32[i];
    881         }
     880
     881        // Correctly fill the cumulative distribution with monotonically increasing values (skip zero valued bins).
     882        long delta = 0;
     883        long delta_x = 0;
     884        for (long i = 1; i < histogram->nums->n; i++) {
     885          if (histogram->nums->data.F32[i] > 0.0) { // This bin is bigger than the last one.
     886            cumulative->nums->data.F32[i - delta] = cumulative->nums->data.F32[i - delta - 1] + histogram->nums->data.F32[i];
     887            cumulative->bounds->data.F32[i - delta - 1] = histogram->bounds->data.F32[i - delta_x];
     888            delta_x = 0;
     889          }
     890          else { // This bin is the same as the last one, so we shouldn't count this bound
     891            delta++;
     892            delta_x++;
     893          }
     894        }
     895        for (long i = histogram->nums->n - delta; i < histogram->nums->n; i++) { // Ensure the unused entries are filled.
     896          cumulative->nums->data.F32[i] = cumulative->nums->data.F32[histogram->nums->n - delta - 1];
     897          cumulative->bounds->data.F32[i] = cumulative->bounds->data.F32[i-1] + 1.0;
     898        }
     899       
    882900        if (psTraceGetLevel("psLib.math") >= 8) {
    883901            PS_VECTOR_PRINT_F32(cumulative->bounds);
     
    895913
    896914        // ADD step 3: Interpolate to the exact 50% position in bin units
    897         stats->robustMedian = fitQuadraticSearchForYThenReturnBin(cumulative->bounds, cumulative->nums, binMedian, totalDataPoints/2.0);
    898         // float robustBin = fitQuadraticSearchForYThenReturnXusingValues(cumulative->bounds, cumulative->nums, binMedian, totalDataPoints/2.0);
     915        // stats->robustMedian = fitQuadraticSearchForYThenReturnBin(cumulative->bounds, cumulative->nums, binMedian, totalDataPoints/2.0);
     916        // float robustBin = fitQuadraticSearchForYThenReturnXusingValues(cumulative->bounds, cumulative->nums, binMedian, totalDataPoints/2.0);
    899917        // fprintf (stderr, "robustBin : %f vs %f\n", robustBin, stats->robustMedian);
     918        // There's no reason to do a quadratic fit near the 50% bin, as it's approximately linear there.
     919        // Instead, do a 5-point linear fit.
     920        { // Quick 5-point linear fit
     921          double Sx = (cumulative->nums->data.F32[binMedian - 2] + cumulative->nums->data.F32[binMedian - 1] +
     922                       cumulative->nums->data.F32[binMedian - 0] +
     923                       cumulative->nums->data.F32[binMedian + 1] + cumulative->nums->data.F32[binMedian + 2]);
     924          double Sy = (cumulative->bounds->data.F32[binMedian - 2] + cumulative->bounds->data.F32[binMedian - 1] +
     925                       cumulative->bounds->data.F32[binMedian - 0] +
     926                       cumulative->bounds->data.F32[binMedian + 1] + cumulative->bounds->data.F32[binMedian + 2]);
     927          double Sxx = (pow(cumulative->nums->data.F32[binMedian - 2],2) + pow(cumulative->nums->data.F32[binMedian - 1],2) +
     928                        pow(cumulative->nums->data.F32[binMedian - 0],2) +
     929                        pow(cumulative->nums->data.F32[binMedian + 1],2) + pow(cumulative->nums->data.F32[binMedian + 2],2));
     930          double Sxy = (cumulative->bounds->data.F32[binMedian - 2] * cumulative->nums->data.F32[binMedian - 2] +
     931                        cumulative->bounds->data.F32[binMedian - 1] * cumulative->nums->data.F32[binMedian - 1] +
     932                        cumulative->bounds->data.F32[binMedian - 0] * cumulative->nums->data.F32[binMedian - 0] +
     933                        cumulative->bounds->data.F32[binMedian + 1] * cumulative->nums->data.F32[binMedian + 1] +
     934                        cumulative->bounds->data.F32[binMedian + 2] * cumulative->nums->data.F32[binMedian + 2]);
     935          double linearMedian = ((Sy * Sxx - Sx * Sxy) + (5 * Sxy - Sx * Sy) * (totalDataPoints/2.0))/(5 * Sxx - Sx * Sx);
     936          // psLogMsg("psLib",5,"Median Comp: %f %f\n",stats->robustMedian,linearMedian);
     937          stats->robustMedian = linearMedian;
     938        }
    900939
    901940        // convert bin to bin value: this is the robust histogram median.
     
    912951        PS_BIN_FOR_VALUE(binL2, cumulative->nums, totalDataPoints * 0.308538f, 0);
    913952        PS_BIN_FOR_VALUE(binH2, cumulative->nums, totalDataPoints * 0.691462f, 0);
    914         PS_BIN_FOR_VALUE(binL4, cumulative->nums, totalDataPoints * 0.022481f, 0);
    915         PS_BIN_FOR_VALUE(binH4, cumulative->nums, totalDataPoints * 0.977519f, 0);
    916 
     953        PS_BIN_FOR_VALUE(binL4, cumulative->nums, totalDataPoints * 0.022750f, 0);
     954        PS_BIN_FOR_VALUE(binH4, cumulative->nums, totalDataPoints * 0.977250f, 0);
     955       
     956       
    917957        psTrace(TRACE, 6, "The 15.8655%% and 84.1345%% data point bins are (%ld, %ld).\n",
    918958                binLo, binHi);
     
    926966            goto escape;
    927967        }
    928 
     968   
    929969        // ADD step 4b: Interpolate Sigma (linearly) to find these two positions exactly: these are the 1sigma
    930970        // positions.
     
    947987                            totalDataPoints * 0.691462f);
    948988        PS_BIN_INTERPOLATE (binL4F32, cumulative->nums, cumulative->bounds, binL4,
    949                             totalDataPoints * 0.022481f);
     989                            totalDataPoints * 0.022750f);
    950990        PS_BIN_INTERPOLATE (binH4F32, cumulative->nums, cumulative->bounds, binH4,
    951                             totalDataPoints * 0.977519f);
     991                            totalDataPoints * 0.977250f);
    952992
    953993        // report +/- 1 sigma points
     
    959999                binL2F32, binH2F32);
    9601000        psTrace(TRACE, 5,
    961                 "The exact 02.22481 and 97.7519 percent data point positions are: (%f, %f)\n",
     1001                "The exact 02.2275 and 97.7250 percent data point positions are: (%f, %f)\n",
    9621002                binL4F32, binH4F32);
    9631003
     
    9781018        psTrace(TRACE, 6, "The current sigma is %f.\n", sigma);
    9791019        stats->robustStdev = sigma;
     1020
     1021
     1022#if (CZW)
     1023        printf("CZW (%d): median %f sigma %f delta: %f \t %f %f %f %f %f %f %f \t %f %f %f %f %f %f %f\n",
     1024               iterate,
     1025           stats->robustMedian,stats->robustStdev,
     1026           fabs(cumulative->bounds->data.F32[binMedian] - cumulative->bounds->data.F32[binMedian + 1]),
     1027
     1028           cumulative->bounds->data.F32[binMedian-3],cumulative->bounds->data.F32[binMedian-2],
     1029           cumulative->bounds->data.F32[binMedian-1],
     1030           cumulative->bounds->data.F32[binMedian],
     1031           cumulative->bounds->data.F32[binMedian+1],
     1032           cumulative->bounds->data.F32[binMedian+2],cumulative->bounds->data.F32[binMedian+3],
     1033
     1034           cumulative->nums->data.F32[binMedian-3],cumulative->nums->data.F32[binMedian-2],
     1035           cumulative->nums->data.F32[binMedian-1],
     1036           cumulative->nums->data.F32[binMedian],
     1037           cumulative->nums->data.F32[binMedian+1],
     1038           cumulative->nums->data.F32[binMedian+2],cumulative->nums->data.F32[binMedian+3]);
     1039        PS_VECTOR_PRINT_F32(histogram->bounds);
     1040        PS_VECTOR_PRINT_F32(histogram->nums);
     1041        PS_VECTOR_PRINT_F32(cumulative->bounds);
     1042        PS_VECTOR_PRINT_F32(cumulative->nums);
     1043#endif
    9801044
    9811045        // ADD step 6: If the measured SIGMA is less than 2 times the bin size, exclude points which are more
     
    10301094        }
    10311095    }
    1032 
     1096   
    10331097    // XXX test lines while studying algorithm errors
    10341098    // fprintf (stderr, "robust stats test %7.1f +/- %7.1f : %4ld %4ld %4ld %4ld %4ld  : %f %f %f\n",
     
    11781242            return true;
    11791243        }
    1180 
     1244        //      printf("BINS: %ld %f %f %f %f\n",stats->robustN50,guessStdev,binSize,min,max);
    11811245        // Calculate the number of bins.
    11821246        // XXX can we calculate the binMin, binMax **before** building this histogram?
     
    11881252        psTrace(TRACE, 6, "The numBins is %ld\n", numBins);
    11891253
     1254        //      printf("BINS2: %ld %f %f %f %f %ld\n",stats->robustN50,guessStdev,binSize,min,max,numBins);
     1255       
    11901256        psHistogram *histogram = psHistogramAlloc(min, max, numBins); // A new histogram (without outliers)
    11911257        if (!psVectorHistogram(histogram, myVector, errors, mask, maskVal)) {
     
    14371503            done = true;
    14381504        }
     1505#if (CZW)
     1506        printf("CZW IN FITTED? low %f %f full %f %f robust %f %f final %f %f\n",
     1507               lowfitMean,lowfitStdev,fullfitMean,fullfitStdev,stats->robustMedian,stats->robustStdev,
     1508               guessMean,guessStdev);
     1509#endif
    14391510
    14401511        // Clean up after fitting
     
    22762347    PS_ASSERT_INT_WITHIN_RANGE(binNum, 0, (int)(yVec->n - 1), NAN);
    22772348
    2278     psVector *x = psVectorAlloc(3, PS_TYPE_F64);
    2279     psVector *y = psVectorAlloc(3, PS_TYPE_F64);
     2349    //    psVector *x = psVectorAlloc(3, PS_TYPE_F64);
     2350    //    psVector *y = psVectorAlloc(3, PS_TYPE_F64);
     2351    psVector *x = psVectorAlloc(5, PS_TYPE_F64);
     2352    psVector *y = psVectorAlloc(5, PS_TYPE_F64);
    22802353    psF32 tmpFloat = 0.0f;
    22812354
    2282     if ((binNum >= 1) && (binNum <= (yVec->n - 2)) && (binNum <= (xVec->n - 2))) {
     2355    //    if ((binNum >= 1) && (binNum <= (yVec->n - 2)) && (binNum <= (xVec->n - 2))) {
     2356    if ((binNum >= 2) && (binNum <= (yVec->n - 3)) && (binNum <= (xVec->n - 3))) {
    22832357        // The general case.  We have all three points.
    2284         x->data.F64[0] = binNum - 1;
    2285         x->data.F64[1] = binNum;
    2286         x->data.F64[2] = binNum + 1;
    2287         y->data.F64[0] = yVec->data.F32[binNum - 1];
    2288         y->data.F64[1] = yVec->data.F32[binNum];
    2289         y->data.F64[2] = yVec->data.F32[binNum + 1];
     2358      //        x->data.F64[0] = binNum - 1;
     2359      //        x->data.F64[1] = binNum;
     2360      //        x->data.F64[2] = binNum + 1;
     2361      x->data.F64[0] = xVec->data.F32[binNum - 2];
     2362      x->data.F64[1] = xVec->data.F32[binNum - 1];
     2363      x->data.F64[2] = xVec->data.F32[binNum + 0];
     2364      x->data.F64[3] = xVec->data.F32[binNum + 1];
     2365      x->data.F64[4] = xVec->data.F32[binNum + 2];
     2366        y->data.F64[0] = yVec->data.F32[binNum - 2];
     2367        y->data.F64[1] = yVec->data.F32[binNum - 1];
     2368        y->data.F64[2] = yVec->data.F32[binNum + 0];
     2369        y->data.F64[3] = yVec->data.F32[binNum + 1];
     2370        y->data.F64[4] = yVec->data.F32[binNum + 2];
    22902371        psTrace(TRACE, 6, "x vec (orig) is (%f %f %f %f)\n", xVec->data.F32[binNum - 1], xVec->data.F32[binNum], xVec->data.F32[binNum+1], xVec->data.F32[binNum+2]);
    22912372        psTrace(TRACE, 6, "x data is (%f %f %f)\n", x->data.F64[0], x->data.F64[1], x->data.F64[2]);
    22922373        psTrace(TRACE, 6, "y data is (%f %f %f)\n", y->data.F64[0], y->data.F64[1], y->data.F64[2]);
    22932374
     2375#if (CZW)
     2376        printf("  polyin: %f %f %f == %f %f %f\n",
     2377               x->data.F64[0], x->data.F64[1], x->data.F64[2],
     2378               y->data.F64[0], y->data.F64[1], y->data.F64[2]);
     2379        printf("  rawpolyin: %f %f %f == %f %f %f\n",
     2380               xVec->data.F32[binNum - 1], xVec->data.F32[binNum], xVec->data.F32[binNum + 1],
     2381               y->data.F64[0], y->data.F64[1], y->data.F64[2]);
     2382#endif
     2383
    22942384        // Ensure that the y value lies within range of the y values.
    2295         if (! (((y->data.F64[0] <= yVal) && (yVal <= y->data.F64[2])) ||
    2296                ((y->data.F64[2] <= yVal) && (yVal <= y->data.F64[0]))) ) {
     2385        if (! (((y->data.F64[0] <= yVal) && (yVal <= y->data.F64[4])) ||
     2386               ((y->data.F64[4] <= yVal) && (yVal <= y->data.F64[0]))) ) {
    22972387            psError(PS_ERR_BAD_PARAMETER_VALUE, true,
    22982388                    _("Specified yVal, %g, is not within y-range, %g to %g."),
     
    23292419                (psF32) psPolynomial1DEval(myPoly, (psF64) x->data.F64[1]),
    23302420                (psF32) psPolynomial1DEval(myPoly, (psF64) x->data.F64[2]));
     2421#if (CZW)
     2422        printf("  poly: %f %f %f fit: %f %f %f\n",
     2423               myPoly->coeff[0],myPoly->coeff[1],myPoly->coeff[2],
     2424               (psF32) psPolynomial1DEval(myPoly, (psF64) x->data.F64[0]),
     2425               (psF32) psPolynomial1DEval(myPoly, (psF64) x->data.F64[1]),
     2426               (psF32) psPolynomial1DEval(myPoly, (psF64) x->data.F64[2]));
     2427#endif
    23312428
    23322429        psTrace(TRACE, 6, "We fit the polynomial, now find x such that f(x) equals %f\n", yVal);
    2333         float binValue = QuadraticInverse(myPoly->coeff[2], myPoly->coeff[1], myPoly->coeff[0], yVal, x->data.F64[0], x->data.F64[2]);
     2430        float binValue = QuadraticInverse(myPoly->coeff[2], myPoly->coeff[1], myPoly->coeff[0], yVal, x->data.F64[0], x->data.F64[4]);
    23342431        psFree(myPoly);
    23352432
     
    23412438            return(NAN);
    23422439        }
    2343 
     2440       
    23442441        // I believe that mathematically the fitted bin position must be between binNum - 1 and binNum + 1
    2345         assert (binValue >= binNum - 1);
    2346         assert (binValue <= binNum + 1);
    2347 
    2348         int fitBin = binValue;
    2349         float dX = xVec->data.F32[fitBin+1] - xVec->data.F32[fitBin];
    2350         float dY = binValue - fitBin;
    2351         tmpFloat = xVec->data.F32[fitBin] + dY * dX;
     2442        //      assert (binValue >= binNum - 1);
     2443        //      assert (binValue <= binNum + 1);
     2444
     2445        //      int fitBin = binValue;
     2446        //        float dX = xVec->data.F32[fitBin+1] - xVec->data.F32[fitBin];
     2447        //        float dY = binValue - fitBin;
     2448        //        tmpFloat = xVec->data.F32[fitBin] + dY * dX;
     2449        tmpFloat = binValue;
     2450               
     2451#if (CZW)
     2452        printf("   internal median: %f %f\n",tmpFloat,binValue);
     2453#endif
     2454       
    23522455    } else {
    23532456        // These are special cases where the bin is at the beginning or end of the vector.
Note: See TracChangeset for help on using the changeset viewer.