IPP Software Navigation Tools IPP Links Communication Pan-STARRS Links

Ignore:
Timestamp:
Oct 24, 2013, 6:08:23 PM (13 years ago)
Author:
watersc1
Message:

Free forgotten values in psMinimizePolyFit. Correct some errors in the fitted statistics. This resolves the abort issue that was arising because the robust sigma calculation would occasionally become tainted with NAN values.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/psLib/src/math/psStats.c

    r36222 r36237  
    141141
    142142// Debug information
    143 #define CZW 1
     143#define CZW 0
    144144
    145145/*****************************************************************************/
     
    425425    for (long i = 0; i < myVector->n; i++) {
    426426        // Check if the data is with the specified range
    427 
    428427        if (!isfinite(data[i]))
    429428            continue;
     
    881880        cumulative = psHistogramAlloc(min, max, numBins);
    882881        cumulative->nums->data.F32[0] = histogram->nums->data.F32[0];
    883         cumulative->bounds->data.F32[0] = histogram->bounds->data.F32[0];
     882        cumulative->bounds->data.F32[0] = histogram->bounds->data.F32[1];
    884883
    885884        // Correctly fill the cumulative distribution with monotonically increasing values (skip zero valued bins).
     
    887886        // the boundaries for the current cumulative bin are from upper end of the last valid histogram bin to the
    888887        // upper end of the current histogram bin
    889         for (long i = 1; i < histogram->nums->n; i++) {
     888        for (long i = 1; i < histogram->nums->n - 1; i++) {
    890889            if (histogram->nums->data.F32[i] == 0.0) continue;
    891890            cumulative->nums->data.F32[Nc] = cumulative->nums->data.F32[Nc - 1] + histogram->nums->data.F32[i];
    892             cumulative->bounds->data.F32[Nc] = histogram->bounds->data.F32[i];
     891            cumulative->bounds->data.F32[Nc] = histogram->bounds->data.F32[i+1];
    893892            Nc ++;
    894893        }
     
    898897            cumulative->bounds->data.F32[i] = cumulative->bounds->data.F32[i-1] + 1.0;
    899898        }
    900 
    901 # if (0)
    902         // Correctly fill the cumulative distribution with monotonically increasing values (skip zero valued bins).
    903         long delta = 0;
    904         long delta_x = 0;
    905         for (long i = 1; i < histogram->nums->n; i++) {
    906           if (histogram->nums->data.F32[i] > 0.0) { // This bin is bigger than the last one.
    907             cumulative->nums->data.F32[i - delta] = cumulative->nums->data.F32[i - delta - 1] + histogram->nums->data.F32[i];
    908             cumulative->bounds->data.F32[i - delta - 1] = histogram->bounds->data.F32[i - delta_x];
    909             delta_x = 0;
    910           }
    911           else { // This bin is the same as the last one, so we shouldn't count this bound
    912             delta++;
    913             delta_x++;
    914           }
    915         }
    916         for (long i = histogram->nums->n - delta; i < histogram->nums->n; i++) { // Ensure the unused entries are filled.
    917           cumulative->nums->data.F32[i] = cumulative->nums->data.F32[histogram->nums->n - delta - 1];
    918           cumulative->bounds->data.F32[i] = cumulative->bounds->data.F32[i-1] + 1.0;
    919         }
    920 # endif
    921899       
    922900        if (psTraceGetLevel("psLib.math") >= 8) {
     
    940918        // There's no reason to do a quadratic fit near the 50% bin, as it's approximately linear there.
    941919        // Instead, do a 5-point linear fit.
    942 # if (0)
    943         { // Quick 5-point linear fit
    944           double Sx = (cumulative->nums->data.F32[binMedian - 2] + cumulative->nums->data.F32[binMedian - 1] +
    945                        cumulative->nums->data.F32[binMedian - 0] +
    946                        cumulative->nums->data.F32[binMedian + 1] + cumulative->nums->data.F32[binMedian + 2]);
    947           double Sy = (cumulative->bounds->data.F32[binMedian - 2] + cumulative->bounds->data.F32[binMedian - 1] +
    948                        cumulative->bounds->data.F32[binMedian - 0] +
    949                        cumulative->bounds->data.F32[binMedian + 1] + cumulative->bounds->data.F32[binMedian + 2]);
    950           double Sxx = (pow(cumulative->nums->data.F32[binMedian - 2],2) + pow(cumulative->nums->data.F32[binMedian - 1],2) +
    951                         pow(cumulative->nums->data.F32[binMedian - 0],2) +
    952                         pow(cumulative->nums->data.F32[binMedian + 1],2) + pow(cumulative->nums->data.F32[binMedian + 2],2));
    953           double Sxy = (cumulative->bounds->data.F32[binMedian - 2] * cumulative->nums->data.F32[binMedian - 2] +
    954                         cumulative->bounds->data.F32[binMedian - 1] * cumulative->nums->data.F32[binMedian - 1] +
    955                         cumulative->bounds->data.F32[binMedian - 0] * cumulative->nums->data.F32[binMedian - 0] +
    956                         cumulative->bounds->data.F32[binMedian + 1] * cumulative->nums->data.F32[binMedian + 1] +
    957                         cumulative->bounds->data.F32[binMedian + 2] * cumulative->nums->data.F32[binMedian + 2]);
    958           double linearMedian = ((Sy * Sxx - Sx * Sxy) + (5 * Sxy - Sx * Sy) * (totalDataPoints/2.0))/(5 * Sxx - Sx * Sx);
    959           // psLogMsg("psLib",5,"Median Comp: %f %f\n",stats->robustMedian,linearMedian);
    960           stats->robustMedian = linearMedian;
    961         }
    962 # endif
    963920        stats->robustMedian = fitLinearSearchForYThenReturnBin(cumulative->bounds, cumulative->nums, binMedian, totalDataPoints/2.0);
    964921
     
    1003960        // (extrapolation should not be needed and will result in errors)
    1004961        float binLoF32, binHiF32, binL2F32, binH2F32, binL4F32, binH4F32;
     962#if (0)
    1005963        PS_BIN_INTERPOLATE (binLoF32, cumulative->nums, cumulative->bounds, binLo,
    1006964                            totalDataPoints * 0.158655f);
     
    1015973        PS_BIN_INTERPOLATE (binH4F32, cumulative->nums, cumulative->bounds, binH4,
    1016974                            totalDataPoints * 0.977250f);
    1017 
     975#else
     976        binLoF32 = fitLinearSearchForYThenReturnBin(cumulative->bounds, cumulative->nums, binLo, totalDataPoints * 0.158655);
     977        binHiF32 = fitLinearSearchForYThenReturnBin(cumulative->bounds, cumulative->nums, binHi, totalDataPoints * 0.841345);         
     978        binL2F32 = fitLinearSearchForYThenReturnBin(cumulative->bounds, cumulative->nums, binL2, totalDataPoints * 0.308538);
     979        binH2F32 = fitLinearSearchForYThenReturnBin(cumulative->bounds, cumulative->nums, binH2, totalDataPoints * 0.691462);         
     980        binL4F32 = fitLinearSearchForYThenReturnBin(cumulative->bounds, cumulative->nums, binL4, totalDataPoints * 0.022750);
     981        binH4F32 = fitLinearSearchForYThenReturnBin(cumulative->bounds, cumulative->nums, binH4, totalDataPoints * 0.977250);
     982#endif 
    1018983        // report +/- 1 sigma points
    1019984        psTrace(TRACE, 5,
     
    1027992                binL4F32, binH4F32);
    1028993
     994        // If some of the fits failed, attempt to fix this
     995        if (!isfinite(binLoF32) && isfinite(binHiF32)) { binLoF32 = -1.0 * binHiF32; }
     996        if (!isfinite(binHiF32) && isfinite(binLoF32)) { binHiF32 = -1.0 * binLoF32; }
     997        if (!isfinite(binL2F32) && isfinite(binH2F32)) { binL2F32 = -1.0 * binH2F32; }
     998        if (!isfinite(binH2F32) && isfinite(binL2F32)) { binH2F32 = -1.0 * binL2F32; }
     999        if (!isfinite(binL4F32) && isfinite(binH4F32)) { binL4F32 = -1.0 * binH4F32; }
     1000        if (!isfinite(binH4F32) && isfinite(binL4F32)) { binH4F32 = -1.0 * binL4F32; }
     1001       
    10291002        // ADD step 5: Determine SIGMA as the distance between binL2 and binH2 (+/- 0.5 sigma)
     1003
     1004
    10301005        float sigma1 = (binH2F32 - binL2F32);
    10311006        float sigma2 = (binHiF32 - binLoF32) / 2.0;
    10321007        float sigma4 = (binH4F32 - binL4F32) / 4.0;
    10331008
     1009        // Fix again?
     1010        if (!isfinite(sigma1) && isfinite(sigma2) && isfinite(sigma4)) { sigma1 = (sigma2 + sigma4) / 2.0; }
     1011        if (!isfinite(sigma2) && isfinite(sigma1) && isfinite(sigma4)) { sigma2 = (sigma1 + sigma4) / 2.0; }
     1012        if (!isfinite(sigma4) && isfinite(sigma2) && isfinite(sigma1)) { sigma4 = (sigma2 + sigma1) / 2.0; }
     1013       
    10341014        // take the smallest of the three: if we have a clump with wide outliers, sigma2 and
    10351015        // sigma4 will be biased high; if we have a bi-modal distribution, sigma1 and sigma2
    10361016        // will be biased high.
    1037         sigma = PS_MIN (sigma1, PS_MIN (sigma2, sigma4));
     1017        //        sigma = PS_MIN (sigma1, PS_MIN (sigma2, sigma4));
     1018        // CZW: Instead, take the median.  Taking the MIN forces a bias on unbiased data.
     1019        //      It seems like occasionally getting the wrong answer on a complex distribution
     1020        //      is more acceptable than always getting the wrong answer for simple ones.
     1021
     1022       
     1023        sigma = PS_MAX( PS_MIN(sigma1,sigma2),
     1024                        PS_MIN( PS_MAX(sigma1,sigma2),
     1025                                sigma4));
    10381026
    10391027        psTrace(TRACE, 6, "The 1x sigma is %f.\n", sigma1);
     
    10421030
    10431031        psTrace(TRACE, 6, "The current sigma is %f.\n", sigma);
    1044         stats->robustStdev = sigma;
    1045 
    1046 
    1047 #if (CZW && 0)
     1032        //        stats->robustStdev = sigma;
     1033        stats->robustStdev = sigma;
     1034
     1035#if (CZW && 0)
     1036        // Skewness check: Find least biased sample for each pair.
     1037        sigma1 = 2.0 * PS_MIN(binH2F32 - stats->robustMedian,
     1038                              stats->robustMedian - binL2F32);
     1039        sigma2 = 1.0 * PS_MIN(binHiF32 - stats->robustMedian,
     1040                              stats->robustMedian - binLoF32);
     1041        sigma4 = 0.5 * PS_MIN(binH4F32 - stats->robustMedian,
     1042                              stats->robustMedian - binL4F32);
     1043        // Kurtosis check: Take median sample as the solution.
     1044        stats->robustStdev = PS_MAX( PS_MIN(sigma1,sigma2),
     1045                                     PS_MIN( PS_MAX(sigma1,sigma2),
     1046                                             sigma4));
     1047#endif
     1048
     1049       
     1050#if (0)
     1051        printf("CZW: bad sigma?: %f %f  %f %f  %f %f  %f %f %f  %f\n",
     1052               binH2F32,binL2F32,binHiF32,binLoF32,binH4F32,binL4F32,
     1053               sigma1,sigma2,sigma4,sigma);
     1054       
    10481055        printf("CZW (%d): median %f sigma %f delta: %f \n\t %f %f %f %f %f %f %f \n\t %f %f %f %f %f %f %f\n",
    10491056               iterate,
     
    12001207 * "vectorFittedStats_v4" all versions of fitted stats now resolve to this function (only v4
    12011208 * has really been used) vectorFittedStats requires guess for fittedMean and fittedStdev
    1202  * robustN50 should also be set gaussian fit is performed using 2D polynomial to ln(y) this
     1209 * robustN50 should also be set gaussian fit is performed using 1D polynomial to ln(y) this
    12031210 * version follows the upper portion of the distribution until it passes 0.5*peak
    12041211 ********************/
     
    12351242        return true;
    12361243    }
    1237 
     1244    if (myVector->n < 1) { printf("There are no elements in this vector.\n"); abort(); }
    12381245    float guessStdev = stats->robustStdev;  // pass the guess sigma
    12391246    float guessMean = stats->robustMedian;  // pass the guess mean
     
    12551262            // set roughly so that the lowest bins have about 2 cnts
    12561263            // Nsmallest ~ N50 / (4*dN))
    1257             psF32 dN = PS_MAX (1, PS_MIN (4, stats->robustN50 / 8));
    1258             binSize = guessStdev / dN;
     1264            psF32 dN = PS_MAX (1, PS_MIN (4, stats->robustN50 / 8)); 
     1265           binSize = guessStdev / dN;
    12591266        }
    12601267
     
    12881295        psTrace(TRACE, 6, "The numBins is %ld\n", numBins);
    12891296
    1290        
     1297
    12911298        psHistogram *histogram = psHistogramAlloc(min, max, numBins); // A new histogram (without outliers)
    12921299        if (!psVectorHistogram(histogram, myVector, errors, mask, maskVal)) {
     
    15511558
    15521559       
    1553 #if (CZW && 0)
     1560#if (CZW && 1)
    15541561        printf("CZW IN FITTED: iter   %d %f \n"
    15551562               "               low    %f %f \n"
     
    25752582        Sxx += PS_SQR(yVec->data.F32[u]);
    25762583
    2577         deltaY = 0.5 * (xVec->data.F32[u] + xVec->data.F32[u+1]);
     2584        deltaY = xVec->data.F32[u];
     2585        //deltaY = 0.5 * (xVec->data.F32[u] + xVec->data.F32[u+1]);
    25782586        Sy += deltaY;
    25792587        Sxy += yVec->data.F32[u] * deltaY;
Note: See TracChangeset for help on using the changeset viewer.