IPP Software Navigation Tools IPP Links Communication Pan-STARRS Links

Changeset 15126


Ignore:
Timestamp:
Sep 29, 2007, 1:58:55 PM (19 years ago)
Author:
eugene
Message:

for Robust Stats:

  • measure +/- 0.5, 1.0, 2.0 sigma fractions to estimate sigma
  • take the minimum of the three :
    • if the distribution has very wide outliers, 2.0 sigma will be biased high
    • if the distribution is bi-modal, 0.5 sigma will be biased high

(I don't currently do anything smart if the distribution is bi-modal)

for Fittest Stats (v4):

  • if the fit to the parabola is insane, don't let the mean go outside the range of binMin - binMax.
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/psLib/src/math/psStats.c

    r15048 r15126  
    1313 * use ->min and ->max (PS_STAT_USE_RANGE)
    1414 *
    15  *  @version $Revision: 1.217 $ $Name: not supported by cvs2svn $
    16  *  @date $Date: 2007-09-28 00:36:30 $
     15 *  @version $Revision: 1.218 $ $Name: not supported by cvs2svn $
     16 *  @date $Date: 2007-09-29 23:58:55 $
    1717 *
    1818 *  Copyright 2006 IfA, University of Hawaii
     
    744744    long binLo, binHi;
    745745    long binL2, binH2;
     746    long binL4, binH4;
    746747    long binMedian;
    747748
     
    842843        PS_BIN_FOR_VALUE(binL2, cumulative->nums, totalDataPoints * 0.308538f, 0);
    843844        PS_BIN_FOR_VALUE(binH2, cumulative->nums, totalDataPoints * 0.691462f, 0);
     845        PS_BIN_FOR_VALUE(binL4, cumulative->nums, totalDataPoints * 0.022481f, 0);
     846        PS_BIN_FOR_VALUE(binH4, cumulative->nums, totalDataPoints * 0.977519f, 0);
    844847
    845848        psTrace(TRACE, 6, "The 15.8655%% and 84.1345%% data point bins are (%ld, %ld).\n",
     
    864867        // containing the value of interest.  constrain the answer to be within the current bin.
    865868        // (extrapolation should not be needed and will result in errors)
    866         float binLoF32, binHiF32, binL2F32, binH2F32;
    867         PS_BIN_INTERPOLATE (binLoF32, cumulative->nums, cumulative->bounds, binL2, totalDataPoints * 0.158655f);
    868         PS_BIN_INTERPOLATE (binHiF32, cumulative->nums, cumulative->bounds, binH2, totalDataPoints * 0.841345f);
     869        float binLoF32, binHiF32, binL2F32, binH2F32, binL4F32, binH4F32;
     870        PS_BIN_INTERPOLATE (binLoF32, cumulative->nums, cumulative->bounds, binLo, totalDataPoints * 0.158655f);
     871        PS_BIN_INTERPOLATE (binHiF32, cumulative->nums, cumulative->bounds, binHi, totalDataPoints * 0.841345f);
    869872        PS_BIN_INTERPOLATE (binL2F32, cumulative->nums, cumulative->bounds, binL2, totalDataPoints * 0.308538f);
    870873        PS_BIN_INTERPOLATE (binH2F32, cumulative->nums, cumulative->bounds, binH2, totalDataPoints * 0.691462f);
     874        PS_BIN_INTERPOLATE (binL4F32, cumulative->nums, cumulative->bounds, binL4, totalDataPoints * 0.022481f);
     875        PS_BIN_INTERPOLATE (binH4F32, cumulative->nums, cumulative->bounds, binH4, totalDataPoints * 0.977519f);
    871876
    872877        // report +/- 1 sigma points
     
    874879                "The exact 15.8655 and 84.1345 percent data point positions are: (%f, %f)\n",
    875880                binLoF32, binHiF32);
    876 
    877         // ADD step 5: Determine SIGMA as (1/2 of) the distance between these positions.
    878         // sigma = (binHiF32 - binLoF32) / 2.0;
    879         sigma = (binH2F32 - binL2F32);
     881        psTrace(TRACE, 5,
     882                "The exact 30.8538 and 69.1462 percent data point positions are: (%f, %f)\n",
     883                binL2F32, binH2F32);
     884        psTrace(TRACE, 5,
     885                "The exact 02.22481 and 97.7519 percent data point positions are: (%f, %f)\n",
     886                binL4F32, binH4F32);
     887
     888        // ADD step 5: Determine SIGMA as the distance between binL2 and binH2 (+/- 0.5 sigma)
     889        float sigma1 = (binH2F32 - binL2F32);
     890        float sigma2 = (binHiF32 - binLoF32) / 2.0;
     891        float sigma4 = (binH4F32 - binL4F32) / 4.0;
     892
     893        // take the smallest of the three: if we have a clump with wide outliers, sigma2 and
     894        // sigma4 will be biased high; if we have a bi-modal distribution, sigma1 and sigma2
     895        // will be biased high.
     896        sigma = PS_MIN (sigma1, PS_MIN (sigma2, sigma4));
     897
     898        psTrace(TRACE, 6, "The 2x sigma is %f.\n", sigma1);
     899        psTrace(TRACE, 6, "The 2x sigma is %f.\n", sigma2);
     900        psTrace(TRACE, 6, "The 4x sigma is %f.\n", sigma4);
    880901
    881902        psTrace(TRACE, 6, "The current sigma is %f.\n", sigma);
     
    16981719        // XXX can we calculate the binMin, binMax **before** building this histogram?
    16991720        // if the range is too absurd, adjust numBins & binSize
    1700         long numBins = PS_MAX (5, PS_MIN (10000, (max - min) / binSize));
     1721        long numBins = PS_MAX (50, PS_MIN (10000, (max - min) / binSize));
    17011722        binSize = (max - min) / (float) numBins;
    17021723        psTrace(TRACE, 6, "The new min/max values are (%f, %f).\n", min, max);
     
    18461867
    18471868        // if we converge on a solution outside the range binMin - binMax, use a more conservative range
    1848         if (done && (guessMean > PS_BIN_MIDPOINT(histogram, binMax - 1))) {
     1869        float minValue = PS_BIN_MIDPOINT(histogram, binMin);
     1870        float maxValue = PS_BIN_MIDPOINT(histogram, binMax - 1);
     1871
     1872        if (done && ((guessMean < minValue) || (guessMean > maxValue))) {
    18491873            psTrace(TRACE, 6, "Inconsistent result, re-trying the fit\n");
    18501874
     
    19041928            psTrace(TRACE, 6, "The symmetric stdev is %f.\n", guessStdev);
    19051929#endif
     1930
     1931            // if we converge on a solution outside the range binMin - binMax, use a more conservative range
     1932            float minValueSym = PS_BIN_MIDPOINT(histogram, binS);
     1933            float maxValueSym = PS_BIN_MIDPOINT(histogram, binE - 1);
     1934
     1935            // saturate on min or max value
     1936            if (guessMean < minValueSym) {
     1937                guessMean = minValueSym;
     1938                psTrace(TRACE, 6, "The symmetric mean is out of bounds, saturating to %f.\n", guessMean);
     1939            }
     1940               
     1941            // saturate on min or max value
     1942            if (guessMean > maxValueSym) {
     1943                guessMean = maxValueSym;
     1944                psTrace(TRACE, 6, "The symmetric mean is out of bounds, saturating to %f.\n", guessMean);
     1945            }
    19061946
    19071947            psFree (poly);
Note: See TracChangeset for help on using the changeset viewer.