Context Navigation

← Previous Changeset
Next Changeset →

Changeset 36105

Timestamp:

Sep 10, 2013, 5:41:53 PM (13 years ago)

Author:

watersc1

Message:

Fixes to robust median:
- The CDF should skip empty bins.
- The median should be fit with a linear fit, as the CDF isn't quadratic there.
- The +/- 2 sigma values in the Gaussian CDF were incorrect.
- Update the quadratic fit to use five points, as that is less sensitive to noise.
- Various debug information is ifdef-ed out.

File:

: 1 edited

trunk/psLib/src/math/psStats.c (modified) (16 diffs)

Legend:

: Unmodified
: Added
: Removed

trunk/psLib/src/math/psStats.c

-              r34703
+              r36105
+}
+// Debug information
+#define CZW 0
 /*****************************************************************************/
 …
         } else {
             // Determine the bin size of the robust histogram, using the pre-defined number of bins
             binSize = (max - min) / INITIAL_NUM_BINS;
+            binSize = (max - min) / INITIAL_NUM_BINS;
+        }
         psTrace(TRACE, 6, "Initial robust bin size is %.2f\n", binSize);
 …
         cumulative = psHistogramAlloc(min, max, numBins);
         cumulative->nums->data.F32[0] = histogram->nums->data.F32[0];
+        for (long i = 1; i < histogram->nums->n; i++) {
+            cumulative->nums->data.F32[i] = cumulative->nums->data.F32[i-1] + histogram->nums->data.F32[i];
+            cumulative->bounds->data.F32[i-1] = histogram->bounds->data.F32[i];
+        }
+        // Correctly fill the cumulative distribution with monotonically increasing values (skip zero valued bins).
+        long delta = 0;
+        long delta_x = 0;
+        for (long i = 1; i < histogram->nums->n; i++) {
+          if (histogram->nums->data.F32[i] > 0.0) { // This bin is bigger than the last one.
+            cumulative->nums->data.F32[i - delta] = cumulative->nums->data.F32[i - delta - 1] + histogram->nums->data.F32[i];
+            cumulative->bounds->data.F32[i - delta - 1] = histogram->bounds->data.F32[i - delta_x];
+            delta_x = 0;
+          }
+          else { // This bin is the same as the last one, so we shouldn't count this bound
+            delta++;
+            delta_x++;
+          }
+        }
+        for (long i = histogram->nums->n - delta; i < histogram->nums->n; i++) { // Ensure the unused entries are filled.
+          cumulative->nums->data.F32[i] = cumulative->nums->data.F32[histogram->nums->n - delta - 1];
+          cumulative->bounds->data.F32[i] = cumulative->bounds->data.F32[i-1] + 1.0;
+        }
         if (psTraceGetLevel("psLib.math") >= 8) {
             PS_VECTOR_PRINT_F32(cumulative->bounds);
 …
         // ADD step 3: Interpolate to the exact 50% position in bin units
         stats->robustMedian = fitQuadraticSearchForYThenReturnBin(cumulative->bounds, cumulative->nums, binMedian, totalDataPoints/2.0);
         // float robustBin = fitQuadraticSearchForYThenReturnXusingValues(cumulative->bounds, cumulative->nums, binMedian, totalDataPoints/2.0);
+        // stats->robustMedian = fitQuadraticSearchForYThenReturnBin(cumulative->bounds, cumulative->nums, binMedian, totalDataPoints/2.0);
+        // float robustBin = fitQuadraticSearchForYThenReturnXusingValues(cumulative->bounds, cumulative->nums, binMedian, totalDataPoints/2.0);
         // fprintf (stderr, "robustBin : %f vs %f\n", robustBin, stats->robustMedian);
+        // There's no reason to do a quadratic fit near the 50% bin, as it's approximately linear there.
+        // Instead, do a 5-point linear fit.
+        { // Quick 5-point linear fit
+          double Sx = (cumulative->nums->data.F32[binMedian - 2] + cumulative->nums->data.F32[binMedian - 1] +
+                       cumulative->nums->data.F32[binMedian - 0] +
+                       cumulative->nums->data.F32[binMedian + 1] + cumulative->nums->data.F32[binMedian + 2]);
+          double Sy = (cumulative->bounds->data.F32[binMedian - 2] + cumulative->bounds->data.F32[binMedian - 1] +
+                       cumulative->bounds->data.F32[binMedian - 0] +
+                       cumulative->bounds->data.F32[binMedian + 1] + cumulative->bounds->data.F32[binMedian + 2]);
+          double Sxx = (pow(cumulative->nums->data.F32[binMedian - 2],2) + pow(cumulative->nums->data.F32[binMedian - 1],2) +
+                        pow(cumulative->nums->data.F32[binMedian - 0],2) +
+                        pow(cumulative->nums->data.F32[binMedian + 1],2) + pow(cumulative->nums->data.F32[binMedian + 2],2));
+          double Sxy = (cumulative->bounds->data.F32[binMedian - 2] * cumulative->nums->data.F32[binMedian - 2] +
+                        cumulative->bounds->data.F32[binMedian - 1] * cumulative->nums->data.F32[binMedian - 1] +
+                        cumulative->bounds->data.F32[binMedian - 0] * cumulative->nums->data.F32[binMedian - 0] +
+                        cumulative->bounds->data.F32[binMedian + 1] * cumulative->nums->data.F32[binMedian + 1] +
+                        cumulative->bounds->data.F32[binMedian + 2] * cumulative->nums->data.F32[binMedian + 2]);
+          double linearMedian = ((Sy * Sxx - Sx * Sxy) + (5 * Sxy - Sx * Sy) * (totalDataPoints/2.0))/(5 * Sxx - Sx * Sx);
+          // psLogMsg("psLib",5,"Median Comp: %f %f\n",stats->robustMedian,linearMedian);
+          stats->robustMedian = linearMedian;
+        }
         // convert bin to bin value: this is the robust histogram median.
 …
         PS_BIN_FOR_VALUE(binL2, cumulative->nums, totalDataPoints * 0.308538f, 0);
         PS_BIN_FOR_VALUE(binH2, cumulative->nums, totalDataPoints * 0.691462f, 0);
+        PS_BIN_FOR_VALUE(binL4, cumulative->nums, totalDataPoints * 0.022481f, 0);
+        PS_BIN_FOR_VALUE(binH4, cumulative->nums, totalDataPoints * 0.977519f, 0);
+        PS_BIN_FOR_VALUE(binL4, cumulative->nums, totalDataPoints * 0.022750f, 0);
+        PS_BIN_FOR_VALUE(binH4, cumulative->nums, totalDataPoints * 0.977250f, 0);
         psTrace(TRACE, 6, "The 15.8655%% and 84.1345%% data point bins are (%ld, %ld).\n",
                 binLo, binHi);
 …
             goto escape;
+        }
         // ADD step 4b: Interpolate Sigma (linearly) to find these two positions exactly: these are the 1sigma
         // positions.
 …
                             totalDataPoints * 0.691462f);
         PS_BIN_INTERPOLATE (binL4F32, cumulative->nums, cumulative->bounds, binL4,
                             totalDataPoints * 0.022481f);
+                            totalDataPoints * 0.022750f);
         PS_BIN_INTERPOLATE (binH4F32, cumulative->nums, cumulative->bounds, binH4,
                             totalDataPoints * 0.977519f);
+                            totalDataPoints * 0.977250f);
         // report +/- 1 sigma points
 …
                 binL2F32, binH2F32);
         psTrace(TRACE, 5,
                 "The exact 02.22481 and 97.7519 percent data point positions are: (%f, %f)\n",
+                "The exact 02.2275 and 97.7250 percent data point positions are: (%f, %f)\n",
                 binL4F32, binH4F32);
 …
         psTrace(TRACE, 6, "The current sigma is %f.\n", sigma);
         stats->robustStdev = sigma;
+#if (CZW)
+        printf("CZW (%d): median %f sigma %f delta: %f \t %f %f %f %f %f %f %f \t %f %f %f %f %f %f %f\n",
+               iterate,
+           stats->robustMedian,stats->robustStdev,
+           fabs(cumulative->bounds->data.F32[binMedian] - cumulative->bounds->data.F32[binMedian + 1]),
+           cumulative->bounds->data.F32[binMedian-3],cumulative->bounds->data.F32[binMedian-2],
+           cumulative->bounds->data.F32[binMedian-1],
+           cumulative->bounds->data.F32[binMedian],
+           cumulative->bounds->data.F32[binMedian+1],
+           cumulative->bounds->data.F32[binMedian+2],cumulative->bounds->data.F32[binMedian+3],
+           cumulative->nums->data.F32[binMedian-3],cumulative->nums->data.F32[binMedian-2],
+           cumulative->nums->data.F32[binMedian-1],
+           cumulative->nums->data.F32[binMedian],
+           cumulative->nums->data.F32[binMedian+1],
+           cumulative->nums->data.F32[binMedian+2],cumulative->nums->data.F32[binMedian+3]);
+        PS_VECTOR_PRINT_F32(histogram->bounds);
+        PS_VECTOR_PRINT_F32(histogram->nums);
+        PS_VECTOR_PRINT_F32(cumulative->bounds);
+        PS_VECTOR_PRINT_F32(cumulative->nums);
+#endif
         // ADD step 6: If the measured SIGMA is less than 2 times the bin size, exclude points which are more
 …
+        }
+    }
     // XXX test lines while studying algorithm errors
     // fprintf (stderr, "robust stats test %7.1f +/- %7.1f : %4ld %4ld %4ld %4ld %4ld  : %f %f %f\n",
 …
             return true;
+        }
+        //      printf("BINS: %ld %f %f %f %f\n",stats->robustN50,guessStdev,binSize,min,max);
         // Calculate the number of bins.
         // XXX can we calculate the binMin, binMax **before** building this histogram?
 …
         psTrace(TRACE, 6, "The numBins is %ld\n", numBins);
+        //      printf("BINS2: %ld %f %f %f %f %ld\n",stats->robustN50,guessStdev,binSize,min,max,numBins);
         psHistogram *histogram = psHistogramAlloc(min, max, numBins); // A new histogram (without outliers)
         if (!psVectorHistogram(histogram, myVector, errors, mask, maskVal)) {
 …
             done = true;
+        }
+#if (CZW)
+        printf("CZW IN FITTED? low %f %f full %f %f robust %f %f final %f %f\n",
+               lowfitMean,lowfitStdev,fullfitMean,fullfitStdev,stats->robustMedian,stats->robustStdev,
+               guessMean,guessStdev);
+#endif
         // Clean up after fitting
 …
     PS_ASSERT_INT_WITHIN_RANGE(binNum, 0, (int)(yVec->n - 1), NAN);
+    psVector *x = psVectorAlloc(3, PS_TYPE_F64);
+    psVector *y = psVectorAlloc(3, PS_TYPE_F64);
+    //    psVector *x = psVectorAlloc(3, PS_TYPE_F64);
+    //    psVector *y = psVectorAlloc(3, PS_TYPE_F64);
+    psVector *x = psVectorAlloc(5, PS_TYPE_F64);
+    psVector *y = psVectorAlloc(5, PS_TYPE_F64);
     psF32 tmpFloat = 0.0f;
+    if ((binNum >= 1) && (binNum <= (yVec->n - 2)) && (binNum <= (xVec->n - 2))) {
+    //    if ((binNum >= 1) && (binNum <= (yVec->n - 2)) && (binNum <= (xVec->n - 2))) {
+    if ((binNum >= 2) && (binNum <= (yVec->n - 3)) && (binNum <= (xVec->n - 3))) {
         // The general case.  We have all three points.
+        x->data.F64[0] = binNum - 1;
+        x->data.F64[1] = binNum;
+        x->data.F64[2] = binNum + 1;
+        y->data.F64[0] = yVec->data.F32[binNum - 1];
+        y->data.F64[1] = yVec->data.F32[binNum];
+        y->data.F64[2] = yVec->data.F32[binNum + 1];
+      //        x->data.F64[0] = binNum - 1;
+      //        x->data.F64[1] = binNum;
+      //        x->data.F64[2] = binNum + 1;
+      x->data.F64[0] = xVec->data.F32[binNum - 2];
+      x->data.F64[1] = xVec->data.F32[binNum - 1];
+      x->data.F64[2] = xVec->data.F32[binNum + 0];
+      x->data.F64[3] = xVec->data.F32[binNum + 1];
+      x->data.F64[4] = xVec->data.F32[binNum + 2];
+        y->data.F64[0] = yVec->data.F32[binNum - 2];
+        y->data.F64[1] = yVec->data.F32[binNum - 1];
+        y->data.F64[2] = yVec->data.F32[binNum + 0];
+        y->data.F64[3] = yVec->data.F32[binNum + 1];
+        y->data.F64[4] = yVec->data.F32[binNum + 2];
         psTrace(TRACE, 6, "x vec (orig) is (%f %f %f %f)\n", xVec->data.F32[binNum - 1], xVec->data.F32[binNum], xVec->data.F32[binNum+1], xVec->data.F32[binNum+2]);
         psTrace(TRACE, 6, "x data is (%f %f %f)\n", x->data.F64[0], x->data.F64[1], x->data.F64[2]);
         psTrace(TRACE, 6, "y data is (%f %f %f)\n", y->data.F64[0], y->data.F64[1], y->data.F64[2]);
+#if (CZW)
+        printf("  polyin: %f %f %f == %f %f %f\n",
+               x->data.F64[0], x->data.F64[1], x->data.F64[2],
+               y->data.F64[0], y->data.F64[1], y->data.F64[2]);
+        printf("  rawpolyin: %f %f %f == %f %f %f\n",
+               xVec->data.F32[binNum - 1], xVec->data.F32[binNum], xVec->data.F32[binNum + 1],
+               y->data.F64[0], y->data.F64[1], y->data.F64[2]);
+#endif
         // Ensure that the y value lies within range of the y values.
         if (! (((y->data.F64[0] <= yVal) && (yVal <= y->data.F64[2])) ||
                ((y->data.F64[2] <= yVal) && (yVal <= y->data.F64[0]))) ) {
+        if (! (((y->data.F64[0] <= yVal) && (yVal <= y->data.F64[4])) ||
+               ((y->data.F64[4] <= yVal) && (yVal <= y->data.F64[0]))) ) {
             psError(PS_ERR_BAD_PARAMETER_VALUE, true,
                     _("Specified yVal, %g, is not within y-range, %g to %g."),
 …
                 (psF32) psPolynomial1DEval(myPoly, (psF64) x->data.F64[1]),
                 (psF32) psPolynomial1DEval(myPoly, (psF64) x->data.F64[2]));
+#if (CZW)
+        printf("  poly: %f %f %f fit: %f %f %f\n",
+               myPoly->coeff[0],myPoly->coeff[1],myPoly->coeff[2],
+               (psF32) psPolynomial1DEval(myPoly, (psF64) x->data.F64[0]),
+               (psF32) psPolynomial1DEval(myPoly, (psF64) x->data.F64[1]),
+               (psF32) psPolynomial1DEval(myPoly, (psF64) x->data.F64[2]));
+#endif
         psTrace(TRACE, 6, "We fit the polynomial, now find x such that f(x) equals %f\n", yVal);
         float binValue = QuadraticInverse(myPoly->coeff[2], myPoly->coeff[1], myPoly->coeff[0], yVal, x->data.F64[0], x->data.F64[2]);
+        float binValue = QuadraticInverse(myPoly->coeff[2], myPoly->coeff[1], myPoly->coeff[0], yVal, x->data.F64[0], x->data.F64[4]);
         psFree(myPoly);
 …
             return(NAN);
+        }
         // I believe that mathematically the fitted bin position must be between binNum - 1 and binNum + 1
+        assert (binValue >= binNum - 1);
+        assert (binValue <= binNum + 1);
+        int fitBin = binValue;
+        float dX = xVec->data.F32[fitBin+1] - xVec->data.F32[fitBin];
+        float dY = binValue - fitBin;
+        tmpFloat = xVec->data.F32[fitBin] + dY * dX;
+        //      assert (binValue >= binNum - 1);
+        //      assert (binValue <= binNum + 1);
+        //      int fitBin = binValue;
+        //        float dX = xVec->data.F32[fitBin+1] - xVec->data.F32[fitBin];
+        //        float dY = binValue - fitBin;
+        //        tmpFloat = xVec->data.F32[fitBin] + dY * dX;
+        tmpFloat = binValue;
+#if (CZW)
+        printf("   internal median: %f %f\n",tmpFloat,binValue);
+#endif
     } else {
         // These are special cases where the bin is at the beginning or end of the vector.

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 36105

Legend:

trunk/psLib/src/math/psStats.c

Download in other formats: