Changeset 36237
- Timestamp:
- Oct 24, 2013, 6:08:23 PM (13 years ago)
- Location:
- trunk/psLib/src/math
- Files:
-
- 2 edited
-
psMinimizePolyFit.c (modified) (1 diff)
-
psStats.c (modified) (16 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/psLib/src/math/psMinimizePolyFit.c
r36222 r36237 770 770 #endif 771 771 } 772 psFree(Zcoeff); 773 psFree(ZcoeffErr); 774 772 775 } 773 776 -
trunk/psLib/src/math/psStats.c
r36222 r36237 141 141 142 142 // Debug information 143 #define CZW 1143 #define CZW 0 144 144 145 145 /*****************************************************************************/ … … 425 425 for (long i = 0; i < myVector->n; i++) { 426 426 // Check if the data is with the specified range 427 428 427 if (!isfinite(data[i])) 429 428 continue; … … 881 880 cumulative = psHistogramAlloc(min, max, numBins); 882 881 cumulative->nums->data.F32[0] = histogram->nums->data.F32[0]; 883 cumulative->bounds->data.F32[0] = histogram->bounds->data.F32[ 0];882 cumulative->bounds->data.F32[0] = histogram->bounds->data.F32[1]; 884 883 885 884 // Correctly fill the cumulative distribution with monotonically increasing values (skip zero valued bins). … … 887 886 // the boundaries for the current cumulative bin are from upper end of the last valid histogram bin to the 888 887 // upper end of the current histogram bin 889 for (long i = 1; i < histogram->nums->n ; i++) {888 for (long i = 1; i < histogram->nums->n - 1; i++) { 890 889 if (histogram->nums->data.F32[i] == 0.0) continue; 891 890 cumulative->nums->data.F32[Nc] = cumulative->nums->data.F32[Nc - 1] + histogram->nums->data.F32[i]; 892 cumulative->bounds->data.F32[Nc] = histogram->bounds->data.F32[i ];891 cumulative->bounds->data.F32[Nc] = histogram->bounds->data.F32[i+1]; 893 892 Nc ++; 894 893 } … … 898 897 cumulative->bounds->data.F32[i] = cumulative->bounds->data.F32[i-1] + 1.0; 899 898 } 900 901 # if (0)902 // Correctly fill the cumulative distribution with monotonically increasing values (skip zero valued bins).903 long delta = 0;904 long delta_x = 0;905 for (long i = 1; i < histogram->nums->n; i++) {906 if (histogram->nums->data.F32[i] > 0.0) { // This bin is bigger than the last one.907 cumulative->nums->data.F32[i - delta] = cumulative->nums->data.F32[i - delta - 1] + histogram->nums->data.F32[i];908 cumulative->bounds->data.F32[i - delta - 1] = histogram->bounds->data.F32[i - delta_x];909 delta_x = 0;910 }911 else { // This bin is the same as the last one, so we shouldn't count this bound912 delta++;913 delta_x++;914 }915 }916 for (long i = histogram->nums->n - delta; i < histogram->nums->n; i++) { // Ensure the unused entries are filled.917 cumulative->nums->data.F32[i] = cumulative->nums->data.F32[histogram->nums->n - delta - 1];918 cumulative->bounds->data.F32[i] = cumulative->bounds->data.F32[i-1] + 1.0;919 }920 # endif921 899 922 900 if (psTraceGetLevel("psLib.math") >= 8) { … … 940 918 // There's no reason to do a quadratic fit near the 50% bin, as it's approximately linear there. 941 919 // Instead, do a 5-point linear fit. 942 # if (0)943 { // Quick 5-point linear fit944 double Sx = (cumulative->nums->data.F32[binMedian - 2] + cumulative->nums->data.F32[binMedian - 1] +945 cumulative->nums->data.F32[binMedian - 0] +946 cumulative->nums->data.F32[binMedian + 1] + cumulative->nums->data.F32[binMedian + 2]);947 double Sy = (cumulative->bounds->data.F32[binMedian - 2] + cumulative->bounds->data.F32[binMedian - 1] +948 cumulative->bounds->data.F32[binMedian - 0] +949 cumulative->bounds->data.F32[binMedian + 1] + cumulative->bounds->data.F32[binMedian + 2]);950 double Sxx = (pow(cumulative->nums->data.F32[binMedian - 2],2) + pow(cumulative->nums->data.F32[binMedian - 1],2) +951 pow(cumulative->nums->data.F32[binMedian - 0],2) +952 pow(cumulative->nums->data.F32[binMedian + 1],2) + pow(cumulative->nums->data.F32[binMedian + 2],2));953 double Sxy = (cumulative->bounds->data.F32[binMedian - 2] * cumulative->nums->data.F32[binMedian - 2] +954 cumulative->bounds->data.F32[binMedian - 1] * cumulative->nums->data.F32[binMedian - 1] +955 cumulative->bounds->data.F32[binMedian - 0] * cumulative->nums->data.F32[binMedian - 0] +956 cumulative->bounds->data.F32[binMedian + 1] * cumulative->nums->data.F32[binMedian + 1] +957 cumulative->bounds->data.F32[binMedian + 2] * cumulative->nums->data.F32[binMedian + 2]);958 double linearMedian = ((Sy * Sxx - Sx * Sxy) + (5 * Sxy - Sx * Sy) * (totalDataPoints/2.0))/(5 * Sxx - Sx * Sx);959 // psLogMsg("psLib",5,"Median Comp: %f %f\n",stats->robustMedian,linearMedian);960 stats->robustMedian = linearMedian;961 }962 # endif963 920 stats->robustMedian = fitLinearSearchForYThenReturnBin(cumulative->bounds, cumulative->nums, binMedian, totalDataPoints/2.0); 964 921 … … 1003 960 // (extrapolation should not be needed and will result in errors) 1004 961 float binLoF32, binHiF32, binL2F32, binH2F32, binL4F32, binH4F32; 962 #if (0) 1005 963 PS_BIN_INTERPOLATE (binLoF32, cumulative->nums, cumulative->bounds, binLo, 1006 964 totalDataPoints * 0.158655f); … … 1015 973 PS_BIN_INTERPOLATE (binH4F32, cumulative->nums, cumulative->bounds, binH4, 1016 974 totalDataPoints * 0.977250f); 1017 975 #else 976 binLoF32 = fitLinearSearchForYThenReturnBin(cumulative->bounds, cumulative->nums, binLo, totalDataPoints * 0.158655); 977 binHiF32 = fitLinearSearchForYThenReturnBin(cumulative->bounds, cumulative->nums, binHi, totalDataPoints * 0.841345); 978 binL2F32 = fitLinearSearchForYThenReturnBin(cumulative->bounds, cumulative->nums, binL2, totalDataPoints * 0.308538); 979 binH2F32 = fitLinearSearchForYThenReturnBin(cumulative->bounds, cumulative->nums, binH2, totalDataPoints * 0.691462); 980 binL4F32 = fitLinearSearchForYThenReturnBin(cumulative->bounds, cumulative->nums, binL4, totalDataPoints * 0.022750); 981 binH4F32 = fitLinearSearchForYThenReturnBin(cumulative->bounds, cumulative->nums, binH4, totalDataPoints * 0.977250); 982 #endif 1018 983 // report +/- 1 sigma points 1019 984 psTrace(TRACE, 5, … … 1027 992 binL4F32, binH4F32); 1028 993 994 // If some of the fits failed, attempt to fix this 995 if (!isfinite(binLoF32) && isfinite(binHiF32)) { binLoF32 = -1.0 * binHiF32; } 996 if (!isfinite(binHiF32) && isfinite(binLoF32)) { binHiF32 = -1.0 * binLoF32; } 997 if (!isfinite(binL2F32) && isfinite(binH2F32)) { binL2F32 = -1.0 * binH2F32; } 998 if (!isfinite(binH2F32) && isfinite(binL2F32)) { binH2F32 = -1.0 * binL2F32; } 999 if (!isfinite(binL4F32) && isfinite(binH4F32)) { binL4F32 = -1.0 * binH4F32; } 1000 if (!isfinite(binH4F32) && isfinite(binL4F32)) { binH4F32 = -1.0 * binL4F32; } 1001 1029 1002 // ADD step 5: Determine SIGMA as the distance between binL2 and binH2 (+/- 0.5 sigma) 1003 1004 1030 1005 float sigma1 = (binH2F32 - binL2F32); 1031 1006 float sigma2 = (binHiF32 - binLoF32) / 2.0; 1032 1007 float sigma4 = (binH4F32 - binL4F32) / 4.0; 1033 1008 1009 // Fix again? 1010 if (!isfinite(sigma1) && isfinite(sigma2) && isfinite(sigma4)) { sigma1 = (sigma2 + sigma4) / 2.0; } 1011 if (!isfinite(sigma2) && isfinite(sigma1) && isfinite(sigma4)) { sigma2 = (sigma1 + sigma4) / 2.0; } 1012 if (!isfinite(sigma4) && isfinite(sigma2) && isfinite(sigma1)) { sigma4 = (sigma2 + sigma1) / 2.0; } 1013 1034 1014 // take the smallest of the three: if we have a clump with wide outliers, sigma2 and 1035 1015 // sigma4 will be biased high; if we have a bi-modal distribution, sigma1 and sigma2 1036 1016 // will be biased high. 1037 sigma = PS_MIN (sigma1, PS_MIN (sigma2, sigma4)); 1017 // sigma = PS_MIN (sigma1, PS_MIN (sigma2, sigma4)); 1018 // CZW: Instead, take the median. Taking the MIN forces a bias on unbiased data. 1019 // It seems like occasionally getting the wrong answer on a complex distribution 1020 // is more acceptable than always getting the wrong answer for simple ones. 1021 1022 1023 sigma = PS_MAX( PS_MIN(sigma1,sigma2), 1024 PS_MIN( PS_MAX(sigma1,sigma2), 1025 sigma4)); 1038 1026 1039 1027 psTrace(TRACE, 6, "The 1x sigma is %f.\n", sigma1); … … 1042 1030 1043 1031 psTrace(TRACE, 6, "The current sigma is %f.\n", sigma); 1044 stats->robustStdev = sigma; 1045 1046 1047 #if (CZW && 0) 1032 // stats->robustStdev = sigma; 1033 stats->robustStdev = sigma; 1034 1035 #if (CZW && 0) 1036 // Skewness check: Find least biased sample for each pair. 1037 sigma1 = 2.0 * PS_MIN(binH2F32 - stats->robustMedian, 1038 stats->robustMedian - binL2F32); 1039 sigma2 = 1.0 * PS_MIN(binHiF32 - stats->robustMedian, 1040 stats->robustMedian - binLoF32); 1041 sigma4 = 0.5 * PS_MIN(binH4F32 - stats->robustMedian, 1042 stats->robustMedian - binL4F32); 1043 // Kurtosis check: Take median sample as the solution. 1044 stats->robustStdev = PS_MAX( PS_MIN(sigma1,sigma2), 1045 PS_MIN( PS_MAX(sigma1,sigma2), 1046 sigma4)); 1047 #endif 1048 1049 1050 #if (0) 1051 printf("CZW: bad sigma?: %f %f %f %f %f %f %f %f %f %f\n", 1052 binH2F32,binL2F32,binHiF32,binLoF32,binH4F32,binL4F32, 1053 sigma1,sigma2,sigma4,sigma); 1054 1048 1055 printf("CZW (%d): median %f sigma %f delta: %f \n\t %f %f %f %f %f %f %f \n\t %f %f %f %f %f %f %f\n", 1049 1056 iterate, … … 1200 1207 * "vectorFittedStats_v4" all versions of fitted stats now resolve to this function (only v4 1201 1208 * has really been used) vectorFittedStats requires guess for fittedMean and fittedStdev 1202 * robustN50 should also be set gaussian fit is performed using 2D polynomial to ln(y) this1209 * robustN50 should also be set gaussian fit is performed using 1D polynomial to ln(y) this 1203 1210 * version follows the upper portion of the distribution until it passes 0.5*peak 1204 1211 ********************/ … … 1235 1242 return true; 1236 1243 } 1237 1244 if (myVector->n < 1) { printf("There are no elements in this vector.\n"); abort(); } 1238 1245 float guessStdev = stats->robustStdev; // pass the guess sigma 1239 1246 float guessMean = stats->robustMedian; // pass the guess mean … … 1255 1262 // set roughly so that the lowest bins have about 2 cnts 1256 1263 // Nsmallest ~ N50 / (4*dN)) 1257 psF32 dN = PS_MAX (1, PS_MIN (4, stats->robustN50 / 8)); 1258 binSize = guessStdev / dN;1264 psF32 dN = PS_MAX (1, PS_MIN (4, stats->robustN50 / 8)); 1265 binSize = guessStdev / dN; 1259 1266 } 1260 1267 … … 1288 1295 psTrace(TRACE, 6, "The numBins is %ld\n", numBins); 1289 1296 1290 1297 1291 1298 psHistogram *histogram = psHistogramAlloc(min, max, numBins); // A new histogram (without outliers) 1292 1299 if (!psVectorHistogram(histogram, myVector, errors, mask, maskVal)) { … … 1551 1558 1552 1559 1553 #if (CZW && 0)1560 #if (CZW && 1) 1554 1561 printf("CZW IN FITTED: iter %d %f \n" 1555 1562 " low %f %f \n" … … 2575 2582 Sxx += PS_SQR(yVec->data.F32[u]); 2576 2583 2577 deltaY = 0.5 * (xVec->data.F32[u] + xVec->data.F32[u+1]); 2584 deltaY = xVec->data.F32[u]; 2585 //deltaY = 0.5 * (xVec->data.F32[u] + xVec->data.F32[u+1]); 2578 2586 Sy += deltaY; 2579 2587 Sxy += yVec->data.F32[u] * deltaY;
Note:
See TracChangeset
for help on using the changeset viewer.
