Changeset 26615 for trunk/psLib/src/math/psStats.c
- Timestamp:
- Jan 15, 2010, 3:01:31 PM (16 years ago)
- File:
-
- 1 edited
-
trunk/psLib/src/math/psStats.c (modified) (5 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/psLib/src/math/psStats.c
r25884 r26615 749 749 // Iterate to get the best bin size; an iteration limit is enforced at the bottom of the loop. 750 750 for (int iterate = 1; iterate > 0; iterate++) { 751 psTrace(TRACE, 6, 752 "-------------------- Iterating on Bin size. Iteration number %d --------------------\n", 753 iterate); 751 psTrace(TRACE, 6, "-------------------- Iterating on Bin size. Iteration number %d --------------------\n", iterate); 754 752 755 753 // Get the minimum and maximum values … … 791 789 psTrace(TRACE, 6, "Initial robust bin size is %.2f\n", binSize); 792 790 793 // ADD step 0: Construct the histogram with the specified bin size. NOTE: we can not specify the bin 794 // size precisely since the argument to psHistogramAlloc() is the number of bins, not the binSize. If 795 // we get here, we know that binSize != 0.0. 796 long numBins = (max - min) / binSize; // Number of bins 791 // ADD step 0: Construct the histogram with the specified bin size. NOTE: we can 792 // not specify the bin size precisely since the argument to psHistogramAlloc() is 793 // the number of bins, not the binSize. If we get here, we know that binSize != 794 // 0.0. We can also have a floating-point round-off error such that the last bin 795 // of the histogram does not correspond exactly with the value of 'max'. Let's be 796 // a bit generous and extend the histogram by two bins in either direction 797 long numBins = 4 + (max - min) / binSize; // Number of bins 797 798 psTrace(TRACE, 6, "Numbins is %ld\n", numBins); 798 799 psTrace(TRACE, 6, "Creating a robust histogram from data range (%.2f - %.2f)\n", min, max); 799 800 // Generate the histogram 800 histogram = psHistogramAlloc(min , max, numBins);801 histogram = psHistogramAlloc(min - 2.0*binSize, max + 2.0*binSize, numBins); 801 802 // XXXXX we need to consider this step if errors -> variance 802 803 if (!psVectorHistogram(histogram, myVector, errors, mask, maskVal)) { … … 807 808 psFree(statsMinMax); 808 809 psFree(mask); 809 810 810 return false; 811 811 } … … 814 814 PS_VECTOR_PRINT_F32(histogram->nums); 815 815 } 816 817 // perversity check: if most of the values land in a single bin, then we probably 818 // have a perverse case (eg, small number of points at extremely large / small 819 // values; nearly bi-modal distribution). if so, keep only points within 5? 10? 820 // bins of that excess bin: 821 int nMaxBin = 0; 822 int iMaxBin = 0; 823 for (long i = 1; i < histogram->nums->n; i++) { 824 if (histogram->nums->data.F32[i] > nMaxBin) { 825 nMaxBin = histogram->nums->data.F32[i]; 826 iMaxBin = i; 827 } 828 } 829 if (nMaxBin > numValid / 2) { 830 float minKeep = histogram->bounds->data.F32[iMaxBin] - 10*binSize; 831 float maxKeep = histogram->bounds->data.F32[iMaxBin + 1] + 10*binSize; 832 int nInvalid = 0; 833 for (long i = 0; i < myVector->n; i++) { 834 // skip the already-masked values 835 if (mask->data.PS_TYPE_VECTOR_MASK_DATA[i] & maskVal) continue; 836 bool invalid = false; 837 invalid |= (myVector->data.F32[i] < minKeep); 838 invalid |= (myVector->data.F32[i] > maxKeep); 839 invalid |= (!isfinite(myVector->data.F32[i])); 840 if (!invalid) continue; 841 mask->data.PS_TYPE_VECTOR_MASK_DATA[i] = maskVal; 842 nInvalid ++; 843 } 844 psTrace(TRACE, 6, "data is concentrated in a single bin, masking %d extreme outliers and retrying\n", nInvalid); 845 psFree(histogram); 846 psFree(cumulative); 847 continue; 848 } 816 849 817 850 // ADD step 1: Convert the specific histogram to a cumulative histogram … … 1007 1040 stats->robustN50 = N50; 1008 1041 psTrace(TRACE, 6, "The robustN50 is %ld.\n", N50); 1042 psTrace(TRACE, 6, "The robust median and stdev are %f, %f\n", stats->robustMedian, stats->robustStdev); 1009 1043 1010 1044 // Clean up
Note:
See TracChangeset
for help on using the changeset viewer.
