Changeset 14931 for trunk/psLib/src/math/psStats.c
- Timestamp:
- Sep 20, 2007, 1:59:03 PM (19 years ago)
- File:
-
- 1 edited
-
trunk/psLib/src/math/psStats.c (modified) (15 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/psLib/src/math/psStats.c
r14440 r14931 13 13 * use ->min and ->max (PS_STAT_USE_RANGE) 14 14 * 15 * @version $Revision: 1.21 5$ $Name: not supported by cvs2svn $16 * @date $Date: 2007-0 8-08 21:31:42$15 * @version $Revision: 1.216 $ $Name: not supported by cvs2svn $ 16 * @date $Date: 2007-09-20 23:59:01 $ 17 17 * 18 18 * Copyright 2006 IfA, University of Hawaii … … 1469 1469 valPeak = histogram->nums->data.F32[binPeak]; 1470 1470 } 1471 } 1471 psTrace (TRACE, 6, "(%f = %.0f) ", histogram->bounds->data.F32[i], histogram->nums->data.F32[i]); 1472 } 1473 psTrace (TRACE, 6, "\n"); 1472 1474 1473 1475 // assume a reasonably well-defined gaussian-like population; run from peak out until val < 0.25*peak … … 1477 1479 psTrace(TRACE, 6, "The clipped max is %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binMax - 1), binMax - 1); 1478 1480 psTrace(TRACE, 6, "The clipped peak is %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binPeak), binPeak); 1481 psTrace(TRACE, 6, "The clipped peak value is %f\n", histogram->nums->data.F32[binPeak]); 1479 1482 1480 1483 { … … 1529 1532 if (iteration == 0) { 1530 1533 guessStdev = 0.25*guessStdev; 1531 psTrace(TRACE, 6, "*** retry stdev is %f.\n", guessStdev);1534 psTrace(TRACE, 6, "*** retry, new stdev is %f.\n", guessStdev); 1532 1535 continue; 1533 1536 } … … 1551 1554 } 1552 1555 1556 // for test, measure the same result for the upper section 1553 1557 { 1554 1558 // fit the upper half of the distribution … … 1592 1596 } 1593 1597 1594 // calculate upper mean & stdev from parabolic fit -- use this as the result1598 // calculate upper mean & stdev from parabolic fit -- ignore this value 1595 1599 #ifndef PS_NO_TRACE 1596 1600 float upperStdev = sqrt(-0.5/poly->coeff[2]); … … 1601 1605 #endif 1602 1606 1607 // if the resulting value is outside of the range binMin - binMax, use the upper value 1608 if (done && (guessMean > PS_BIN_MIDPOINT(histogram, binMax - 1))) { 1609 guessMean = upperMean; 1610 guessStdev = upperStdev; 1611 } 1612 1603 1613 psFree (poly); 1604 1614 } … … 1619 1629 stats->results |= PS_STAT_FITTED_MEAN_V3; 1620 1630 stats->results |= PS_STAT_FITTED_STDEV_V3; 1631 1632 return true; 1633 } 1634 1635 /******************** 1636 * perform an asymmetric fit to the population 1637 * vectorFittedStats_v4 requires guess for fittedMean and fittedStdev 1638 * robustN50 should also be set 1639 * gaussian fit is performed using 2D polynomial to ln(y) 1640 * this version follows the upper portion of the distribution until it passes 0.5*peak 1641 ********************/ 1642 static bool vectorFittedStats_v4 (const psVector* myVector, 1643 const psVector* errors, 1644 psVector* mask, 1645 psMaskType maskVal, 1646 psStats* stats) 1647 { 1648 1649 // This procedure requires the mean. If it has not been already 1650 // calculated, then call vectorSampleMean() 1651 if (!(stats->results & PS_STAT_ROBUST_MEDIAN)) { 1652 vectorRobustStats(myVector, errors, mask, maskVal, stats); 1653 } 1654 1655 // If the mean is NAN, then generate a warning and set the stdev to NAN. 1656 if (isnan(stats->robustMedian)) { 1657 stats->fittedStdev = NAN; 1658 stats->fittedStdev = NAN; 1659 psTrace(TRACE, 4, "---- %s() end ----\n", __func__); 1660 return false; 1661 } 1662 1663 float guessStdev = stats->robustStdev; // pass the guess sigma 1664 float guessMean = stats->robustMedian; // pass the guess mean 1665 1666 psTrace(TRACE, 6, "The ** starting ** guess mean is %f.\n", guessMean); 1667 psTrace(TRACE, 6, "The ** starting ** guess stdev is %f.\n", guessStdev); 1668 1669 bool done = false; 1670 for (int iteration = 0; !done && (iteration < 2); iteration ++) { 1671 psStats *statsMinMax = psStatsAlloc(PS_STAT_MIN | PS_STAT_MAX); // Statistics for min and max 1672 1673 psF32 binSize = 1; 1674 if (stats->options & PS_STAT_USE_BINSIZE) { 1675 // Set initial bin size to the specified value. 1676 binSize = stats->binsize; 1677 psTrace(TRACE, 6, "Setting initial robust bin size to %.2f\n", binSize); 1678 } else { 1679 // construct a histogram with (sigma/2 < binsize < sigma) 1680 // set roughly so that the lowest bins have about 2 cnts 1681 // Nsmallest ~ N50 / (4*dN)) 1682 psF32 dN = PS_MAX (1, PS_MIN (4, stats->robustN50 / 8)); 1683 binSize = guessStdev / dN; 1684 } 1685 1686 // Determine the min/max of the vector (which prior outliers masked out) 1687 int numValid = vectorMinMax(myVector, mask, maskVal, statsMinMax); // Number of values 1688 float min = statsMinMax->min; 1689 float max = statsMinMax->max; 1690 if (numValid == 0 || isnan(min) || isnan(max)) { 1691 psError(PS_ERR_UNKNOWN, false, "Failed to calculate the min/max of the input vector.\n"); 1692 psFree(statsMinMax); 1693 psTrace(TRACE, 4, "---- %s(false) end ----\n", __func__); 1694 return false; 1695 } 1696 1697 // Calculate the number of bins. 1698 // XXX can we calculate the binMin, binMax **before** building this histogram? 1699 long numBins = (max - min) / binSize; 1700 psTrace(TRACE, 6, "The new min/max values are (%f, %f).\n", min, max); 1701 psTrace(TRACE, 6, "The new bin size is %f.\n", binSize); 1702 psTrace(TRACE, 6, "The numBins is %ld\n", numBins); 1703 1704 psHistogram *histogram = psHistogramAlloc(min, max, numBins); // A new histogram (without outliers) 1705 if (!psVectorHistogram(histogram, myVector, errors, mask, maskVal)) { 1706 psError(PS_ERR_UNKNOWN, false, "Unable to generate histogram for fitted statistics v4.\n"); 1707 psFree(histogram); 1708 psFree(statsMinMax); 1709 return false; 1710 } 1711 if (psTraceGetLevel("psLib.math") >= 8) { 1712 PS_VECTOR_PRINT_F32(histogram->nums); 1713 } 1714 1715 // now fit a Gaussian to the upper and lower halves about the peak independently 1716 1717 // set the full-range upper and lower limits 1718 psF32 maxFitSigma = 2.0; 1719 if (isfinite(stats->clipSigma)) { 1720 maxFitSigma = fabs(stats->clipSigma); 1721 } 1722 if (isfinite(stats->max)) { 1723 maxFitSigma = fabs(stats->max); 1724 } 1725 1726 psF32 minFitSigma = 2.0; 1727 if (isfinite(stats->clipSigma)) { 1728 minFitSigma = fabs(stats->clipSigma); 1729 } 1730 if (isfinite(stats->min)) { 1731 minFitSigma = fabs(stats->min); 1732 } 1733 1734 // select the min and max bins, saturating on the lower and upper end-points 1735 long binMin, binMax; 1736 PS_BIN_FOR_VALUE (binMin, histogram->bounds, guessMean - minFitSigma*guessStdev, 0); 1737 PS_BIN_FOR_VALUE (binMax, histogram->bounds, guessMean + maxFitSigma*guessStdev, 0); 1738 if (binMin == binMax) { 1739 psError(PS_ERR_UNKNOWN, false, "Failed to calculate the min/max of the input vector.\n"); 1740 psFree(statsMinMax); 1741 psTrace(TRACE, 4, "---- %s(false) end ----\n", __func__); 1742 return false; 1743 } 1744 1745 // search for mode (peak of histogram within range mean-2sigma - mean+2sigma 1746 long binPeak = binMin; 1747 float valPeak = histogram->nums->data.F32[binPeak]; 1748 for (int i = binMin; i < binMax; i++) { 1749 if (histogram->nums->data.F32[i] > valPeak) { 1750 binPeak = i; 1751 valPeak = histogram->nums->data.F32[binPeak]; 1752 } 1753 psTrace (TRACE, 6, "(%f = %.0f) ", histogram->bounds->data.F32[i], histogram->nums->data.F32[i]); 1754 } 1755 psTrace (TRACE, 6, "\n"); 1756 1757 // assume a reasonably well-defined gaussian-like population; run from peak out until val < 0.25*peak 1758 1759 psTrace(TRACE, 6, "The clipped numBins is %ld\n", binMax - binMin); 1760 psTrace(TRACE, 6, "The clipped min is %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binMin), binMin); 1761 psTrace(TRACE, 6, "The clipped max is %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binMax - 1), binMax - 1); 1762 psTrace(TRACE, 6, "The clipped peak is %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binPeak), binPeak); 1763 psTrace(TRACE, 6, "The clipped peak value is %f\n", histogram->nums->data.F32[binPeak]); 1764 1765 { 1766 // fit the lower half of the distribution 1767 // run down until we drop below 0.25*valPeak 1768 // run up until we drop below 0.50*valPeak 1769 long binS = binMin; 1770 long binE = PS_MIN (binPeak + 3, binMax); 1771 for (int i = binPeak - 3; i >= binMin; i--) { 1772 if (histogram->nums->data.F32[i] < 0.25*valPeak) { 1773 binS = i; 1774 break; 1775 } 1776 } 1777 for (int i = binPeak + 3; i < binMax; i++) { 1778 if (histogram->nums->data.F32[i] < 0.50*valPeak) { 1779 binE = i; 1780 break; 1781 } 1782 } 1783 psTrace(TRACE, 6, "Lower bound for lower half: %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binS), binS); 1784 psTrace(TRACE, 6, "Upper bound for lower half: %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binE), binE); 1785 1786 psVector *y = psVectorAllocEmpty(binE - binS, PS_TYPE_F32); // Vector of coordinates 1787 psVector *x = psVectorAllocEmpty(binE - binS, PS_TYPE_F32); // Vector of ordinates 1788 long j = 0; 1789 for (long i = binS; i < binE; i++) { 1790 if (histogram->nums->data.F32[i] <= 0.0) 1791 continue; 1792 x->data.F32[j] = PS_BIN_MIDPOINT(histogram, i); 1793 y->data.F32[j] = log(histogram->nums->data.F32[i]); // note this is the natural log: expected distribution is A exp(-(x-xo)^2/2sigma^2) 1794 j++; 1795 } 1796 y->n = x->n = j; 1797 1798 // fit 2nd order polynomial to ln(y) = -(x-xo)^2/2sigma^2 1799 psPolynomial1D *poly = psPolynomial1DAlloc(PS_POLYNOMIAL_ORD, 2); 1800 bool status = psVectorFitPolynomial1D (poly, NULL, 0, y, NULL, x); 1801 psFree(x); 1802 psFree(y); 1803 1804 if (!status) { 1805 psError(PS_ERR_UNKNOWN, false, "Failed to fit a gaussian to the robust histogram.\n"); 1806 psFree(poly); 1807 psFree(histogram); 1808 psFree(statsMinMax); 1809 psTrace(TRACE, 4, "---- %s(false) end ----\n", __func__); 1810 return false; 1811 } 1812 1813 if (poly->coeff[2] >= 0.0) { 1814 psTrace(TRACE, 6, "Failed parabolic fit: %f + %f x + %f x^2\n", poly->coeff[0], poly->coeff[1], poly->coeff[2]); 1815 psFree(poly); 1816 psFree(histogram); 1817 psFree(statsMinMax); 1818 1819 // sometimes, the guessStdev is much too large. in this case, the entire real population tends to be found 1820 // in a single bin. make one attempt to recover by dropping the guessStdev down by a jump and trying again 1821 if (iteration == 0) { 1822 guessStdev = 0.25*guessStdev; 1823 psTrace(TRACE, 6, "*** retry, new stdev is %f.\n", guessStdev); 1824 continue; 1825 } 1826 1827 psError(PS_ERR_UNKNOWN, false, "fit did not converge\n"); 1828 psTrace(TRACE, 4, "---- %s(false) end ----\n", __func__); 1829 return false; 1830 } 1831 1832 // calculate lower mean & stdev from parabolic fit -- use this as the result 1833 guessStdev = sqrt(-0.5/poly->coeff[2]); 1834 guessMean = poly->coeff[1]*PS_SQR(guessStdev); 1835 if (guessStdev > 0.75*stats->robustStdev) { 1836 done = true; 1837 } 1838 psTrace(TRACE, 6, "Parabolic Lower fit results: %f + %f x + %f x^2\n", poly->coeff[0], poly->coeff[1], poly->coeff[2]); 1839 psTrace(TRACE, 6, "The lower mean is %f.\n", guessMean); 1840 psTrace(TRACE, 6, "The lower stdev is %f.\n", guessStdev); 1841 1842 psFree(poly); 1843 } 1844 1845 // if we converge on a solution outside the range binMin - binMax, use a more conservative range 1846 if (done && (guessMean > PS_BIN_MIDPOINT(histogram, binMax - 1))) { 1847 psTrace(TRACE, 6, "Inconsistent result, re-trying the fit\n"); 1848 1849 // fit a symmetric distribution 1850 // run up until we drop below 0.15*valPeak 1851 // run up until we drop below 0.15*valPeak 1852 long binS = binMin; 1853 long binE = binMax; 1854 for (int i = binPeak - 3; i >= binMin; i--) { 1855 if (histogram->nums->data.F32[i] < 0.15*valPeak) { 1856 binS = i; 1857 break; 1858 } 1859 } 1860 for (int i = binPeak + 3; i < binMax; i++) { 1861 if (histogram->nums->data.F32[i] < 0.15*valPeak) { 1862 binE = i; 1863 break; 1864 } 1865 } 1866 psTrace(TRACE, 6, "Lower bound for symmetric range: %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binS), binS); 1867 psTrace(TRACE, 6, "Upper bound for symmetric range: %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binE), binE); 1868 1869 psVector *y = psVectorAllocEmpty(binE - binS, PS_TYPE_F32); // Vector of coordinates 1870 psVector *x = psVectorAllocEmpty(binE - binS, PS_TYPE_F32); // Vector of ordinates 1871 long j = 0; 1872 for (long i = binS; i < binE; i++) { 1873 if (histogram->nums->data.F32[i] <= 0.0) 1874 continue; 1875 x->data.F32[j] = PS_BIN_MIDPOINT(histogram, i); 1876 y->data.F32[j] = log(histogram->nums->data.F32[i]); // note this is the natural log: expected distribution is A exp(-(x-xo)^2/2sigma^2) 1877 j++; 1878 } 1879 y->n = x->n = j; 1880 1881 // fit 2nd order polynomial to ln(y) = -(x-xo)^2/2sigma^2 1882 psPolynomial1D *poly = psPolynomial1DAlloc(PS_POLYNOMIAL_ORD, 2); 1883 bool status = psVectorFitPolynomial1D (poly, NULL, 0, y, NULL, x); 1884 psFree(x); 1885 psFree(y); 1886 1887 if (!status) { 1888 psError(PS_ERR_UNKNOWN, false, "Failed to fit a gaussian to the robust histogram.\n"); 1889 psFree(poly); 1890 psFree(histogram); 1891 psFree(statsMinMax); 1892 psTrace(TRACE, 4, "---- %s(false) end ----\n", __func__); 1893 return false; 1894 } 1895 1896 // calculate upper mean & stdev from parabolic fit -- ignore this value 1897 #ifndef PS_NO_TRACE 1898 guessStdev = sqrt(-0.5/poly->coeff[2]); 1899 guessMean = poly->coeff[1]*PS_SQR(guessStdev); 1900 psTrace(TRACE, 6, "Parabolic Symmetric fit results: %f + %f x + %f x^2\n", poly->coeff[0], poly->coeff[1], poly->coeff[2]); 1901 psTrace(TRACE, 6, "The symmetric mean is %f.\n", guessMean); 1902 psTrace(TRACE, 6, "The symmetric stdev is %f.\n", guessStdev); 1903 #endif 1904 1905 psFree (poly); 1906 } 1907 1908 // Clean up after fitting 1909 psFree (histogram); 1910 psFree (statsMinMax); 1911 } 1912 1913 // The fitted mean is the Gaussian mean. 1914 stats->fittedMean = guessMean; 1915 psTrace(TRACE, 6, "The fitted mean is %f.\n", stats->fittedMean); 1916 1917 // The fitted standard deviation 1918 stats->fittedStdev = guessStdev; 1919 psTrace(TRACE, 6, "The fitted stdev is %f.\n", stats->fittedStdev); 1920 1921 stats->results |= PS_STAT_FITTED_MEAN_V4; 1922 stats->results |= PS_STAT_FITTED_STDEV_V4; 1621 1923 1622 1924 return true; … … 1748 2050 psStats *stats = p_psAlloc(file, lineno, func, sizeof(psStats)); 1749 2051 psMemSetDeallocator(stats, (psFreeFunc)statsFree); 2052 2053 // set initial, default values 2054 psStatsInit (stats); 2055 2056 // these values are can be set as desired by the user. they are not affected by 2057 // psStatsInit 2058 stats->clipSigma = 3.0; 2059 stats->clipIter = 3; 2060 stats->nSubsample = 100000; 2061 stats->options = options; 2062 2063 psTrace(TRACE, 3, "---- %s() end ----\n", __func__); 2064 return stats; 2065 } 2066 2067 // reset the values which are output, and which may be used from one psStats stage to the next 2068 void psStatsInit(psStats *stats) 2069 { 1750 2070 stats->sampleMean = NAN; 1751 2071 stats->sampleMedian = NAN; … … 1766 2086 stats->clippedStdev = NAN; 1767 2087 stats->clippedNvalues = -1; // XXX: This is never used 1768 stats->clipSigma = 3.0;1769 stats->clipIter = 3;1770 2088 stats->min = NAN; 1771 2089 stats->max = NAN; 1772 2090 stats->binsize = NAN; 1773 stats->nSubsample = 100000; 1774 stats->options = options; 1775 1776 psTrace(TRACE, 3, "---- %s() end ----\n", __func__); 1777 return stats; 2091 return; 1778 2092 } 1779 2093 … … 1928 2242 } 1929 2243 if (!vectorFittedStats_v3(inF32, errorsF32, maskU8, maskVal, stats)) { 2244 psError(PS_ERR_UNKNOWN, false, _("Failed to calculate fitted statistics")); 2245 status &= false; 2246 } 2247 } 2248 2249 // ************************************************************************ 2250 if (stats->options & (PS_STAT_FITTED_MEAN_V4 | PS_STAT_FITTED_STDEV_V4)) { 2251 if (stats->options & (PS_STAT_FITTED_MEAN | PS_STAT_FITTED_STDEV)) { 2252 psAbort("you may not specify both FITTED_MEAN and FITTED_MEAN_V4"); 2253 } 2254 if (!vectorFittedStats_v4(inF32, errorsF32, maskU8, maskVal, stats)) { 1930 2255 psError(PS_ERR_UNKNOWN, false, _("Failed to calculate fitted statistics")); 1931 2256 status &= false; … … 1980 2305 READ_STAT("FITTED_MEAN_V3", PS_STAT_FITTED_MEAN_V3); 1981 2306 READ_STAT("FITTED_STDEV_V3", PS_STAT_FITTED_STDEV_V3); 2307 READ_STAT("FITTED_V4", PS_STAT_FITTED_MEAN_V4); 2308 READ_STAT("FITTED_MEAN_V4", PS_STAT_FITTED_MEAN_V4); 2309 READ_STAT("FITTED_STDEV_V4", PS_STAT_FITTED_STDEV_V4); 1982 2310 READ_STAT("CLIPPED", PS_STAT_CLIPPED_MEAN); 1983 2311 READ_STAT("CLIPPED_MEAN", PS_STAT_CLIPPED_MEAN); … … 2013 2341 WRITE_STAT("FITTED_MEAN_V3", PS_STAT_FITTED_MEAN_V3); 2014 2342 WRITE_STAT("FITTED_STDEV_V3", PS_STAT_FITTED_STDEV_V3); 2343 WRITE_STAT("FITTED_MEAN_V4", PS_STAT_FITTED_MEAN_V4); 2344 WRITE_STAT("FITTED_STDEV_V4", PS_STAT_FITTED_STDEV_V4); 2015 2345 WRITE_STAT("CLIPPED_MEAN", PS_STAT_CLIPPED_MEAN); 2016 2346 WRITE_STAT("CLIPPED_STDEV", PS_STAT_CLIPPED_STDEV); … … 2067 2397 case PS_STAT_FITTED_MEAN_V3: 2068 2398 case PS_STAT_FITTED_STDEV_V3: 2399 case PS_STAT_FITTED_MEAN_V4: 2400 case PS_STAT_FITTED_STDEV_V4: 2069 2401 case PS_STAT_CLIPPED_MEAN: 2070 2402 case PS_STAT_CLIPPED_STDEV: … … 2109 2441 case PS_STAT_FITTED_STDEV_V3: 2110 2442 return stats->fittedStdev; 2443 case PS_STAT_FITTED_MEAN_V4: 2444 return stats->fittedMean; 2445 case PS_STAT_FITTED_STDEV_V4: 2446 return stats->fittedStdev; 2111 2447 case PS_STAT_CLIPPED_MEAN: 2112 2448 return stats->clippedMean;
Note:
See TracChangeset
for help on using the changeset viewer.
