IPP Software Navigation Tools IPP Links Communication Pan-STARRS Links

Ignore:
Timestamp:
Sep 20, 2007, 1:59:03 PM (19 years ago)
Author:
eugene
Message:

added psStatsInit, FITTED_MEAN_V4, fixed out-of-range error in FITTED_MEAN_V3

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/psLib/src/math/psStats.c

    r14440 r14931  
    1313 * use ->min and ->max (PS_STAT_USE_RANGE)
    1414 *
    15  *  @version $Revision: 1.215 $ $Name: not supported by cvs2svn $
    16  *  @date $Date: 2007-08-08 21:31:42 $
     15 *  @version $Revision: 1.216 $ $Name: not supported by cvs2svn $
     16 *  @date $Date: 2007-09-20 23:59:01 $
    1717 *
    1818 *  Copyright 2006 IfA, University of Hawaii
     
    14691469                valPeak = histogram->nums->data.F32[binPeak];
    14701470            }
    1471         }
     1471            psTrace (TRACE, 6, "(%f = %.0f) ", histogram->bounds->data.F32[i], histogram->nums->data.F32[i]);
     1472        }
     1473        psTrace (TRACE, 6, "\n");
    14721474
    14731475        // assume a reasonably well-defined gaussian-like population; run from peak out until val < 0.25*peak
     
    14771479        psTrace(TRACE, 6, "The clipped max is %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binMax - 1), binMax - 1);
    14781480        psTrace(TRACE, 6, "The clipped peak is %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binPeak), binPeak);
     1481        psTrace(TRACE, 6, "The clipped peak value is %f\n", histogram->nums->data.F32[binPeak]);
    14791482
    14801483        {
     
    15291532                if (iteration == 0) {
    15301533                    guessStdev = 0.25*guessStdev;
    1531                     psTrace(TRACE, 6, "*** retry stdev is %f.\n", guessStdev);
     1534                    psTrace(TRACE, 6, "*** retry, new stdev is %f.\n", guessStdev);
    15321535                    continue;
    15331536                }
     
    15511554        }
    15521555
     1556        // for test, measure the same result for the upper section
    15531557        {
    15541558            // fit the upper half of the distribution
     
    15921596            }
    15931597
    1594             // calculate upper mean & stdev from parabolic fit -- use this as the result
     1598            // calculate upper mean & stdev from parabolic fit -- ignore this value
    15951599#ifndef PS_NO_TRACE
    15961600            float upperStdev = sqrt(-0.5/poly->coeff[2]);
     
    16011605#endif
    16021606
     1607            // if the resulting value is outside of the range binMin - binMax, use the upper value
     1608            if (done && (guessMean > PS_BIN_MIDPOINT(histogram, binMax - 1))) {
     1609                guessMean = upperMean;
     1610                guessStdev = upperStdev;
     1611            }
     1612
    16031613            psFree (poly);
    16041614        }
     
    16191629    stats->results |= PS_STAT_FITTED_MEAN_V3;
    16201630    stats->results |= PS_STAT_FITTED_STDEV_V3;
     1631
     1632    return true;
     1633}
     1634
     1635/********************
     1636 * perform an asymmetric fit to the population
     1637 * vectorFittedStats_v4 requires guess for fittedMean and fittedStdev
     1638 * robustN50 should also be set
     1639 * gaussian fit is performed using 2D polynomial to ln(y)
     1640 * this version follows the upper portion of the distribution until it passes 0.5*peak
     1641 ********************/
     1642static bool vectorFittedStats_v4 (const psVector* myVector,
     1643                                  const psVector* errors,
     1644                                  psVector* mask,
     1645                                  psMaskType maskVal,
     1646                                  psStats* stats)
     1647{
     1648
     1649    // This procedure requires the mean.  If it has not been already
     1650    // calculated, then call vectorSampleMean()
     1651    if (!(stats->results & PS_STAT_ROBUST_MEDIAN)) {
     1652        vectorRobustStats(myVector, errors, mask, maskVal, stats);
     1653    }
     1654
     1655    // If the mean is NAN, then generate a warning and set the stdev to NAN.
     1656    if (isnan(stats->robustMedian)) {
     1657        stats->fittedStdev = NAN;
     1658        stats->fittedStdev = NAN;
     1659        psTrace(TRACE, 4, "---- %s() end ----\n", __func__);
     1660        return false;
     1661    }
     1662
     1663    float guessStdev = stats->robustStdev;  // pass the guess sigma
     1664    float guessMean = stats->robustMedian;  // pass the guess mean
     1665
     1666    psTrace(TRACE, 6, "The ** starting ** guess mean  is %f.\n", guessMean);
     1667    psTrace(TRACE, 6, "The ** starting ** guess stdev is %f.\n", guessStdev);
     1668
     1669    bool done = false;
     1670    for (int iteration = 0; !done && (iteration < 2); iteration ++) {
     1671        psStats *statsMinMax = psStatsAlloc(PS_STAT_MIN | PS_STAT_MAX); // Statistics for min and max
     1672
     1673        psF32 binSize = 1;
     1674        if (stats->options & PS_STAT_USE_BINSIZE) {
     1675            // Set initial bin size to the specified value.
     1676            binSize = stats->binsize;
     1677            psTrace(TRACE, 6, "Setting initial robust bin size to %.2f\n", binSize);
     1678        } else {
     1679            // construct a histogram with (sigma/2 < binsize < sigma)
     1680            // set roughly so that the lowest bins have about 2 cnts
     1681            // Nsmallest ~ N50 / (4*dN))
     1682            psF32 dN = PS_MAX (1, PS_MIN (4, stats->robustN50 / 8));
     1683            binSize = guessStdev / dN;
     1684        }
     1685
     1686        // Determine the min/max of the vector (which prior outliers masked out)
     1687        int numValid = vectorMinMax(myVector, mask, maskVal, statsMinMax); // Number of values
     1688        float min = statsMinMax->min;
     1689        float max = statsMinMax->max;
     1690        if (numValid == 0 || isnan(min) || isnan(max)) {
     1691            psError(PS_ERR_UNKNOWN, false, "Failed to calculate the min/max of the input vector.\n");
     1692            psFree(statsMinMax);
     1693            psTrace(TRACE, 4, "---- %s(false) end  ----\n", __func__);
     1694            return false;
     1695        }
     1696
     1697        // Calculate the number of bins.
     1698        // XXX can we calculate the binMin, binMax **before** building this histogram?
     1699        long numBins = (max - min) / binSize;
     1700        psTrace(TRACE, 6, "The new min/max values are (%f, %f).\n", min, max);
     1701        psTrace(TRACE, 6, "The new bin size is %f.\n", binSize);
     1702        psTrace(TRACE, 6, "The numBins is %ld\n", numBins);
     1703
     1704        psHistogram *histogram = psHistogramAlloc(min, max, numBins); // A new histogram (without outliers)
     1705        if (!psVectorHistogram(histogram, myVector, errors, mask, maskVal)) {
     1706            psError(PS_ERR_UNKNOWN, false, "Unable to generate histogram for fitted statistics v4.\n");
     1707            psFree(histogram);
     1708            psFree(statsMinMax);
     1709            return false;
     1710        }
     1711        if (psTraceGetLevel("psLib.math") >= 8) {
     1712            PS_VECTOR_PRINT_F32(histogram->nums);
     1713        }
     1714
     1715        // now fit a Gaussian to the upper and lower halves about the peak independently
     1716
     1717        // set the full-range upper and lower limits
     1718        psF32 maxFitSigma = 2.0;
     1719        if (isfinite(stats->clipSigma)) {
     1720            maxFitSigma = fabs(stats->clipSigma);
     1721        }
     1722        if (isfinite(stats->max)) {
     1723            maxFitSigma = fabs(stats->max);
     1724        }
     1725
     1726        psF32 minFitSigma = 2.0;
     1727        if (isfinite(stats->clipSigma)) {
     1728            minFitSigma = fabs(stats->clipSigma);
     1729        }
     1730        if (isfinite(stats->min)) {
     1731            minFitSigma = fabs(stats->min);
     1732        }
     1733
     1734        // select the min and max bins, saturating on the lower and upper end-points
     1735        long binMin, binMax;
     1736        PS_BIN_FOR_VALUE (binMin, histogram->bounds, guessMean - minFitSigma*guessStdev, 0);
     1737        PS_BIN_FOR_VALUE (binMax, histogram->bounds, guessMean + maxFitSigma*guessStdev, 0);
     1738        if (binMin == binMax) {
     1739            psError(PS_ERR_UNKNOWN, false, "Failed to calculate the min/max of the input vector.\n");
     1740            psFree(statsMinMax);
     1741            psTrace(TRACE, 4, "---- %s(false) end  ----\n", __func__);
     1742            return false;
     1743        }
     1744
     1745        // search for mode (peak of histogram within range mean-2sigma - mean+2sigma
     1746        long  binPeak = binMin;
     1747        float valPeak = histogram->nums->data.F32[binPeak];
     1748        for (int i = binMin; i < binMax; i++) {
     1749            if (histogram->nums->data.F32[i] > valPeak) {
     1750                binPeak = i;
     1751                valPeak = histogram->nums->data.F32[binPeak];
     1752            }
     1753            psTrace (TRACE, 6, "(%f = %.0f) ", histogram->bounds->data.F32[i], histogram->nums->data.F32[i]);
     1754        }
     1755        psTrace (TRACE, 6, "\n");
     1756
     1757        // assume a reasonably well-defined gaussian-like population; run from peak out until val < 0.25*peak
     1758
     1759        psTrace(TRACE, 6, "The clipped numBins is %ld\n", binMax - binMin);
     1760        psTrace(TRACE, 6, "The clipped min is %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binMin), binMin);
     1761        psTrace(TRACE, 6, "The clipped max is %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binMax - 1), binMax - 1);
     1762        psTrace(TRACE, 6, "The clipped peak is %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binPeak), binPeak);
     1763        psTrace(TRACE, 6, "The clipped peak value is %f\n", histogram->nums->data.F32[binPeak]);
     1764
     1765        {
     1766            // fit the lower half of the distribution
     1767            // run down until we drop below 0.25*valPeak
     1768            // run up until we drop below 0.50*valPeak
     1769            long binS = binMin;
     1770            long binE = PS_MIN (binPeak + 3, binMax);
     1771            for (int i = binPeak - 3; i >= binMin; i--) {
     1772                if (histogram->nums->data.F32[i] < 0.25*valPeak) {
     1773                    binS = i;
     1774                    break;
     1775                }
     1776            }
     1777            for (int i = binPeak + 3; i < binMax; i++) {
     1778                if (histogram->nums->data.F32[i] < 0.50*valPeak) {
     1779                    binE = i;
     1780                    break;
     1781                }
     1782            }
     1783            psTrace(TRACE, 6, "Lower bound for lower half: %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binS), binS);
     1784            psTrace(TRACE, 6, "Upper bound for lower half: %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binE), binE);
     1785
     1786            psVector *y = psVectorAllocEmpty(binE - binS, PS_TYPE_F32); // Vector of coordinates
     1787            psVector *x = psVectorAllocEmpty(binE - binS, PS_TYPE_F32); // Vector of ordinates
     1788            long j = 0;
     1789            for (long i = binS; i < binE; i++) {
     1790                if (histogram->nums->data.F32[i] <= 0.0)
     1791                    continue;
     1792                x->data.F32[j] = PS_BIN_MIDPOINT(histogram, i);
     1793                y->data.F32[j] = log(histogram->nums->data.F32[i]); // note this is the natural log: expected distribution is A exp(-(x-xo)^2/2sigma^2)
     1794                j++;
     1795            }
     1796            y->n = x->n = j;
     1797
     1798            // fit 2nd order polynomial to ln(y) = -(x-xo)^2/2sigma^2
     1799            psPolynomial1D *poly = psPolynomial1DAlloc(PS_POLYNOMIAL_ORD, 2);
     1800            bool status = psVectorFitPolynomial1D (poly, NULL, 0, y, NULL, x);
     1801            psFree(x);
     1802            psFree(y);
     1803
     1804            if (!status) {
     1805                psError(PS_ERR_UNKNOWN, false, "Failed to fit a gaussian to the robust histogram.\n");
     1806                psFree(poly);
     1807                psFree(histogram);
     1808                psFree(statsMinMax);
     1809                psTrace(TRACE, 4, "---- %s(false) end  ----\n", __func__);
     1810                return false;
     1811            }
     1812
     1813            if (poly->coeff[2] >= 0.0) {
     1814                psTrace(TRACE, 6, "Failed parabolic fit: %f + %f x + %f x^2\n", poly->coeff[0], poly->coeff[1], poly->coeff[2]);
     1815                psFree(poly);
     1816                psFree(histogram);
     1817                psFree(statsMinMax);
     1818
     1819                // sometimes, the guessStdev is much too large.  in this case, the entire real population tends to be found
     1820                // in a single bin.  make one attempt to recover by dropping the guessStdev down by a jump and trying again
     1821                if (iteration == 0) {
     1822                    guessStdev = 0.25*guessStdev;
     1823                    psTrace(TRACE, 6, "*** retry, new stdev is %f.\n", guessStdev);
     1824                    continue;
     1825                }
     1826
     1827                psError(PS_ERR_UNKNOWN, false, "fit did not converge\n");
     1828                psTrace(TRACE, 4, "---- %s(false) end  ----\n", __func__);
     1829                return false;
     1830            }
     1831
     1832            // calculate lower mean & stdev from parabolic fit -- use this as the result
     1833            guessStdev = sqrt(-0.5/poly->coeff[2]);
     1834            guessMean = poly->coeff[1]*PS_SQR(guessStdev);
     1835            if (guessStdev > 0.75*stats->robustStdev) {
     1836                done = true;
     1837            }
     1838            psTrace(TRACE, 6, "Parabolic Lower fit results: %f + %f x + %f x^2\n", poly->coeff[0], poly->coeff[1], poly->coeff[2]);
     1839            psTrace(TRACE, 6, "The lower mean  is %f.\n", guessMean);
     1840            psTrace(TRACE, 6, "The lower stdev is %f.\n", guessStdev);
     1841
     1842            psFree(poly);
     1843        }
     1844
     1845        // if we converge on a solution outside the range binMin - binMax, use a more conservative range
     1846        if (done && (guessMean > PS_BIN_MIDPOINT(histogram, binMax - 1))) {
     1847            psTrace(TRACE, 6, "Inconsistent result, re-trying the fit\n");
     1848
     1849            // fit a symmetric distribution
     1850            // run up until we drop below 0.15*valPeak
     1851            // run up until we drop below 0.15*valPeak
     1852            long binS = binMin;
     1853            long binE = binMax;
     1854            for (int i = binPeak - 3; i >= binMin; i--) {
     1855                if (histogram->nums->data.F32[i] < 0.15*valPeak) {
     1856                    binS = i;
     1857                    break;
     1858                }
     1859            }
     1860            for (int i = binPeak + 3; i < binMax; i++) {
     1861                if (histogram->nums->data.F32[i] < 0.15*valPeak) {
     1862                    binE = i;
     1863                    break;
     1864                }
     1865            }
     1866            psTrace(TRACE, 6, "Lower bound for symmetric range: %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binS), binS);
     1867            psTrace(TRACE, 6, "Upper bound for symmetric range: %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binE), binE);
     1868
     1869            psVector *y = psVectorAllocEmpty(binE - binS, PS_TYPE_F32); // Vector of coordinates
     1870            psVector *x = psVectorAllocEmpty(binE - binS, PS_TYPE_F32); // Vector of ordinates
     1871            long j = 0;
     1872            for (long i = binS; i < binE; i++) {
     1873                if (histogram->nums->data.F32[i] <= 0.0)
     1874                    continue;
     1875                x->data.F32[j] = PS_BIN_MIDPOINT(histogram, i);
     1876                y->data.F32[j] = log(histogram->nums->data.F32[i]); // note this is the natural log: expected distribution is A exp(-(x-xo)^2/2sigma^2)
     1877                j++;
     1878            }
     1879            y->n = x->n = j;
     1880
     1881            // fit 2nd order polynomial to ln(y) = -(x-xo)^2/2sigma^2
     1882            psPolynomial1D *poly = psPolynomial1DAlloc(PS_POLYNOMIAL_ORD, 2);
     1883            bool status = psVectorFitPolynomial1D (poly, NULL, 0, y, NULL, x);
     1884            psFree(x);
     1885            psFree(y);
     1886
     1887            if (!status) {
     1888                psError(PS_ERR_UNKNOWN, false, "Failed to fit a gaussian to the robust histogram.\n");
     1889                psFree(poly);
     1890                psFree(histogram);
     1891                psFree(statsMinMax);
     1892                psTrace(TRACE, 4, "---- %s(false) end  ----\n", __func__);
     1893                return false;
     1894            }
     1895
     1896            // calculate upper mean & stdev from parabolic fit -- ignore this value
     1897#ifndef PS_NO_TRACE
     1898            guessStdev = sqrt(-0.5/poly->coeff[2]);
     1899            guessMean = poly->coeff[1]*PS_SQR(guessStdev);
     1900            psTrace(TRACE, 6, "Parabolic Symmetric fit results: %f + %f x + %f x^2\n", poly->coeff[0], poly->coeff[1], poly->coeff[2]);
     1901            psTrace(TRACE, 6, "The symmetric mean  is %f.\n", guessMean);
     1902            psTrace(TRACE, 6, "The symmetric stdev is %f.\n", guessStdev);
     1903#endif
     1904
     1905            psFree (poly);
     1906        }
     1907
     1908        // Clean up after fitting
     1909        psFree (histogram);
     1910        psFree (statsMinMax);
     1911    }
     1912
     1913    // The fitted mean is the Gaussian mean.
     1914    stats->fittedMean = guessMean;
     1915    psTrace(TRACE, 6, "The fitted mean is %f.\n", stats->fittedMean);
     1916
     1917    // The fitted standard deviation
     1918    stats->fittedStdev = guessStdev;
     1919    psTrace(TRACE, 6, "The fitted stdev is %f.\n", stats->fittedStdev);
     1920
     1921    stats->results |= PS_STAT_FITTED_MEAN_V4;
     1922    stats->results |= PS_STAT_FITTED_STDEV_V4;
    16211923
    16221924    return true;
     
    17482050    psStats *stats = p_psAlloc(file, lineno, func, sizeof(psStats));
    17492051    psMemSetDeallocator(stats, (psFreeFunc)statsFree);
     2052
     2053    // set initial, default values
     2054    psStatsInit (stats);
     2055
     2056    // these values are can be set as desired by the user.  they are not affected by
     2057    // psStatsInit
     2058    stats->clipSigma = 3.0;
     2059    stats->clipIter = 3;
     2060    stats->nSubsample = 100000;
     2061    stats->options = options;
     2062
     2063    psTrace(TRACE, 3, "---- %s() end  ----\n", __func__);
     2064    return stats;
     2065}
     2066
     2067// reset the values which are output, and which may be used from one psStats stage to the next
     2068void psStatsInit(psStats *stats)
     2069{
    17502070    stats->sampleMean = NAN;
    17512071    stats->sampleMedian = NAN;
     
    17662086    stats->clippedStdev = NAN;
    17672087    stats->clippedNvalues = -1;     // XXX: This is never used
    1768     stats->clipSigma = 3.0;
    1769     stats->clipIter = 3;
    17702088    stats->min = NAN;
    17712089    stats->max = NAN;
    17722090    stats->binsize = NAN;
    1773     stats->nSubsample = 100000;
    1774     stats->options = options;
    1775 
    1776     psTrace(TRACE, 3, "---- %s() end  ----\n", __func__);
    1777     return stats;
     2091    return;
    17782092}
    17792093
     
    19282242        }
    19292243        if (!vectorFittedStats_v3(inF32, errorsF32, maskU8, maskVal, stats)) {
     2244            psError(PS_ERR_UNKNOWN, false, _("Failed to calculate fitted statistics"));
     2245            status &= false;
     2246        }
     2247    }
     2248
     2249    // ************************************************************************
     2250    if (stats->options & (PS_STAT_FITTED_MEAN_V4 | PS_STAT_FITTED_STDEV_V4)) {
     2251        if (stats->options & (PS_STAT_FITTED_MEAN | PS_STAT_FITTED_STDEV)) {
     2252            psAbort("you may not specify both FITTED_MEAN and FITTED_MEAN_V4");
     2253        }
     2254        if (!vectorFittedStats_v4(inF32, errorsF32, maskU8, maskVal, stats)) {
    19302255            psError(PS_ERR_UNKNOWN, false, _("Failed to calculate fitted statistics"));
    19312256            status &= false;
     
    19802305    READ_STAT("FITTED_MEAN_V3",  PS_STAT_FITTED_MEAN_V3);
    19812306    READ_STAT("FITTED_STDEV_V3", PS_STAT_FITTED_STDEV_V3);
     2307    READ_STAT("FITTED_V4",       PS_STAT_FITTED_MEAN_V4);
     2308    READ_STAT("FITTED_MEAN_V4",  PS_STAT_FITTED_MEAN_V4);
     2309    READ_STAT("FITTED_STDEV_V4", PS_STAT_FITTED_STDEV_V4);
    19822310    READ_STAT("CLIPPED",         PS_STAT_CLIPPED_MEAN);
    19832311    READ_STAT("CLIPPED_MEAN",    PS_STAT_CLIPPED_MEAN);
     
    20132341    WRITE_STAT("FITTED_MEAN_V3",  PS_STAT_FITTED_MEAN_V3);
    20142342    WRITE_STAT("FITTED_STDEV_V3", PS_STAT_FITTED_STDEV_V3);
     2343    WRITE_STAT("FITTED_MEAN_V4",  PS_STAT_FITTED_MEAN_V4);
     2344    WRITE_STAT("FITTED_STDEV_V4", PS_STAT_FITTED_STDEV_V4);
    20152345    WRITE_STAT("CLIPPED_MEAN",    PS_STAT_CLIPPED_MEAN);
    20162346    WRITE_STAT("CLIPPED_STDEV",   PS_STAT_CLIPPED_STDEV);
     
    20672397      case PS_STAT_FITTED_MEAN_V3:
    20682398      case PS_STAT_FITTED_STDEV_V3:
     2399      case PS_STAT_FITTED_MEAN_V4:
     2400      case PS_STAT_FITTED_STDEV_V4:
    20692401      case PS_STAT_CLIPPED_MEAN:
    20702402      case PS_STAT_CLIPPED_STDEV:
     
    21092441      case PS_STAT_FITTED_STDEV_V3:
    21102442        return stats->fittedStdev;
     2443      case PS_STAT_FITTED_MEAN_V4:
     2444        return stats->fittedMean;
     2445      case PS_STAT_FITTED_STDEV_V4:
     2446        return stats->fittedStdev;
    21112447      case PS_STAT_CLIPPED_MEAN:
    21122448        return stats->clippedMean;
Note: See TracChangeset for help on using the changeset viewer.