IPP Software Navigation Tools IPP Links Communication Pan-STARRS Links

Ignore:
Timestamp:
Jan 18, 2007, 6:32:27 PM (19 years ago)
Author:
magnier
Message:

added FITTED_MEAN_V3: an asymmetric (top-heavy) population; better names for this and V2 are warrented

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/psLib/src/math/psStats.c

    r10999 r11155  
    1313 * use ->min and ->max (PS_STAT_USE_RANGE)
    1414 *
    15  *  @version $Revision: 1.198 $ $Name: not supported by cvs2svn $
    16  *  @date $Date: 2007-01-09 22:38:53 $
     15 *  @version $Revision: 1.199 $ $Name: not supported by cvs2svn $
     16 *  @date $Date: 2007-01-19 04:32:27 $
    1717 *
    1818 *  Copyright 2006 IfA, University of Hawaii
     
    11321132    float guessMean = stats->robustMedian;  // pass the guess mean
    11331133
    1134     psTrace("psLib.math", 6, "The guess mean  is %f.\n", guessMean);
    1135     psTrace("psLib.math", 6, "The guess stdev is %f.\n", guessStdev);
     1134    psTrace("psLib.math", 6, "The ** starting ** guess mean  is %f.\n", guessMean);
     1135    psTrace("psLib.math", 6, "The ** starting ** guess stdev is %f.\n", guessStdev);
    11361136
    11371137    bool done = false;
     
    12581258            psFree(y);
    12591259            psFree(poly);
    1260             // psFree(fitStats);
    1261             // psFree(fitMask);
     1260            psFree(histogram);
     1261            psFree(statsMinMax);
     1262            psTrace("psLib.math", 4, "---- %s(false) end  ----\n", __func__);
     1263            return false;
     1264        }
     1265
     1266        if (poly->coeff[2] >= 0.0) {
     1267            psTrace("psLib.math", 6, "Parabolic fit results: %f + %f x + %f x^2\n", poly->coeff[0], poly->coeff[1], poly->coeff[2]);
     1268            psError(PS_ERR_UNKNOWN, false, "fit did not converge\n");
     1269            psFree(x);
     1270            psFree(y);
     1271            psFree(poly);
    12621272            psFree(histogram);
    12631273            psFree(statsMinMax);
     
    12711281            done = true;
    12721282        } else {
     1283            psTrace("psLib.math", 6, "Parabolic fit results: %f + %f x + %f x^2\n", poly->coeff[0], poly->coeff[1], poly->coeff[2]);
    12731284            psTrace("psLib.math", 6, "The new guess mean  is %f.\n", guessMean);
    12741285            psTrace("psLib.math", 6, "The new guess stdev is %f.\n", guessStdev);
     
    12951306    stats->results |= PS_STAT_FITTED_MEAN_V2;
    12961307    stats->results |= PS_STAT_FITTED_STDEV_V2;
     1308
     1309    return true;
     1310}
     1311
     1312/********************
     1313 * perform an asymmetric fit to the population
     1314 * vectorFittedStats_v3 requires guess for fittedMean and fittedStdev
     1315 * robustN50 should also be set
     1316 * gaussian fit is performed using 2D polynomial to ln(y)
     1317 ********************/
     1318static bool vectorFittedStats_v3 (const psVector* myVector,
     1319                                  const psVector* errors,
     1320                                  psVector* mask,
     1321                                  psMaskType maskVal,
     1322                                  psStats* stats)
     1323{
     1324
     1325    // This procedure requires the mean.  If it has not been already
     1326    // calculated, then call vectorSampleMean()
     1327    if (!(stats->results & PS_STAT_ROBUST_MEDIAN)) {
     1328        vectorRobustStats(myVector, errors, mask, maskVal, stats);
     1329    }
     1330
     1331    // If the mean is NAN, then generate a warning and set the stdev to NAN.
     1332    if (isnan(stats->robustMedian)) {
     1333        stats->fittedStdev = NAN;
     1334        stats->fittedStdev = NAN;
     1335        psTrace("psLib.math", 4, "---- %s() end ----\n", __func__);
     1336        return false;
     1337    }
     1338
     1339    float guessStdev = stats->robustStdev;  // pass the guess sigma
     1340    float guessMean = stats->robustMedian;  // pass the guess mean
     1341
     1342    psTrace("psLib.math", 6, "The ** starting ** guess mean  is %f.\n", guessMean);
     1343    psTrace("psLib.math", 6, "The ** starting ** guess stdev is %f.\n", guessStdev);
     1344
     1345    bool done = false;
     1346    for (int iteration = 0; !done && (iteration < 2); iteration ++) {
     1347        psStats *statsMinMax = psStatsAlloc(PS_STAT_MIN | PS_STAT_MAX); // Statistics for min and max
     1348
     1349        psScalar tmpScalar;                 // Temporary scalar variable, for p_psVectorBinDisect
     1350        tmpScalar.type.type = PS_TYPE_F32;
     1351
     1352        psF32 binSize = 1;
     1353        if (stats->options & PS_STAT_USE_BINSIZE) {
     1354            // Set initial bin size to the specified value.
     1355            binSize = stats->binsize;
     1356            psTrace("psLib.math", 6, "Setting initial robust bin size to %.2f\n", binSize);
     1357        } else {
     1358            // construct a histogram with (sigma/2 < binsize < sigma)
     1359            // set roughly so that the lowest bins have about 2 cnts
     1360            // Nsmallest ~ N50 / (4*dN))
     1361            psF32 dN = PS_MAX (1, PS_MIN (4, stats->robustN50 / 8));
     1362            binSize = guessStdev / dN;
     1363        }
     1364
     1365        // Determine the min/max of the vector (which prior outliers masked out)
     1366        int numValid = vectorMinMax(myVector, mask, maskVal, statsMinMax); // Number of values
     1367        float min = statsMinMax->min;
     1368        float max = statsMinMax->max;
     1369        if (numValid == 0 || isnan(min) || isnan(max)) {
     1370            psError(PS_ERR_UNKNOWN, false, "Failed to calculate the min/max of the input vector.\n");
     1371            psFree(statsMinMax);
     1372            psTrace("psLib.math", 4, "---- %s(false) end  ----\n", __func__);
     1373            return false;
     1374        }
     1375
     1376        // Calculate the number of bins.
     1377        // XXX can we calculate the binMin, binMax **before** building this histogram?
     1378        long numBins = (max - min) / binSize;
     1379        psTrace("psLib.math", 6, "The new min/max values are (%f, %f).\n", min, max);
     1380        psTrace("psLib.math", 6, "The new bin size is %f.\n", binSize);
     1381        psTrace("psLib.math", 6, "The numBins is %ld\n", numBins);
     1382
     1383        psHistogram *histogram = psHistogramAlloc(min, max, numBins); // A new histogram (without outliers)
     1384        histogram = psVectorHistogram(histogram, myVector, errors, mask, maskVal);
     1385        if (psTraceGetLevel("psLib.math") >= 8) {
     1386            PS_VECTOR_PRINT_F32(histogram->nums);
     1387        }
     1388
     1389        // now fit a Gaussian to the upper and lower halves about the peak independently
     1390
     1391        long binMin, binMax;
     1392        float upperMean, upperStdev;
     1393
     1394        // set the full-range upper and lower limits
     1395        psF32 maxFitSigma = 2.0;
     1396        if (isfinite(stats->clipSigma)) {
     1397            maxFitSigma = fabs(stats->clipSigma);
     1398        }
     1399        if (isfinite(stats->max)) {
     1400            maxFitSigma = fabs(stats->max);
     1401        }
     1402
     1403        psF32 minFitSigma = 2.0;
     1404        if (isfinite(stats->clipSigma)) {
     1405            minFitSigma = fabs(stats->clipSigma);
     1406        }
     1407        if (isfinite(stats->min)) {
     1408            minFitSigma = fabs(stats->min);
     1409        }
     1410
     1411        tmpScalar.data.F32 = guessMean - minFitSigma*guessStdev;
     1412        if (tmpScalar.data.F32 < min) {
     1413            binMin = 0;
     1414        } else {
     1415            binMin = p_psVectorBinDisect(histogram->bounds, &tmpScalar);
     1416        }
     1417        tmpScalar.data.F32 = guessMean + maxFitSigma*guessStdev;
     1418        if (tmpScalar.data.F32 > max) {
     1419            binMax = histogram->bounds->n;
     1420        } else {
     1421            binMax = p_psVectorBinDisect(histogram->bounds, &tmpScalar) + 1;
     1422        }
     1423        if (binMin < 0 || binMax < 0) {
     1424            psError(PS_ERR_UNKNOWN, false, "Failed to calculate the -%f sigma : +%f sigma bins\n", minFitSigma, maxFitSigma);
     1425            psFree(statsMinMax);
     1426            psFree(histogram);
     1427            psTrace("psLib.math", 4, "---- %s(false) end  ----\n", __func__);
     1428            return false;
     1429        }
     1430
     1431        // search for mode (peak of histogram within range mean-2sigma - mean+2sigma
     1432        long  binPeak = binMin;
     1433        float valPeak = histogram->nums->data.F32[binPeak];
     1434        for (int i = binMin; i < binMax; i++) {
     1435            if (histogram->nums->data.F32[i] > valPeak) {
     1436                binPeak = i;
     1437                valPeak = histogram->nums->data.F32[binPeak];
     1438            }
     1439        }
     1440
     1441        // assume a reasonably well-defined gaussian-like population; run from peak out until val < 0.25*peak
     1442
     1443        psTrace("psLib.math", 6, "The clipped numBins is %ld\n", binMax - binMin);
     1444        psTrace("psLib.math", 6, "The clipped min is %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binMin), binMin);
     1445        psTrace("psLib.math", 6, "The clipped max is %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binMax), binMax);
     1446        psTrace("psLib.math", 6, "The clipped peak is %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binPeak), binPeak);
     1447
     1448        {
     1449            // fit the lower half of the distribution
     1450            // run down until we drop below 0.25*valPeak
     1451            long binS = binMin;
     1452            long binE = PS_MIN (binPeak + 3, binMax);
     1453            for (int i = binPeak-3; i >= binMin; i--) {
     1454                if (histogram->nums->data.F32[i] < 0.25*valPeak) {
     1455                    binS = i;
     1456                    break;
     1457                }
     1458            }
     1459            psTrace("psLib.math", 6, "Lower bound for lower half: %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binS), binS);
     1460            psTrace("psLib.math", 6, "Upper bound for lower half: %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binE), binE);
     1461
     1462            psVector *y = psVectorAllocEmpty(binE - binS, PS_TYPE_F32); // Vector of coordinates
     1463            psVector *x = psVectorAllocEmpty(binE - binS, PS_TYPE_F32); // Vector of ordinates
     1464            long j = 0;
     1465            for (long i = binS; i < binE; i++) {
     1466                if (histogram->nums->data.F32[i] <= 0.0)
     1467                    continue;
     1468                x->data.F32[j] = PS_BIN_MIDPOINT(histogram, i);
     1469                y->data.F32[j] = log(histogram->nums->data.F32[i]); // note this is the natural log: expected distribution is A exp(-(x-xo)^2/2sigma^2)
     1470                j++;
     1471            }
     1472            y->n = x->n = j;
     1473
     1474            // fit 2nd order polynomial to ln(y) = -(x-xo)^2/2sigma^2
     1475            psPolynomial1D *poly = psPolynomial1DAlloc(PS_POLYNOMIAL_ORD, 2);
     1476            bool status = psVectorFitPolynomial1D (poly, NULL, 0, y, NULL, x);
     1477            psFree(x);
     1478            psFree(y);
     1479
     1480            if (!status) {
     1481                psError(PS_ERR_UNKNOWN, false, "Failed to fit a gaussian to the robust histogram.\n");
     1482                psFree(poly);
     1483                psFree(histogram);
     1484                psFree(statsMinMax);
     1485                psTrace("psLib.math", 4, "---- %s(false) end  ----\n", __func__);
     1486                return false;
     1487            }
     1488
     1489            if (poly->coeff[2] >= 0.0) {
     1490                psTrace("psLib.math", 6, "Failed parabolic fit: %f + %f x + %f x^2\n", poly->coeff[0], poly->coeff[1], poly->coeff[2]);
     1491                psFree(poly);
     1492                psFree(histogram);
     1493                psFree(statsMinMax);
     1494
     1495                // sometimes, the guessStdev is much too large.  in this case, the entire real population tends to be found
     1496                // in a single bin.  make one attempt to recover by dropping the guessStdev down by a jump and trying again
     1497                if (iteration == 0) {
     1498                    guessStdev = 0.25*guessStdev;
     1499                    psTrace("psLib.math", 6, "*** retry stdev is %f.\n", guessStdev);
     1500                    continue;
     1501                }
     1502
     1503                psError(PS_ERR_UNKNOWN, false, "fit did not converge\n");
     1504                psTrace("psLib.math", 4, "---- %s(false) end  ----\n", __func__);
     1505                return false;
     1506            }
     1507
     1508            // calculate lower mean & stdev from parabolic fit -- use this as the result
     1509            guessStdev = sqrt(-0.5/poly->coeff[2]);
     1510            guessMean = poly->coeff[1]*PS_SQR(guessStdev);
     1511            if (guessStdev > 0.75*stats->robustStdev) {
     1512                done = true;
     1513            }
     1514            psTrace("psLib.math", 6, "Parabolic Lower fit results: %f + %f x + %f x^2\n", poly->coeff[0], poly->coeff[1], poly->coeff[2]);
     1515            psTrace("psLib.math", 6, "The lower mean  is %f.\n", guessMean);
     1516            psTrace("psLib.math", 6, "The lower stdev is %f.\n", guessStdev);
     1517
     1518            psFree(poly);
     1519        }
     1520
     1521        {
     1522            // fit the upper half of the distribution
     1523            // run up until we drop below 0.25*valPeak
     1524            long binS = PS_MAX (binPeak - 3, 0);
     1525            long binE = binMax;
     1526            for (int i = binPeak+3; i < binMax; i++) {
     1527                if (histogram->nums->data.F32[i] < 0.25*valPeak) {
     1528                    binE = i;
     1529                    break;
     1530                }
     1531            }
     1532            psTrace("psLib.math", 6, "Lower bound for upper half: %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binS), binS);
     1533            psTrace("psLib.math", 6, "Upper bound for upper half: %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binE), binE);
     1534
     1535            psVector *y = psVectorAllocEmpty(binE - binS, PS_TYPE_F32); // Vector of coordinates
     1536            psVector *x = psVectorAllocEmpty(binE - binS, PS_TYPE_F32); // Vector of ordinates
     1537            long j = 0;
     1538            for (long i = binS; i < binE; i++) {
     1539                if (histogram->nums->data.F32[i] <= 0.0)
     1540                    continue;
     1541                x->data.F32[j] = PS_BIN_MIDPOINT(histogram, i);
     1542                y->data.F32[j] = log(histogram->nums->data.F32[i]); // note this is the natural log: expected distribution is A exp(-(x-xo)^2/2sigma^2)
     1543                j++;
     1544            }
     1545            y->n = x->n = j;
     1546
     1547            // fit 2nd order polynomial to ln(y) = -(x-xo)^2/2sigma^2
     1548            psPolynomial1D *poly = psPolynomial1DAlloc(PS_POLYNOMIAL_ORD, 2);
     1549            bool status = psVectorFitPolynomial1D (poly, NULL, 0, y, NULL, x);
     1550            psFree(x);
     1551            psFree(y);
     1552
     1553            if (!status) {
     1554                psError(PS_ERR_UNKNOWN, false, "Failed to fit a gaussian to the robust histogram.\n");
     1555                psFree(poly);
     1556                psFree(histogram);
     1557                psFree(statsMinMax);
     1558                psTrace("psLib.math", 4, "---- %s(false) end  ----\n", __func__);
     1559                return false;
     1560            }
     1561
     1562            // calculate upper mean & stdev from parabolic fit -- use this as the result
     1563            upperStdev = sqrt(-0.5/poly->coeff[2]);
     1564            upperMean = poly->coeff[1]*PS_SQR(upperStdev);
     1565            psTrace("psLib.math", 6, "Parabolic Upper fit results: %f + %f x + %f x^2\n", poly->coeff[0], poly->coeff[1], poly->coeff[2]);
     1566            psTrace("psLib.math", 6, "The upper mean  is %f.\n", upperMean);
     1567            psTrace("psLib.math", 6, "The upper stdev is %f.\n", upperStdev);
     1568
     1569            psFree (poly);
     1570        }
     1571
     1572        // Clean up after fitting
     1573        psFree (histogram);
     1574        psFree (statsMinMax);
     1575    }
     1576
     1577    // The fitted mean is the Gaussian mean.
     1578    stats->fittedMean = guessMean;
     1579    psTrace("psLib.math", 6, "The fitted mean is %f.\n", stats->fittedMean);
     1580
     1581    // The fitted standard deviation
     1582    stats->fittedStdev = guessStdev;
     1583    psTrace("psLib.math", 6, "The fitted stdev is %f.\n", stats->fittedStdev);
     1584
     1585    stats->results |= PS_STAT_FITTED_MEAN_V3;
     1586    stats->results |= PS_STAT_FITTED_STDEV_V3;
    12971587
    12981588    return true;
     
    15891879
    15901880    // ************************************************************************
     1881    if (stats->options & (PS_STAT_FITTED_MEAN_V3 | PS_STAT_FITTED_STDEV_V3)) {
     1882        if (stats->options & (PS_STAT_FITTED_MEAN | PS_STAT_FITTED_STDEV)) {
     1883            psAbort ("stats", "you may not specify both FITTED_MEAN and FITTED_MEAN_V3");
     1884        }
     1885        if (!vectorFittedStats_v3(inF32, errorsF32, maskU8, maskVal, stats)) {
     1886            psError(PS_ERR_UNKNOWN, false, _("Failed to calculate fitted statistics"));
     1887            status &= false;
     1888        }
     1889    }
     1890
     1891    // ************************************************************************
    15911892    if ((stats->options & PS_STAT_CLIPPED_MEAN) || (stats->options & PS_STAT_CLIPPED_STDEV)) {
    15921893        if (!vectorClippedStats(inF32, errorsF32, maskU8, maskVal, stats)) {
     
    16281929    READ_STAT("FITTED_MEAN_V2",  PS_STAT_FITTED_MEAN_V2);
    16291930    READ_STAT("FITTED_STDEV_V2", PS_STAT_FITTED_STDEV_V2);
     1931    READ_STAT("FITTED_V3",       PS_STAT_FITTED_MEAN_V3);
     1932    READ_STAT("FITTED_MEAN_V3",  PS_STAT_FITTED_MEAN_V3);
     1933    READ_STAT("FITTED_STDEV_V3", PS_STAT_FITTED_STDEV_V3);
    16301934    READ_STAT("CLIPPED",         PS_STAT_CLIPPED_MEAN);
    16311935    READ_STAT("CLIPPED_MEAN",    PS_STAT_CLIPPED_MEAN);
     
    16571961    WRITE_STAT("FITTED_MEAN_V2",  PS_STAT_FITTED_MEAN_V2);
    16581962    WRITE_STAT("FITTED_STDEV_V2", PS_STAT_FITTED_STDEV_V2);
     1963    WRITE_STAT("FITTED_MEAN_V3",  PS_STAT_FITTED_MEAN_V3);
     1964    WRITE_STAT("FITTED_STDEV_V3", PS_STAT_FITTED_STDEV_V3);
    16591965    WRITE_STAT("CLIPPED_MEAN",    PS_STAT_CLIPPED_MEAN);
    16601966    WRITE_STAT("CLIPPED_STDEV",   PS_STAT_CLIPPED_STDEV);
     
    17082014    case PS_STAT_FITTED_MEAN_V2:
    17092015    case PS_STAT_FITTED_STDEV_V2:
     2016    case PS_STAT_FITTED_MEAN_V3:
     2017    case PS_STAT_FITTED_STDEV_V3:
    17102018    case PS_STAT_CLIPPED_MEAN:
    17112019    case PS_STAT_CLIPPED_STDEV:
     
    17422050    case PS_STAT_FITTED_STDEV_V2:
    17432051        return stats->fittedStdev;
     2052    case PS_STAT_FITTED_MEAN_V3:
     2053        return stats->fittedMean;
     2054    case PS_STAT_FITTED_STDEV_V3:
     2055        return stats->fittedStdev;
    17442056    case PS_STAT_CLIPPED_MEAN:
    17452057        return stats->clippedMean;
Note: See TracChangeset for help on using the changeset viewer.