Changeset 11155 for trunk/psLib/src/math/psStats.c
- Timestamp:
- Jan 18, 2007, 6:32:27 PM (19 years ago)
- File:
-
- 1 edited
-
trunk/psLib/src/math/psStats.c (modified) (10 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/psLib/src/math/psStats.c
r10999 r11155 13 13 * use ->min and ->max (PS_STAT_USE_RANGE) 14 14 * 15 * @version $Revision: 1.19 8$ $Name: not supported by cvs2svn $16 * @date $Date: 2007-01- 09 22:38:53$15 * @version $Revision: 1.199 $ $Name: not supported by cvs2svn $ 16 * @date $Date: 2007-01-19 04:32:27 $ 17 17 * 18 18 * Copyright 2006 IfA, University of Hawaii … … 1132 1132 float guessMean = stats->robustMedian; // pass the guess mean 1133 1133 1134 psTrace("psLib.math", 6, "The guess mean is %f.\n", guessMean);1135 psTrace("psLib.math", 6, "The guess stdev is %f.\n", guessStdev);1134 psTrace("psLib.math", 6, "The ** starting ** guess mean is %f.\n", guessMean); 1135 psTrace("psLib.math", 6, "The ** starting ** guess stdev is %f.\n", guessStdev); 1136 1136 1137 1137 bool done = false; … … 1258 1258 psFree(y); 1259 1259 psFree(poly); 1260 // psFree(fitStats); 1261 // psFree(fitMask); 1260 psFree(histogram); 1261 psFree(statsMinMax); 1262 psTrace("psLib.math", 4, "---- %s(false) end ----\n", __func__); 1263 return false; 1264 } 1265 1266 if (poly->coeff[2] >= 0.0) { 1267 psTrace("psLib.math", 6, "Parabolic fit results: %f + %f x + %f x^2\n", poly->coeff[0], poly->coeff[1], poly->coeff[2]); 1268 psError(PS_ERR_UNKNOWN, false, "fit did not converge\n"); 1269 psFree(x); 1270 psFree(y); 1271 psFree(poly); 1262 1272 psFree(histogram); 1263 1273 psFree(statsMinMax); … … 1271 1281 done = true; 1272 1282 } else { 1283 psTrace("psLib.math", 6, "Parabolic fit results: %f + %f x + %f x^2\n", poly->coeff[0], poly->coeff[1], poly->coeff[2]); 1273 1284 psTrace("psLib.math", 6, "The new guess mean is %f.\n", guessMean); 1274 1285 psTrace("psLib.math", 6, "The new guess stdev is %f.\n", guessStdev); … … 1295 1306 stats->results |= PS_STAT_FITTED_MEAN_V2; 1296 1307 stats->results |= PS_STAT_FITTED_STDEV_V2; 1308 1309 return true; 1310 } 1311 1312 /******************** 1313 * perform an asymmetric fit to the population 1314 * vectorFittedStats_v3 requires guess for fittedMean and fittedStdev 1315 * robustN50 should also be set 1316 * gaussian fit is performed using 2D polynomial to ln(y) 1317 ********************/ 1318 static bool vectorFittedStats_v3 (const psVector* myVector, 1319 const psVector* errors, 1320 psVector* mask, 1321 psMaskType maskVal, 1322 psStats* stats) 1323 { 1324 1325 // This procedure requires the mean. If it has not been already 1326 // calculated, then call vectorSampleMean() 1327 if (!(stats->results & PS_STAT_ROBUST_MEDIAN)) { 1328 vectorRobustStats(myVector, errors, mask, maskVal, stats); 1329 } 1330 1331 // If the mean is NAN, then generate a warning and set the stdev to NAN. 1332 if (isnan(stats->robustMedian)) { 1333 stats->fittedStdev = NAN; 1334 stats->fittedStdev = NAN; 1335 psTrace("psLib.math", 4, "---- %s() end ----\n", __func__); 1336 return false; 1337 } 1338 1339 float guessStdev = stats->robustStdev; // pass the guess sigma 1340 float guessMean = stats->robustMedian; // pass the guess mean 1341 1342 psTrace("psLib.math", 6, "The ** starting ** guess mean is %f.\n", guessMean); 1343 psTrace("psLib.math", 6, "The ** starting ** guess stdev is %f.\n", guessStdev); 1344 1345 bool done = false; 1346 for (int iteration = 0; !done && (iteration < 2); iteration ++) { 1347 psStats *statsMinMax = psStatsAlloc(PS_STAT_MIN | PS_STAT_MAX); // Statistics for min and max 1348 1349 psScalar tmpScalar; // Temporary scalar variable, for p_psVectorBinDisect 1350 tmpScalar.type.type = PS_TYPE_F32; 1351 1352 psF32 binSize = 1; 1353 if (stats->options & PS_STAT_USE_BINSIZE) { 1354 // Set initial bin size to the specified value. 1355 binSize = stats->binsize; 1356 psTrace("psLib.math", 6, "Setting initial robust bin size to %.2f\n", binSize); 1357 } else { 1358 // construct a histogram with (sigma/2 < binsize < sigma) 1359 // set roughly so that the lowest bins have about 2 cnts 1360 // Nsmallest ~ N50 / (4*dN)) 1361 psF32 dN = PS_MAX (1, PS_MIN (4, stats->robustN50 / 8)); 1362 binSize = guessStdev / dN; 1363 } 1364 1365 // Determine the min/max of the vector (which prior outliers masked out) 1366 int numValid = vectorMinMax(myVector, mask, maskVal, statsMinMax); // Number of values 1367 float min = statsMinMax->min; 1368 float max = statsMinMax->max; 1369 if (numValid == 0 || isnan(min) || isnan(max)) { 1370 psError(PS_ERR_UNKNOWN, false, "Failed to calculate the min/max of the input vector.\n"); 1371 psFree(statsMinMax); 1372 psTrace("psLib.math", 4, "---- %s(false) end ----\n", __func__); 1373 return false; 1374 } 1375 1376 // Calculate the number of bins. 1377 // XXX can we calculate the binMin, binMax **before** building this histogram? 1378 long numBins = (max - min) / binSize; 1379 psTrace("psLib.math", 6, "The new min/max values are (%f, %f).\n", min, max); 1380 psTrace("psLib.math", 6, "The new bin size is %f.\n", binSize); 1381 psTrace("psLib.math", 6, "The numBins is %ld\n", numBins); 1382 1383 psHistogram *histogram = psHistogramAlloc(min, max, numBins); // A new histogram (without outliers) 1384 histogram = psVectorHistogram(histogram, myVector, errors, mask, maskVal); 1385 if (psTraceGetLevel("psLib.math") >= 8) { 1386 PS_VECTOR_PRINT_F32(histogram->nums); 1387 } 1388 1389 // now fit a Gaussian to the upper and lower halves about the peak independently 1390 1391 long binMin, binMax; 1392 float upperMean, upperStdev; 1393 1394 // set the full-range upper and lower limits 1395 psF32 maxFitSigma = 2.0; 1396 if (isfinite(stats->clipSigma)) { 1397 maxFitSigma = fabs(stats->clipSigma); 1398 } 1399 if (isfinite(stats->max)) { 1400 maxFitSigma = fabs(stats->max); 1401 } 1402 1403 psF32 minFitSigma = 2.0; 1404 if (isfinite(stats->clipSigma)) { 1405 minFitSigma = fabs(stats->clipSigma); 1406 } 1407 if (isfinite(stats->min)) { 1408 minFitSigma = fabs(stats->min); 1409 } 1410 1411 tmpScalar.data.F32 = guessMean - minFitSigma*guessStdev; 1412 if (tmpScalar.data.F32 < min) { 1413 binMin = 0; 1414 } else { 1415 binMin = p_psVectorBinDisect(histogram->bounds, &tmpScalar); 1416 } 1417 tmpScalar.data.F32 = guessMean + maxFitSigma*guessStdev; 1418 if (tmpScalar.data.F32 > max) { 1419 binMax = histogram->bounds->n; 1420 } else { 1421 binMax = p_psVectorBinDisect(histogram->bounds, &tmpScalar) + 1; 1422 } 1423 if (binMin < 0 || binMax < 0) { 1424 psError(PS_ERR_UNKNOWN, false, "Failed to calculate the -%f sigma : +%f sigma bins\n", minFitSigma, maxFitSigma); 1425 psFree(statsMinMax); 1426 psFree(histogram); 1427 psTrace("psLib.math", 4, "---- %s(false) end ----\n", __func__); 1428 return false; 1429 } 1430 1431 // search for mode (peak of histogram within range mean-2sigma - mean+2sigma 1432 long binPeak = binMin; 1433 float valPeak = histogram->nums->data.F32[binPeak]; 1434 for (int i = binMin; i < binMax; i++) { 1435 if (histogram->nums->data.F32[i] > valPeak) { 1436 binPeak = i; 1437 valPeak = histogram->nums->data.F32[binPeak]; 1438 } 1439 } 1440 1441 // assume a reasonably well-defined gaussian-like population; run from peak out until val < 0.25*peak 1442 1443 psTrace("psLib.math", 6, "The clipped numBins is %ld\n", binMax - binMin); 1444 psTrace("psLib.math", 6, "The clipped min is %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binMin), binMin); 1445 psTrace("psLib.math", 6, "The clipped max is %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binMax), binMax); 1446 psTrace("psLib.math", 6, "The clipped peak is %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binPeak), binPeak); 1447 1448 { 1449 // fit the lower half of the distribution 1450 // run down until we drop below 0.25*valPeak 1451 long binS = binMin; 1452 long binE = PS_MIN (binPeak + 3, binMax); 1453 for (int i = binPeak-3; i >= binMin; i--) { 1454 if (histogram->nums->data.F32[i] < 0.25*valPeak) { 1455 binS = i; 1456 break; 1457 } 1458 } 1459 psTrace("psLib.math", 6, "Lower bound for lower half: %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binS), binS); 1460 psTrace("psLib.math", 6, "Upper bound for lower half: %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binE), binE); 1461 1462 psVector *y = psVectorAllocEmpty(binE - binS, PS_TYPE_F32); // Vector of coordinates 1463 psVector *x = psVectorAllocEmpty(binE - binS, PS_TYPE_F32); // Vector of ordinates 1464 long j = 0; 1465 for (long i = binS; i < binE; i++) { 1466 if (histogram->nums->data.F32[i] <= 0.0) 1467 continue; 1468 x->data.F32[j] = PS_BIN_MIDPOINT(histogram, i); 1469 y->data.F32[j] = log(histogram->nums->data.F32[i]); // note this is the natural log: expected distribution is A exp(-(x-xo)^2/2sigma^2) 1470 j++; 1471 } 1472 y->n = x->n = j; 1473 1474 // fit 2nd order polynomial to ln(y) = -(x-xo)^2/2sigma^2 1475 psPolynomial1D *poly = psPolynomial1DAlloc(PS_POLYNOMIAL_ORD, 2); 1476 bool status = psVectorFitPolynomial1D (poly, NULL, 0, y, NULL, x); 1477 psFree(x); 1478 psFree(y); 1479 1480 if (!status) { 1481 psError(PS_ERR_UNKNOWN, false, "Failed to fit a gaussian to the robust histogram.\n"); 1482 psFree(poly); 1483 psFree(histogram); 1484 psFree(statsMinMax); 1485 psTrace("psLib.math", 4, "---- %s(false) end ----\n", __func__); 1486 return false; 1487 } 1488 1489 if (poly->coeff[2] >= 0.0) { 1490 psTrace("psLib.math", 6, "Failed parabolic fit: %f + %f x + %f x^2\n", poly->coeff[0], poly->coeff[1], poly->coeff[2]); 1491 psFree(poly); 1492 psFree(histogram); 1493 psFree(statsMinMax); 1494 1495 // sometimes, the guessStdev is much too large. in this case, the entire real population tends to be found 1496 // in a single bin. make one attempt to recover by dropping the guessStdev down by a jump and trying again 1497 if (iteration == 0) { 1498 guessStdev = 0.25*guessStdev; 1499 psTrace("psLib.math", 6, "*** retry stdev is %f.\n", guessStdev); 1500 continue; 1501 } 1502 1503 psError(PS_ERR_UNKNOWN, false, "fit did not converge\n"); 1504 psTrace("psLib.math", 4, "---- %s(false) end ----\n", __func__); 1505 return false; 1506 } 1507 1508 // calculate lower mean & stdev from parabolic fit -- use this as the result 1509 guessStdev = sqrt(-0.5/poly->coeff[2]); 1510 guessMean = poly->coeff[1]*PS_SQR(guessStdev); 1511 if (guessStdev > 0.75*stats->robustStdev) { 1512 done = true; 1513 } 1514 psTrace("psLib.math", 6, "Parabolic Lower fit results: %f + %f x + %f x^2\n", poly->coeff[0], poly->coeff[1], poly->coeff[2]); 1515 psTrace("psLib.math", 6, "The lower mean is %f.\n", guessMean); 1516 psTrace("psLib.math", 6, "The lower stdev is %f.\n", guessStdev); 1517 1518 psFree(poly); 1519 } 1520 1521 { 1522 // fit the upper half of the distribution 1523 // run up until we drop below 0.25*valPeak 1524 long binS = PS_MAX (binPeak - 3, 0); 1525 long binE = binMax; 1526 for (int i = binPeak+3; i < binMax; i++) { 1527 if (histogram->nums->data.F32[i] < 0.25*valPeak) { 1528 binE = i; 1529 break; 1530 } 1531 } 1532 psTrace("psLib.math", 6, "Lower bound for upper half: %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binS), binS); 1533 psTrace("psLib.math", 6, "Upper bound for upper half: %f (%ld)\n", PS_BIN_MIDPOINT(histogram, binE), binE); 1534 1535 psVector *y = psVectorAllocEmpty(binE - binS, PS_TYPE_F32); // Vector of coordinates 1536 psVector *x = psVectorAllocEmpty(binE - binS, PS_TYPE_F32); // Vector of ordinates 1537 long j = 0; 1538 for (long i = binS; i < binE; i++) { 1539 if (histogram->nums->data.F32[i] <= 0.0) 1540 continue; 1541 x->data.F32[j] = PS_BIN_MIDPOINT(histogram, i); 1542 y->data.F32[j] = log(histogram->nums->data.F32[i]); // note this is the natural log: expected distribution is A exp(-(x-xo)^2/2sigma^2) 1543 j++; 1544 } 1545 y->n = x->n = j; 1546 1547 // fit 2nd order polynomial to ln(y) = -(x-xo)^2/2sigma^2 1548 psPolynomial1D *poly = psPolynomial1DAlloc(PS_POLYNOMIAL_ORD, 2); 1549 bool status = psVectorFitPolynomial1D (poly, NULL, 0, y, NULL, x); 1550 psFree(x); 1551 psFree(y); 1552 1553 if (!status) { 1554 psError(PS_ERR_UNKNOWN, false, "Failed to fit a gaussian to the robust histogram.\n"); 1555 psFree(poly); 1556 psFree(histogram); 1557 psFree(statsMinMax); 1558 psTrace("psLib.math", 4, "---- %s(false) end ----\n", __func__); 1559 return false; 1560 } 1561 1562 // calculate upper mean & stdev from parabolic fit -- use this as the result 1563 upperStdev = sqrt(-0.5/poly->coeff[2]); 1564 upperMean = poly->coeff[1]*PS_SQR(upperStdev); 1565 psTrace("psLib.math", 6, "Parabolic Upper fit results: %f + %f x + %f x^2\n", poly->coeff[0], poly->coeff[1], poly->coeff[2]); 1566 psTrace("psLib.math", 6, "The upper mean is %f.\n", upperMean); 1567 psTrace("psLib.math", 6, "The upper stdev is %f.\n", upperStdev); 1568 1569 psFree (poly); 1570 } 1571 1572 // Clean up after fitting 1573 psFree (histogram); 1574 psFree (statsMinMax); 1575 } 1576 1577 // The fitted mean is the Gaussian mean. 1578 stats->fittedMean = guessMean; 1579 psTrace("psLib.math", 6, "The fitted mean is %f.\n", stats->fittedMean); 1580 1581 // The fitted standard deviation 1582 stats->fittedStdev = guessStdev; 1583 psTrace("psLib.math", 6, "The fitted stdev is %f.\n", stats->fittedStdev); 1584 1585 stats->results |= PS_STAT_FITTED_MEAN_V3; 1586 stats->results |= PS_STAT_FITTED_STDEV_V3; 1297 1587 1298 1588 return true; … … 1589 1879 1590 1880 // ************************************************************************ 1881 if (stats->options & (PS_STAT_FITTED_MEAN_V3 | PS_STAT_FITTED_STDEV_V3)) { 1882 if (stats->options & (PS_STAT_FITTED_MEAN | PS_STAT_FITTED_STDEV)) { 1883 psAbort ("stats", "you may not specify both FITTED_MEAN and FITTED_MEAN_V3"); 1884 } 1885 if (!vectorFittedStats_v3(inF32, errorsF32, maskU8, maskVal, stats)) { 1886 psError(PS_ERR_UNKNOWN, false, _("Failed to calculate fitted statistics")); 1887 status &= false; 1888 } 1889 } 1890 1891 // ************************************************************************ 1591 1892 if ((stats->options & PS_STAT_CLIPPED_MEAN) || (stats->options & PS_STAT_CLIPPED_STDEV)) { 1592 1893 if (!vectorClippedStats(inF32, errorsF32, maskU8, maskVal, stats)) { … … 1628 1929 READ_STAT("FITTED_MEAN_V2", PS_STAT_FITTED_MEAN_V2); 1629 1930 READ_STAT("FITTED_STDEV_V2", PS_STAT_FITTED_STDEV_V2); 1931 READ_STAT("FITTED_V3", PS_STAT_FITTED_MEAN_V3); 1932 READ_STAT("FITTED_MEAN_V3", PS_STAT_FITTED_MEAN_V3); 1933 READ_STAT("FITTED_STDEV_V3", PS_STAT_FITTED_STDEV_V3); 1630 1934 READ_STAT("CLIPPED", PS_STAT_CLIPPED_MEAN); 1631 1935 READ_STAT("CLIPPED_MEAN", PS_STAT_CLIPPED_MEAN); … … 1657 1961 WRITE_STAT("FITTED_MEAN_V2", PS_STAT_FITTED_MEAN_V2); 1658 1962 WRITE_STAT("FITTED_STDEV_V2", PS_STAT_FITTED_STDEV_V2); 1963 WRITE_STAT("FITTED_MEAN_V3", PS_STAT_FITTED_MEAN_V3); 1964 WRITE_STAT("FITTED_STDEV_V3", PS_STAT_FITTED_STDEV_V3); 1659 1965 WRITE_STAT("CLIPPED_MEAN", PS_STAT_CLIPPED_MEAN); 1660 1966 WRITE_STAT("CLIPPED_STDEV", PS_STAT_CLIPPED_STDEV); … … 1708 2014 case PS_STAT_FITTED_MEAN_V2: 1709 2015 case PS_STAT_FITTED_STDEV_V2: 2016 case PS_STAT_FITTED_MEAN_V3: 2017 case PS_STAT_FITTED_STDEV_V3: 1710 2018 case PS_STAT_CLIPPED_MEAN: 1711 2019 case PS_STAT_CLIPPED_STDEV: … … 1742 2050 case PS_STAT_FITTED_STDEV_V2: 1743 2051 return stats->fittedStdev; 2052 case PS_STAT_FITTED_MEAN_V3: 2053 return stats->fittedMean; 2054 case PS_STAT_FITTED_STDEV_V3: 2055 return stats->fittedStdev; 1744 2056 case PS_STAT_CLIPPED_MEAN: 1745 2057 return stats->clippedMean;
Note:
See TracChangeset
for help on using the changeset viewer.
