Changeset 7104
- Timestamp:
- May 10, 2006, 1:38:55 AM (20 years ago)
- File:
-
- 1 edited
-
trunk/psLib/src/math/psMinimizePolyFit.c (modified) (21 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/psLib/src/math/psMinimizePolyFit.c
r6939 r7104 10 10 * @author EAM, IfA 11 11 * 12 * @version $Revision: 1.1 0$ $Name: not supported by cvs2svn $13 * @date $Date: 2006-0 4-21 20:59:51$12 * @version $Revision: 1.11 $ $Name: not supported by cvs2svn $ 13 * @date $Date: 2006-05-10 11:38:55 $ 14 14 * 15 15 * Copyright 2004-2005 Maui High Performance Computing Center, University of Hawaii … … 24 24 /*****************************************************************************/ 25 25 #include <stdio.h> 26 #include <string.h> 26 27 #include <float.h> 27 28 #include <math.h> … … 299 300 polynomial of degree myPoly->nX to the data points (x, y) and return the 300 301 coefficients of that polynomial. 301 302 NOTE: We currently have implemented three algorithms. This one is303 non-standard. It ignores the orthogonal properties of the Chebyshev304 polys, and their known zero values. Instead, we first fit a regular305 ordinary polynomial to the data. This produces the coefficients for the306 various x^i terms. We then "reverse-engineer" those coefficients to307 determine the coefficients of the Chebyshev polys: basically for each308 c_ix^i term in the ordinary polynomial, we sum all x^i terms in the309 Chebshev polys: these must be equal. This creates a set of nOrder+1310 linear equations which can be easily solved. The resulting vector is the311 coefficients of the Chebyshev polys.312 313 This method is significantly slower than the standard NR algorithm. It314 was explicitly requested that we not use the NR algorithm.315 316 Matrix A[[][]:317 Element A[i][j] is the coefficient of the x^i term in the j-th cheby poly.318 319 XXX: This can be improved significantly, performance-wise. The second set320 of linear equations which must be "solved" are already in upper-triangular321 form. One must only perform the reverse-substitution, LUD decomposition.322 323 XXX: Also, we don't really need to generate the chebPolys data structure.324 We simply need the matrix which corresponds to a transpose of each Cheby325 polys coefficients.326 302 *****************************************************************************/ 327 303 static psPolynomial1D *vectorFitPolynomial1DCheb( … … 350 326 } 351 327 352 psS32 polyOrder = myPoly->nX; 353 psS32 numPoly = polyOrder + 1; 354 355 // 356 // We first fit an ordinary polynomial to the data. 357 // 358 psPolynomial1D *ordPoly = psPolynomial1DAlloc(PS_POLYNOMIAL_ORD, polyOrder); 359 psPolynomial1D *rc = VectorFitPolynomial1DOrd(ordPoly, mask, maskValue, y, yErr, x); 360 if (rc == NULL) { 361 psError(PS_ERR_UNKNOWN, false, "Could not fit a preliminary polynomial to the data vector. Returning NULL.\n"); 362 psFree(myPoly); 363 return(NULL); 364 } 365 366 // 367 // Create the A-matrix and B-vector which correspond to the linear equations 368 // which will be solved and will then yield the Cheby poly coefficients. 369 // 370 psPolynomial1D **chebPolys = p_psCreateChebyshevPolys(numPoly); 371 psImage *A = psImageAlloc(numPoly, numPoly, PS_TYPE_F64); 372 psVector *B = psVectorAlloc(numPoly, PS_TYPE_F64); 373 B->n = B->nalloc; 374 for (psS32 i = 0 ; i < numPoly ; i++) { 375 for (psS32 j = 0 ; j < numPoly ; j++) { 376 A->data.F64[i][j] = 0.0; 377 } 378 B->data.F64[i] = ordPoly->coeff[i]; 379 } 380 381 for (psS32 i = 0 ; i < numPoly ; i++) { 382 for (psS32 j = 0 ; j < numPoly ; j++) { 383 if (i <= chebPolys[j]->nX) 384 A->data.F64[i][j]+= chebPolys[j]->coeff[i]; 385 } 386 } 387 // The following statement is essential. It derives from (5.8.8) NR. 388 A->data.F64[0][0] = 0.5; 389 psFree(ordPoly); 328 int numTerms = myPoly->nX + 1; // Number of polynomial terms 329 int numData = x->n; // Number of data elements 330 331 psImage *A = psImageAlloc(numTerms, numTerms, PS_TYPE_F64); // Least-squares matrix 332 psVector *B = psVectorAlloc(numTerms, PS_TYPE_F64); // Least-squares vector 333 B->n = numTerms; 334 psImageInit(A, 0.0); 335 psVectorInit(B, 0.0); 336 337 psPolynomial1D **chebPolys = p_psCreateChebyshevPolys(numTerms); // The chebyshev polynomials 338 psImage *sums = psImageAlloc(numData, numTerms, PS_TYPE_F64); 339 for (int i = 0; i < numTerms; i++) { 340 if (myPoly->mask[i]) { 341 continue; 342 } 343 psPolynomial1D *cheb = chebPolys[i]; 344 psVector *sum = psPolynomial1DEvalVector(cheb, x); 345 memcpy(sums->data.F64[i], sum->data.F64, numData*sizeof(psF64)); 346 psFree(sum); 347 psFree(cheb); 348 } 349 psFree(chebPolys); 350 351 // Dereference pointers, for speed in the loop 352 psF64 **matrix = A->data.F64; // Least-squares matrix 353 psF64 *vector = B->data.F64; // Least-squares vector 354 psU8 *dataMask = NULL; // Mask for data 355 if (mask) { 356 dataMask = mask->data.U8; 357 } 358 psU8 *termMask = myPoly->mask; // Mask for polynomial terms 359 psF64 *yData = y->data.F64; // Coordinate data 360 psF64 *yErrData = NULL; // Errors in the coordinate 361 if (yErr) { 362 yErrData = yErr->data.F64; 363 } 364 psF64 **sumsData = sums->data.F64; // Sums 365 366 for (int k = 0; k < numData; k++) { 367 if (dataMask && dataMask[k]) { 368 continue; 369 } 370 371 double wt; 372 if (!yErr) { 373 wt = 1.0; 374 } else { 375 // this filters fErr == 0 values 376 wt = (yErrData[k] == 0) ? 0.0 : 1.0 / PS_SQR(yErrData[k]); 377 } 378 379 for (int i = 0; i < numTerms; i++) { 380 if (termMask[i]) { 381 continue; 382 } 383 vector[i] += yData[k] * sumsData[i][k] * wt; 384 matrix[i][i] += sumsData[i][k] * sumsData[i][k] * wt; // The diagonal entry 385 for (int j = i + 1; j < numTerms; j++) { // The upper diagonal only: we will use symmetry 386 if (termMask[j]) { 387 continue; 388 } 389 double value = sumsData[i][k] * sumsData[j][k] * wt; // The value to add to the matrix 390 matrix[i][j] += value; 391 matrix[j][i] += value; // Taking advantage of the symmetry 392 } 393 } 394 } 395 psFree(sums); 396 390 397 if (psTraceGetLevel(__func__) >= 6) { 391 398 PS_IMAGE_PRINT_F64(A); … … 395 402 if (USE_GAUSS_JORDAN) { 396 403 // GaussJordan version 397 if ( false == psGaussJordan(A, B)) {404 if (!psMatrixGJSolve(A, B)) { 398 405 psError(PS_ERR_UNKNOWN, false, "Could not solve linear equations. Returning NULL.\n"); 399 406 psFree(myPoly); … … 402 409 // the first nTerm entries in B correspond directly to the desired 403 410 // polynomial coefficients. this is only true for the 1D case 404 for (psS32 k = 0; k < num Poly; k++) {411 for (psS32 k = 0; k < numTerms; k++) { 405 412 myPoly->coeff[k] = B->data.F64[k]; 406 } 413 myPoly->coeffErr[k] = sqrt(A->data.F64[k][k]); 414 } 415 // The constant needs to be multiplied by 2, because it's half the a_0. 416 myPoly->coeff[0] *= 2.0; 417 myPoly->coeffErr[0] *= 2.0; 407 418 } 408 419 } else { … … 412 423 psVector* coeffs = NULL; 413 424 414 ALUD = psImageAlloc(num Poly, numPoly, PS_TYPE_F64);425 ALUD = psImageAlloc(numTerms, numTerms, PS_TYPE_F64); 415 426 ALUD = psMatrixLUD(ALUD, &outPerm, A); 416 427 if (ALUD == NULL) { … … 425 436 myPoly = NULL; 426 437 } else { 427 for (psS32 k = 0; k < num Poly; k++) {438 for (psS32 k = 0; k < numTerms; k++) { 428 439 myPoly->coeff[k] = coeffs->data.F64[k]; 429 440 } … … 439 450 psFree(A); 440 451 psFree(B); 441 for (psS32 i=0;i<numPoly;i++) {442 psFree(chebPolys[i]);443 }444 psFree(chebPolys);445 452 446 453 return(myPoly); 447 454 } 448 449 450 /******************************************************************************451 vectorFitPolynomial1DChebSlow(): This routine will fit a Chebyshev polynomial452 of degree myPoly to the data points (x, y) and return the coefficients of that453 polynomial.454 455 NOTE: We currently have implemented three algorithms. This one is456 non-standard. It ignores the orthogonal properties of the Chebyshev457 polys, and their known zero values. Instead, we do build a system of458 linear equations based on minimizing the chi-squared for all data points459 and we then solve those equations. This method is significantly slower460 than the other algorithms. It was explicitly requested that we implement461 this algorithm.462 *****************************************************************************/463 static psPolynomial1D *vectorFitPolynomial1DChebySlow(464 psPolynomial1D* myPoly,465 const psVector *mask,466 psMaskType maskValue,467 const psVector* y,468 const psVector* yErr,469 const psVector* x)470 {471 PS_ASSERT_POLY_NON_NULL(myPoly, NULL);472 PS_ASSERT_INT_LARGER_THAN_OR_EQUAL(myPoly->nX, 0, NULL);473 PS_ASSERT_VECTOR_NON_NULL(y, NULL);474 PS_ASSERT_VECTOR_TYPE(y, PS_TYPE_F64, NULL);475 if (yErr != NULL) {476 PS_ASSERT_VECTORS_SIZE_EQUAL(y, yErr, NULL);477 PS_ASSERT_VECTOR_TYPE(yErr, PS_TYPE_F64, NULL);478 }479 if (x != NULL) {480 PS_ASSERT_VECTORS_SIZE_EQUAL(y, x, NULL);481 PS_ASSERT_VECTOR_TYPE(x, PS_TYPE_F64, NULL);482 }483 if (mask != NULL) {484 PS_ASSERT_VECTORS_SIZE_EQUAL(y, mask, NULL);485 PS_ASSERT_VECTOR_TYPE(mask, PS_TYPE_U8, NULL);486 }487 488 psS32 NUM_POLY = myPoly->nX+1;489 psS32 NUM_DATA = 0;490 if (mask != NULL) {491 for (psS32 d = 0 ; d < mask->n ; d++) {492 if (!(maskValue & mask->data.U8[d])) {493 NUM_DATA++;494 }495 }496 } else {497 NUM_DATA = x->n;498 }499 500 psPolynomial1D **chebPolys = p_psCreateChebyshevPolys(NUM_POLY);501 if (0) {502 for (psS32 j = 0; j < NUM_POLY; j++) {503 PS_POLY_PRINT_1D(chebPolys[j]);504 }505 }506 507 // Pre-compute the various Chebyshev polys T_i(x[j]) for all x[]508 psImage *tMatrix = psImageAlloc(NUM_DATA, NUM_POLY, PS_TYPE_F64);509 for (psS32 p = 0 ; p < NUM_POLY ; p++) {510 if (mask == NULL) {511 for (psS32 d = 0 ; d < NUM_DATA ; d++) {512 tMatrix->data.F64[p][d] = psPolynomial1DEval(chebPolys[p], x->data.F64[d]);513 }514 } else {515 psS32 dPtr = 0;516 for (psS32 d = 0 ; d < mask->n ; d++) {517 if (!(maskValue & mask->data.U8[d])) {518 tMatrix->data.F64[p][dPtr] = psPolynomial1DEval(chebPolys[p], x->data.F64[d]);519 dPtr++;520 }521 }522 }523 }524 525 // Compute the A matrix526 psImage *A = psImageAlloc(NUM_POLY, NUM_POLY, PS_TYPE_F64);527 for (psS32 i = 0 ; i < NUM_POLY ; i++) {528 for (psS32 j = 0 ; j < NUM_POLY ; j++) {529 A->data.F64[i][j] = 0.0;530 for (psS32 d = 0 ; d < NUM_DATA ; d++) {531 A->data.F64[i][j]+= (tMatrix->data.F64[j][d] * tMatrix->data.F64[i][d]);532 }533 }534 // This is because of the last term in: f(x) = SUM[c_i * T_i(x)] - c_0/2535 for (psS32 d = 0 ; d < NUM_DATA ; d++) {536 A->data.F64[i][0] -= (tMatrix->data.F64[i][d]/2.0);537 }538 }539 540 // Compute the B vector541 psVector *B = psVectorAlloc(NUM_POLY, PS_TYPE_F64);542 B->n = B->nalloc;543 for (psS32 i = 0 ; i < NUM_POLY ; i++) {544 B->data.F64[i] = 0.0;545 if (mask == NULL) {546 for (psS32 d = 0 ; d < NUM_DATA ; d++) {547 B->data.F64[i] += (y->data.F64[d] * tMatrix->data.F64[i][d]);548 }549 } else {550 psS32 dPtr = 0;551 for (psS32 d = 0 ; d < mask->n ; d++) {552 if (!(maskValue & mask->data.U8[d])) {553 B->data.F64[i] += (y->data.F64[d] * tMatrix->data.F64[i][dPtr++]);554 }555 }556 }557 }558 559 if (USE_GAUSS_JORDAN) {560 // GaussJordan version561 if (false == psGaussJordan(A, B)) {562 psError(PS_ERR_UNKNOWN, false, "Could not solve linear equations. Returning NULL.\n");563 psFree(myPoly);564 myPoly = NULL;565 } else {566 // the first nTerm entries in B correspond directly to the desired567 // polynomial coefficients. this is only true for the 1D case568 for (psS32 k = 0; k < NUM_POLY; k++) {569 myPoly->coeff[k] = B->data.F64[k];570 }571 }572 } else {573 // LUD version of the fit574 psImage *ALUD = NULL;575 psVector* outPerm = NULL;576 psVector* coeffs = NULL;577 578 ALUD = psImageAlloc(NUM_POLY, NUM_POLY, PS_TYPE_F64);579 ALUD = psMatrixLUD(ALUD, &outPerm, A);580 if (ALUD == NULL) {581 psError(PS_ERR_UNKNOWN, false, "Could not do LUD decomposition on matrix. Returning NULL.\n");582 psFree(myPoly);583 myPoly = NULL;584 } else {585 coeffs = psMatrixLUSolve(coeffs, ALUD, B, outPerm);586 if (coeffs == NULL) {587 psError(PS_ERR_UNKNOWN, false, "Could not solve LUD matrix. Returning NULL.\n");588 psFree(myPoly);589 myPoly = NULL;590 } else {591 for (psS32 k = 0; k < NUM_POLY; k++) {592 myPoly->coeff[k] = coeffs->data.F64[k];593 }594 }595 }596 597 psFree(ALUD);598 psFree(coeffs);599 psFree(outPerm);600 }601 602 psFree(A);603 psFree(B);604 psFree(tMatrix);605 for (psS32 i=0;i<NUM_POLY;i++) {606 psFree(chebPolys[i]);607 }608 psFree(chebPolys);609 610 return(myPoly);611 }612 613 /******************************************************************************614 vectorFitPolynomial1DChebFast(): This routine will fit a Chebyshev polynomial615 of degree myPoly to the data points (x, y) and return the coefficients of that616 polynomial.617 618 NOTE: We currently have three algorithms. This is standard method which619 uses the orthogonal properties of the Chebyshev polys, and their known620 zero values. This is significantly faster than the chi-squared621 approaches.622 623 NOTE: This function will not work properly if the x-vector does not fully span624 the [-1:1] interval.625 626 NOTE: mask, maskValue, yErr are ignored with this function.627 *****************************************************************************/628 static psPolynomial1D *vectorFitPolynomial1DChebyFast(629 psPolynomial1D* myPoly,630 const psVector *mask,631 psMaskType maskValue,632 const psVector* y,633 const psVector* yErr,634 const psVector* x)635 {636 PS_ASSERT_POLY_NON_NULL(myPoly, NULL);637 PS_ASSERT_INT_NONNEGATIVE(myPoly->nX, NULL);638 PS_ASSERT_VECTOR_NON_NULL(y, NULL);639 PS_ASSERT_VECTOR_TYPE(y, PS_TYPE_F64, NULL);640 if (yErr != NULL) {641 PS_ASSERT_VECTORS_SIZE_EQUAL(y, yErr, NULL);642 PS_ASSERT_VECTOR_TYPE(yErr, PS_TYPE_F64, NULL);643 }644 if (x != NULL) {645 PS_ASSERT_VECTORS_SIZE_EQUAL(y, x, NULL);646 PS_ASSERT_VECTOR_TYPE(x, PS_TYPE_F64, NULL);647 }648 649 psS32 j;650 psS32 k;651 psS32 n = y->n;652 psF64 fac;653 psF64 sum;654 PS_VECTOR_GEN_STATIC_RECYCLED(f, n, PS_TYPE_F64);655 psScalar *fScalar;656 psScalar tmpScalar;657 tmpScalar.type.type = PS_TYPE_F64;658 659 // These assignments appear too simple to warrant code and660 // variable declarations. I retain them here to maintain coherence661 // with the NR code.662 psF64 min = -1.0;663 psF64 max = 1.0;664 psF64 bma = 0.5 * (max-min); // 1665 psF64 bpa = 0.5 * (max+min); // 0666 667 // In this loop, we first calculate the values of X for which the668 // Chebyshev polynomials are zero (see NR, section 5.4). Then we669 // calculate the value of the function we are fitting the Chebyshev670 // polynomials to at those values of X. This is a bit tricky since671 // we don't know that function. So, we instead do 3-order LaGrange672 // interpolation at the point X for the psVectors x,y for which we673 // are fitting this ChebyShev polynomial to.674 675 for (psS32 i=0;i<n;i++) {676 // NR 5.8.4677 // NR 5.8.4678 psF64 Y = cos(M_PI * (0.5 + ((psF32) i)) / ((psF32) n));679 psF64 X = (Y + bma + bpa) - 1.0;680 tmpScalar.data.F64 = X;681 682 // We interpolate against the tabluated x,y vectors to determine the683 // function value at X.684 // XXX: This is somewhat of a hack to handle cases where the x vector does685 // not fully span the [-1.0:1.0] interval. We set the values of f[] to the686 // values of y[] at those endpoints.687 // XXX: This only works if x[] is increasing.688 689 if (X <= x->data.F64[0]) {690 f->data.F64[i] = y->data.F64[0];691 } else if (X >= x->data.F64[x->n-1]) {692 f->data.F64[i] = y->data.F64[x->n-1];693 } else {694 fScalar = p_psVectorInterpolate(NULL, (psVector *) x, (psVector *) y, 3, &tmpScalar);695 if (fScalar == NULL) {696 psError(PS_ERR_UNKNOWN, false, "Could not perform vector interpolation. Returning NULL.\n");697 psFree(myPoly)698 return(NULL);699 }700 701 f->data.F64[i] = fScalar->data.F64;702 psFree(fScalar);703 }704 705 psTrace(__func__, 6, "(x, X, y, f(X)) is (%f, %f, %f, %f)\n",706 x->data.F64[i], X, y->data.F64[i], f->data.F64[i]);707 }708 709 // We have the values for f() at the zero points, we now calculate the710 // coefficients of the Chebyshev polynomial: NR 5.8.7.711 712 fac = 2.0/((psF32) n);713 for (j=0;j<myPoly->nX+1;j++) {714 sum = 0.0;715 for (k=0;k<n;k++) {716 sum+= f->data.F64[k] *717 cos(M_PI * ((psF32) j) * (0.5 + ((psF32) k)) / ((psF32) n));718 }719 720 myPoly->coeff[j] = fac * sum;721 }722 723 return(myPoly);724 }725 726 727 455 728 456 /****************************************************************************** … … 742 470 psTrace(__func__, 4, "---- %s() begin ----\n", __func__); 743 471 PS_ASSERT_POLY_NON_NULL(myPoly, NULL); 744 PS_ASSERT_INT_NONNEGATIVE(myPoly->nX, NULL);745 472 PS_ASSERT_VECTOR_NON_NULL(f, NULL); 746 473 PS_ASSERT_VECTOR_TYPE(f, PS_TYPE_F64, NULL); 747 if (mask != NULL) {474 if (mask) { 748 475 PS_ASSERT_VECTORS_SIZE_EQUAL(f, mask, NULL); 749 476 PS_ASSERT_VECTOR_TYPE(mask, PS_TYPE_U8, NULL); 750 477 } 751 if (x != NULL) {478 if (x) { 752 479 PS_ASSERT_VECTORS_SIZE_EQUAL(f, x, NULL); 753 480 PS_ASSERT_VECTOR_TYPE(x, PS_TYPE_F64, NULL); 754 481 } 755 if (fErr != NULL) {482 if (fErr) { 756 483 PS_ASSERT_VECTORS_SIZE_EQUAL(f, fErr, NULL); 757 484 PS_ASSERT_VECTOR_TYPE(fErr, PS_TYPE_F64, NULL); 758 485 } 759 760 psImage* A = NULL;761 psVector* B = NULL;762 psVector* xSums = NULL;763 psS32 nTerm;764 psS32 nOrder;765 psF64 wt;766 486 767 487 if (psTraceGetLevel(__func__) >= 6) { … … 782 502 } 783 503 784 nTerm = 1 + myPoly->nX; 785 nOrder = nTerm - 1; 786 787 A = psImageAlloc(nTerm, nTerm, PS_TYPE_F64); 788 B = psVectorAlloc(nTerm, PS_TYPE_F64); 504 int nTerm = myPoly->nX + 1; // Number of terms in the equation 505 int nData = f->n; // Number of data points 506 psImage *A = psImageAlloc(nTerm, nTerm, PS_TYPE_F64); // Least-squares matrix 507 psVector *B = psVectorAlloc(nTerm, PS_TYPE_F64); // Least-squares vector 789 508 B->n = B->nalloc; 509 790 510 // Initialize data structures. 791 511 if (!psImageInit(A, 0.0) || !psVectorInit(B, 0.0)) { … … 798 518 } 799 519 800 // xSums look like: 1, x, x^2, ... x^(2n+1) 801 // Build the B and A data structs. 802 // XXX EAM : use temp pointers eg vB = B->data.F64 to save redirects 803 for (psS32 k = 0; k < f->n; k++) { 804 if ((mask != NULL) && (mask->data.U8[k] && maskValue)) { 520 // Build the least squares matrix and vector 521 522 // Dereference some pointers for speed in the loop 523 psU8 *dataMask = NULL; // Dereferenced version of mask for data points 524 if (mask) { 525 dataMask = mask->data.U8; 526 } 527 psU8 *termMask = myPoly->mask; // Dereferenced version of mask for polynomial terms 528 psF64 *ordinates = NULL; // Dereferenced version of ordinate data 529 if (x) { 530 ordinates = x->data.F64; 531 } 532 psF64 *coordinates = f->data.F64; // Dereferenced version of coordinate data 533 psF64 *coordErr = NULL; // Dereferenced version of coordinate errors 534 if (fErr) { 535 coordErr = fErr->data.F64; 536 } 537 psF64 *vector = B->data.F64; // Dereferenced version of least-squares vector 538 psF64 **matrix = A->data.F64; // Dereferenced version of least-squares matrix 539 540 psVector* xSums = NULL; // Contains 1, x, x^2, x^3, etc, for ease of calculation 541 for (int k = 0; k < nData; k++) { 542 if (dataMask && dataMask[k] & maskValue) { 805 543 continue; 806 544 } 807 if ( x != NULL) {808 xSums = BuildSums1D(xSums, x->data.F64[k], nTerm);545 if (ordinates) { 546 xSums = BuildSums1D(xSums, ordinates[k], nTerm); 809 547 } else { 810 xSums = BuildSums1D(xSums, (psF64) k, nTerm); 811 } 812 813 if (fErr == NULL) { 548 xSums = BuildSums1D(xSums, (psF64)k, nTerm); 549 } 550 psF64 *sums = xSums->data.F64; // Dereferenced version of sums 551 552 double wt; 553 if (!fErr) { 814 554 wt = 1.0; 815 555 } else { 816 556 // this filters fErr == 0 values 817 wt = (fErr->data.F64[k] == 0) ? 0.0 : 1.0 / PS_SQR(fErr->data.F64[k]); 818 } 819 for (psS32 i = 0; i < nTerm; i++) { 820 if (myPoly->mask[i]) 557 wt = (coordErr[k] == 0) ? 0.0 : 1.0 / PS_SQR(coordErr[k]); 558 } 559 560 for (int i = 0; i < nTerm; i++) { 561 if (termMask[i]) { 821 562 continue; 822 B->data.F64[i] += f->data.F64[k] * xSums->data.F64[i] * wt; 823 } 824 825 // we could skip half of the array and assign at the end 826 // we must handle masked orders 827 for (psS32 i = 0; i < nTerm; i++) { 828 if (myPoly->mask[i]) 829 continue; 830 for (psS32 j = 0; j < nTerm; j++) { 831 if (myPoly->mask[j]) 563 } 564 vector[i] += coordinates[k] * sums[i] * wt; 565 matrix[i][i] += sums[2 * i] * wt; // The diagonal entry 566 for (int j = i + 1; j < nTerm; j++) { // The upper diagonal only: we will use symmetry 567 if (termMask[j]) { 832 568 continue; 833 A->data.F64[i][j] += xSums->data.F64[i + j] * wt; 834 } 835 } 836 } 837 569 } 570 double value = sums[i + j] * wt; // The value to add to the matrix 571 matrix[i][j] += value; 572 matrix[j][i] += value; // Taking advantage of the symmetry 573 } 574 } 575 } 576 psFree(xSums); 577 578 #if 0 838 579 // set masked elements in A,B appropriately 580 // PAP: Is this necessary, given we initialise to zero, and we have already masked on the terms? 839 581 for (int i = 0; i < nTerm; i++) { 840 if (! myPoly->mask[i])582 if (!termMask[i]) 841 583 continue; 842 B->data.F64[i] = 0; 843 for (int j = 0; j < nTerm; j++) { 844 A->data.F64[i][j] = (i == j) ? 1 : 0; 845 } 846 } 847 848 849 // XXX: rel10_ifa used psGaussJordan(). However, this failed tests. So, I'm using psMatrixLUD(). 584 vector[i] = 0; 585 matrix[i][i] = 1.0; 586 for (int j = i + 1; j < nTerm; j++) { 587 matrix[i][j] = 0.0; 588 matrix[j][i] = 0.0; 589 } 590 } 591 #endif 592 593 if (psTraceGetLevel(__func__) >= 4) { 594 printf("Least-squares vector:\n"); 595 for (int i = 0; i < nTerm; i++) { 596 printf("%f ", B->data.F64[i]); 597 } 598 printf("\n"); 599 printf("Least-squares matrix:\n"); 600 for (int i = 0; i < nTerm; i++) { 601 for (int j = 0; j < nTerm; j++) { 602 printf("%f ", A->data.F64[i][j]); 603 } 604 printf("\n"); 605 } 606 } 607 608 // XXX: rel10_ifa used psMatrixGJSolve(). However, this failed tests. So, I'm using psMatrixLUD(). 850 609 if (USE_GAUSS_JORDAN) { 851 610 // GaussJordan version 852 if ( false == psGaussJordan(A, B)) {611 if (!psMatrixGJSolve(A, B)) { 853 612 psError(PS_ERR_UNKNOWN, false, "Could not solve linear equations. Returning NULL.\n"); 854 613 psFree(myPoly); … … 897 656 psFree(A); 898 657 psFree(B); 899 psFree(xSums);900 658 901 659 psTrace(__func__, 4, "---- %s() End ----\n", __func__); … … 924 682 925 683 PS_ASSERT_POLY_NON_NULL(poly, NULL); 926 PS_ASSERT_INT_NONNEGATIVE(poly->nX, NULL);684 //PS_ASSERT_INT_NONNEGATIVE(poly->nX, NULL); 927 685 PS_ASSERT_VECTOR_NON_NULL(f, NULL); 928 686 PS_ASSERT_VECTOR_NON_EMPTY(f, NULL); … … 981 739 } 982 740 983 if (1) { 984 poly = vectorFitPolynomial1DCheb(poly, mask, maskValue, f64, fErr64, x64); 985 } else { 986 if (0) { 987 poly = vectorFitPolynomial1DChebySlow(poly, mask, maskValue, f64, fErr64, x64); 988 poly = vectorFitPolynomial1DChebyFast(poly, mask, maskValue, f64, fErr64, x64); 989 } 990 } 741 poly = vectorFitPolynomial1DCheb(poly, mask, maskValue, f64, fErr64, x64); 742 991 743 if (x == NULL) { 992 744 psFree(x64); … … 1221 973 } 1222 974 1223 // I think this is 1 dimension down 1224 psImage* A = NULL; 1225 psVector* B = NULL; 1226 psImage* Sums = NULL; 1227 psF64 wt; 1228 psS32 nTerm; 1229 1230 psS32 nXterm = 1 + myPoly->nX; 1231 psS32 nYterm = 1 + myPoly->nY; 1232 nTerm = nXterm * nYterm; 1233 1234 A = psImageAlloc(nTerm, nTerm, PS_TYPE_F64); 1235 B = psVectorAlloc(nTerm, PS_TYPE_F64); 975 // Number of polynomial terms 976 int nXterm = 1 + myPoly->nX; // Number of terms in x 977 int nYterm = 1 + myPoly->nY; // Number of terms in y 978 int nTerm = nXterm * nYterm; // Total number of terms 979 980 psImage *A = psImageAlloc(nTerm, nTerm, PS_TYPE_F64); // Least-squares matrix 981 psVector *B = psVectorAlloc(nTerm, PS_TYPE_F64); // Least-squares vector 1236 982 B->n = B->nalloc; 983 1237 984 // Initialize data structures. 1238 985 if (!psImageInit(A, 0.0) || !psVectorInit(B, 0.0)) { … … 1245 992 } 1246 993 1247 // Sums look like: 1, x, x^2, ... x^(2n+1), y, xy, x^2y, ... x^(2n+1) 1248 1249 // Build the B and A data structs. 1250 for (psS32 k = 0; k < x->n; k++) { 1251 if ((mask != NULL) && (mask->data.U8[k] & maskValue)) { 994 // Dereference stuff, to make the loop go faster 995 psF64 **matrix = A->data.F64; // Dereference the least-squares matrix 996 psF64 *vector = B->data.F64; // Dereference the least-squares vector 997 psU8 **termMask = myPoly->mask; // Dereference mask for polynomial terms 998 psU8 *dataMask = NULL; // Dereference mask for data 999 if (mask) { 1000 dataMask = mask->data.U8; 1001 } 1002 psF64 *xData = x->data.F64; // Dereference x 1003 psF64 *yData = y->data.F64; // Dereference y 1004 psF64 *fData = f->data.F64; // Dereference f 1005 psF64 *fErrData = NULL; // Dereference fErr 1006 if (fErr) { 1007 fErrData = fErr->data.F64; 1008 } 1009 1010 // Build the least-squares matrix and vector 1011 psImage *xySums = NULL; // The sums: 1, x, x^2, ... x^(2n+1), y, xy, x^2y, ... x^(2n+1) 1012 for (int k = 0; k < x->n; k++) { 1013 if (dataMask && dataMask[k] & maskValue) { 1252 1014 continue; 1253 1015 } 1254 1255 Sums = BuildSums2D(Sums, x->data.F64[k], y->data.F64[k], nXterm, nYterm); 1256 1257 if (fErr == NULL) { 1016 xySums = BuildSums2D(xySums, xData[k], yData[k], nXterm, nYterm); 1017 psF64 **sums = xySums->data.F64;// Dereference sums 1018 1019 double wt; // Weight 1020 if (!fErrData) { 1258 1021 wt = 1.0; 1259 1022 } else { 1260 1023 // this filters fErr == 0 values 1261 wt = (fErr->data.F64[k] == 0.0) ? 0.0 : 1.0 / PS_SQR(fErr->data.F64[k]); 1262 } 1263 1264 // we could skip half of the array and assign at the end 1265 // we must handle masked orders 1266 for (psS32 n = 0; n < nXterm; n++) { 1267 for (psS32 m = 0; m < nYterm; m++) { 1268 if (myPoly->mask[n][m]) 1024 wt = (fErrData[k] == 0.0) ? 0.0 : 1.0 / PS_SQR(fErrData[k]); 1025 } 1026 1027 // Iterating over the matrix 1028 for (int i = 0; i < nTerm; i++) { 1029 int l = i % nXterm; // x index 1030 int m = i / nXterm; // y index 1031 if (termMask[l][m]) { 1032 continue; 1033 } 1034 vector[i] += fData[k] * sums[l][m] * wt; 1035 matrix[i][i] += sums[2*l][2*m] * wt; // The diagonal entry 1036 for (int j = i + 1; j < nTerm; j++) { // Doing the upper diagonal only: we will use symmetry 1037 int p = j % nXterm; // x index 1038 int q = j / nXterm; // y index 1039 if (termMask[p][q]) { 1269 1040 continue; 1270 B->data.F64[n+m*nXterm] += f->data.F64[k] * Sums->data.F64[n][m] * wt; 1271 } 1272 } 1273 1274 for (psS32 i = 0; i < nXterm; i++) { 1275 for (psS32 j = 0; j < nYterm; j++) { 1276 if (myPoly->mask[i][j]) 1277 continue; 1278 for (psS32 n = 0; n < nXterm; n++) { 1279 for (psS32 m = 0; m < nYterm; m++) { 1280 if (myPoly->mask[n][m]) 1281 continue; 1282 A->data.F64[i+j*nXterm][n+m*nXterm] += Sums->data.F64[i+n][j+m] * wt; 1283 } 1284 } 1285 } 1286 } 1287 } 1288 1289 // set masked elements appropriately 1290 for (int i = 0; i < nXterm; i++) { 1291 for (int j = 0; j < nYterm; j++) { 1292 if (!myPoly->mask[i][j]) 1293 continue; 1294 int nx = i+j*nXterm; 1295 B->data.F64[nx] = 0; 1296 for (int n = 0; n < nXterm; n++) { 1297 for (int m = 0; m < nYterm; m++) { 1298 int ny = n+m*nXterm; 1299 A->data.F64[nx][ny] = (nx == ny) ? 1 : 0; 1300 } 1301 } 1302 } 1303 } 1304 1305 if (false == psGaussJordan(A, B)) { 1041 } 1042 double value = sums[l+p][m+q] * wt; // Value to add in 1043 matrix[i][j] += value; 1044 matrix[j][i] += value; // Taking advantage of the symmetry 1045 } 1046 } 1047 } 1048 psFree(xySums); 1049 1050 if (!psMatrixGJSolve(A, B)) { 1306 1051 psError(PS_ERR_UNKNOWN, false, "Could not solve linear equations. Returning NULL.\n"); 1307 1052 psFree(myPoly); … … 1309 1054 } else { 1310 1055 // select the appropriate solution entries 1311 for ( psS32n = 0; n < nXterm; n++) {1312 for ( psS32m = 0; m < nYterm; m++) {1056 for (int n = 0; n < nXterm; n++) { 1057 for (int m = 0; m < nYterm; m++) { 1313 1058 myPoly->coeff[n][m] = B->data.F64[n+m*nXterm]; 1314 1059 myPoly->coeffErr[n][m] = sqrt(A->data.F64[n+m*nXterm][n+m*nXterm]); … … 1319 1064 psFree(A); 1320 1065 psFree(B); 1321 psFree(Sums);1322 1066 1323 1067 psTrace(__func__, 4, "---- %s() end ----\n", __func__); … … 1767 1511 } 1768 1512 1769 // XXX: rel10_ifa used ps GaussJordan(). However, this failed tests. So, I'm using psMatrixLUD().1513 // XXX: rel10_ifa used psMatrixGJSolve(). However, this failed tests. So, I'm using psMatrixLUD(). 1770 1514 if (USE_GAUSS_JORDAN) { 1771 1515 // does the solution in place 1772 1516 // The matrices were overflowing, so I switched to LUD. 1773 if ( false == psGaussJordan(A, B)) {1517 if (!psMatrixGJSolve(A, B)) { 1774 1518 psFree(A); 1775 1519 psFree(B); … … 2324 2068 2325 2069 2326 // XXX: rel10_ifa used ps GaussJordan(). However, this failed tests. So, I'm using psMatrixLUD().2070 // XXX: rel10_ifa used psMatrixGJSolve(). However, this failed tests. So, I'm using psMatrixLUD(). 2327 2071 if (USE_GAUSS_JORDAN) { 2328 2072 // does the solution in place 2329 2073 // The GaussJordan version was overflowing, so I'm using LUD. 2330 if ( false == psGaussJordan(A, B)) {2074 if (psMatrixGJSolve(A, B)) { 2331 2075 psFree(A); 2332 2076 psFree(B);
Note:
See TracChangeset
for help on using the changeset viewer.
