IPP Software Navigation Tools IPP Links Communication Pan-STARRS Links

Ignore:
Timestamp:
May 10, 2006, 1:38:55 AM (20 years ago)
Author:
Paul Price
Message:

Rewrote vectorFitPolynomial1DCheb to use the standard linear fitting
method (invert least-squares matrix). Tweaked ordinary 1D and 2D
polynomial fitting for speed.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/psLib/src/math/psMinimizePolyFit.c

    r6939 r7104  
    1010 *  @author EAM, IfA
    1111 *
    12  *  @version $Revision: 1.10 $ $Name: not supported by cvs2svn $
    13  *  @date $Date: 2006-04-21 20:59:51 $
     12 *  @version $Revision: 1.11 $ $Name: not supported by cvs2svn $
     13 *  @date $Date: 2006-05-10 11:38:55 $
    1414 *
    1515 *  Copyright 2004-2005 Maui High Performance Computing Center, University of Hawaii
     
    2424/*****************************************************************************/
    2525#include <stdio.h>
     26#include <string.h>
    2627#include <float.h>
    2728#include <math.h>
     
    299300polynomial of degree myPoly->nX to the data points (x, y) and return the
    300301coefficients of that polynomial.
    301  
    302     NOTE: We currently have implemented three algorithms.  This one is
    303     non-standard.  It ignores the orthogonal properties of the Chebyshev
    304     polys, and their known zero values.  Instead, we first fit a regular
    305     ordinary polynomial to the data.  This produces the coefficients for the
    306     various x^i terms.  We then "reverse-engineer" those coefficients to
    307     determine the coefficients of the Chebyshev polys: basically for each
    308     c_ix^i term in the ordinary polynomial, we sum all x^i terms in the
    309     Chebshev polys: these must be equal.  This creates a set of nOrder+1
    310     linear equations which can be easily solved.  The resulting vector is the
    311     coefficients of the Chebyshev polys.
    312    
    313     This method is significantly slower than the standard NR algorithm.  It
    314     was explicitly requested that we not use the NR algorithm.
    315  
    316  Matrix A[[][]:
    317      Element A[i][j] is the coefficient of the x^i term in the j-th cheby poly.
    318  
    319     XXX: This can be improved significantly, performance-wise.  The second set
    320     of linear equations which must be "solved" are already in upper-triangular
    321     form.  One must only perform the reverse-substitution, LUD decomposition.
    322  
    323     XXX: Also, we don't really need to generate the chebPolys data structure.
    324     We simply need the matrix which corresponds to a transpose of each Cheby
    325     polys coefficients.
    326302*****************************************************************************/
    327303static psPolynomial1D *vectorFitPolynomial1DCheb(
     
    350326    }
    351327
    352     psS32 polyOrder = myPoly->nX;
    353     psS32 numPoly = polyOrder + 1;
    354 
    355     //
    356     // We first fit an ordinary polynomial to the data.
    357     //
    358     psPolynomial1D *ordPoly = psPolynomial1DAlloc(PS_POLYNOMIAL_ORD, polyOrder);
    359     psPolynomial1D *rc = VectorFitPolynomial1DOrd(ordPoly, mask, maskValue, y, yErr, x);
    360     if (rc == NULL) {
    361         psError(PS_ERR_UNKNOWN, false, "Could not fit a preliminary polynomial to the data vector.  Returning NULL.\n");
    362         psFree(myPoly);
    363         return(NULL);
    364     }
    365 
    366     //
    367     // Create the A-matrix and B-vector which correspond to the linear equations
    368     // which will be solved and will then yield the Cheby poly coefficients.
    369     //
    370     psPolynomial1D **chebPolys = p_psCreateChebyshevPolys(numPoly);
    371     psImage *A = psImageAlloc(numPoly, numPoly, PS_TYPE_F64);
    372     psVector *B = psVectorAlloc(numPoly, PS_TYPE_F64);
    373     B->n = B->nalloc;
    374     for (psS32 i = 0 ; i < numPoly ; i++) {
    375         for (psS32 j = 0 ; j < numPoly ; j++) {
    376             A->data.F64[i][j] = 0.0;
    377         }
    378         B->data.F64[i] = ordPoly->coeff[i];
    379     }
    380 
    381     for (psS32 i = 0 ; i < numPoly ; i++) {
    382         for (psS32 j = 0 ; j < numPoly ; j++) {
    383             if (i <= chebPolys[j]->nX)
    384                 A->data.F64[i][j]+= chebPolys[j]->coeff[i];
    385         }
    386     }
    387     // The following statement is essential.  It derives from (5.8.8) NR.
    388     A->data.F64[0][0] = 0.5;
    389     psFree(ordPoly);
     328    int numTerms = myPoly->nX + 1;      // Number of polynomial terms
     329    int numData = x->n;                 // Number of data elements
     330
     331    psImage *A = psImageAlloc(numTerms, numTerms, PS_TYPE_F64); // Least-squares matrix
     332    psVector *B = psVectorAlloc(numTerms, PS_TYPE_F64); // Least-squares vector
     333    B->n = numTerms;
     334    psImageInit(A, 0.0);
     335    psVectorInit(B, 0.0);
     336
     337    psPolynomial1D **chebPolys = p_psCreateChebyshevPolys(numTerms); // The chebyshev polynomials
     338    psImage *sums = psImageAlloc(numData, numTerms, PS_TYPE_F64);
     339    for (int i = 0; i < numTerms; i++) {
     340        if (myPoly->mask[i]) {
     341            continue;
     342        }
     343        psPolynomial1D *cheb = chebPolys[i];
     344        psVector *sum = psPolynomial1DEvalVector(cheb, x);
     345        memcpy(sums->data.F64[i], sum->data.F64, numData*sizeof(psF64));
     346        psFree(sum);
     347        psFree(cheb);
     348    }
     349    psFree(chebPolys);
     350
     351    // Dereference pointers, for speed in the loop
     352    psF64 **matrix = A->data.F64;       // Least-squares matrix
     353    psF64 *vector = B->data.F64;        // Least-squares vector
     354    psU8 *dataMask = NULL;              // Mask for data
     355    if (mask) {
     356        dataMask = mask->data.U8;
     357    }
     358    psU8 *termMask = myPoly->mask;      // Mask for polynomial terms
     359    psF64 *yData = y->data.F64;         // Coordinate data
     360    psF64 *yErrData = NULL;             // Errors in the coordinate
     361    if (yErr) {
     362        yErrData = yErr->data.F64;
     363    }
     364    psF64 **sumsData = sums->data.F64;  // Sums
     365
     366    for (int k = 0; k < numData; k++) {
     367        if (dataMask && dataMask[k]) {
     368            continue;
     369        }
     370
     371        double wt;
     372        if (!yErr) {
     373            wt = 1.0;
     374        } else {
     375            // this filters fErr == 0 values
     376            wt = (yErrData[k] == 0) ? 0.0 : 1.0 / PS_SQR(yErrData[k]);
     377        }
     378
     379        for (int i = 0; i < numTerms; i++) {
     380            if (termMask[i]) {
     381                continue;
     382            }
     383            vector[i] += yData[k] * sumsData[i][k] * wt;
     384            matrix[i][i] += sumsData[i][k] * sumsData[i][k] * wt; // The diagonal entry
     385            for (int j = i + 1; j < numTerms; j++) { // The upper diagonal only: we will use symmetry
     386                if (termMask[j]) {
     387                    continue;
     388                }
     389                double value = sumsData[i][k] * sumsData[j][k] * wt; // The value to add to the matrix
     390                matrix[i][j] += value;
     391                matrix[j][i] += value;  // Taking advantage of the symmetry
     392            }
     393        }
     394    }
     395    psFree(sums);
     396
    390397    if (psTraceGetLevel(__func__) >= 6) {
    391398        PS_IMAGE_PRINT_F64(A);
     
    395402    if (USE_GAUSS_JORDAN) {
    396403        // GaussJordan version
    397         if (false == psGaussJordan(A, B)) {
     404        if (!psMatrixGJSolve(A, B)) {
    398405            psError(PS_ERR_UNKNOWN, false, "Could not solve linear equations.  Returning NULL.\n");
    399406            psFree(myPoly);
     
    402409            // the first nTerm entries in B correspond directly to the desired
    403410            // polynomial coefficients.  this is only true for the 1D case
    404             for (psS32 k = 0; k < numPoly; k++) {
     411            for (psS32 k = 0; k < numTerms; k++) {
    405412                myPoly->coeff[k] = B->data.F64[k];
    406             }
     413                myPoly->coeffErr[k] = sqrt(A->data.F64[k][k]);
     414            }
     415            // The constant needs to be multiplied by 2, because it's half the a_0.
     416            myPoly->coeff[0] *= 2.0;
     417            myPoly->coeffErr[0] *= 2.0;
    407418        }
    408419    } else {
     
    412423        psVector* coeffs = NULL;
    413424
    414         ALUD = psImageAlloc(numPoly, numPoly, PS_TYPE_F64);
     425        ALUD = psImageAlloc(numTerms, numTerms, PS_TYPE_F64);
    415426        ALUD = psMatrixLUD(ALUD, &outPerm, A);
    416427        if (ALUD == NULL) {
     
    425436                myPoly = NULL;
    426437            } else {
    427                 for (psS32 k = 0; k < numPoly; k++) {
     438                for (psS32 k = 0; k < numTerms; k++) {
    428439                    myPoly->coeff[k] = coeffs->data.F64[k];
    429440                }
     
    439450    psFree(A);
    440451    psFree(B);
    441     for (psS32 i=0;i<numPoly;i++) {
    442         psFree(chebPolys[i]);
    443     }
    444     psFree(chebPolys);
    445452
    446453    return(myPoly);
    447454}
    448 
    449 
    450 /******************************************************************************
    451 vectorFitPolynomial1DChebSlow():  This routine will fit a Chebyshev polynomial
    452 of degree myPoly to the data points (x, y) and return the coefficients of that
    453 polynomial.
    454  
    455     NOTE: We currently have implemented three algorithms.  This one is
    456     non-standard.  It ignores the orthogonal properties of the Chebyshev
    457     polys, and their known zero values.  Instead, we do build a system of
    458     linear equations based on minimizing the chi-squared for all data points
    459     and we then solve those equations.  This method is significantly slower
    460     than the other algorithms.  It was explicitly requested that we implement
    461     this algorithm.
    462 *****************************************************************************/
    463 static psPolynomial1D *vectorFitPolynomial1DChebySlow(
    464     psPolynomial1D* myPoly,
    465     const psVector *mask,
    466     psMaskType maskValue,
    467     const psVector* y,
    468     const psVector* yErr,
    469     const psVector* x)
    470 {
    471     PS_ASSERT_POLY_NON_NULL(myPoly, NULL);
    472     PS_ASSERT_INT_LARGER_THAN_OR_EQUAL(myPoly->nX, 0, NULL);
    473     PS_ASSERT_VECTOR_NON_NULL(y, NULL);
    474     PS_ASSERT_VECTOR_TYPE(y, PS_TYPE_F64, NULL);
    475     if (yErr != NULL) {
    476         PS_ASSERT_VECTORS_SIZE_EQUAL(y, yErr, NULL);
    477         PS_ASSERT_VECTOR_TYPE(yErr, PS_TYPE_F64, NULL);
    478     }
    479     if (x != NULL) {
    480         PS_ASSERT_VECTORS_SIZE_EQUAL(y, x, NULL);
    481         PS_ASSERT_VECTOR_TYPE(x, PS_TYPE_F64, NULL);
    482     }
    483     if (mask != NULL) {
    484         PS_ASSERT_VECTORS_SIZE_EQUAL(y, mask, NULL);
    485         PS_ASSERT_VECTOR_TYPE(mask, PS_TYPE_U8, NULL);
    486     }
    487 
    488     psS32 NUM_POLY = myPoly->nX+1;
    489     psS32 NUM_DATA = 0;
    490     if (mask != NULL) {
    491         for (psS32 d = 0 ; d < mask->n  ; d++) {
    492             if (!(maskValue & mask->data.U8[d])) {
    493                 NUM_DATA++;
    494             }
    495         }
    496     } else {
    497         NUM_DATA = x->n;
    498     }
    499 
    500     psPolynomial1D **chebPolys = p_psCreateChebyshevPolys(NUM_POLY);
    501     if (0) {
    502         for (psS32 j = 0; j < NUM_POLY; j++) {
    503             PS_POLY_PRINT_1D(chebPolys[j]);
    504         }
    505     }
    506 
    507     // Pre-compute the various Chebyshev polys T_i(x[j]) for all x[]
    508     psImage *tMatrix = psImageAlloc(NUM_DATA, NUM_POLY, PS_TYPE_F64);
    509     for (psS32 p = 0 ; p < NUM_POLY ; p++) {
    510         if (mask == NULL) {
    511             for (psS32 d = 0 ; d < NUM_DATA ; d++) {
    512                 tMatrix->data.F64[p][d] = psPolynomial1DEval(chebPolys[p], x->data.F64[d]);
    513             }
    514         } else {
    515             psS32 dPtr = 0;
    516             for (psS32 d = 0 ; d < mask->n ; d++) {
    517                 if (!(maskValue & mask->data.U8[d])) {
    518                     tMatrix->data.F64[p][dPtr] = psPolynomial1DEval(chebPolys[p], x->data.F64[d]);
    519                     dPtr++;
    520                 }
    521             }
    522         }
    523     }
    524 
    525     // Compute the A matrix
    526     psImage *A = psImageAlloc(NUM_POLY, NUM_POLY, PS_TYPE_F64);
    527     for (psS32 i = 0 ; i < NUM_POLY ; i++) {
    528         for (psS32 j = 0 ; j < NUM_POLY ; j++) {
    529             A->data.F64[i][j] = 0.0;
    530             for (psS32 d = 0 ; d < NUM_DATA ; d++) {
    531                 A->data.F64[i][j]+= (tMatrix->data.F64[j][d] * tMatrix->data.F64[i][d]);
    532             }
    533         }
    534         // This is because of the last term in: f(x) = SUM[c_i * T_i(x)]  -  c_0/2
    535         for (psS32 d = 0 ; d < NUM_DATA ; d++) {
    536             A->data.F64[i][0] -= (tMatrix->data.F64[i][d]/2.0);
    537         }
    538     }
    539 
    540     // Compute the B vector
    541     psVector *B = psVectorAlloc(NUM_POLY, PS_TYPE_F64);
    542     B->n = B->nalloc;
    543     for (psS32 i = 0 ; i < NUM_POLY ; i++) {
    544         B->data.F64[i] = 0.0;
    545         if (mask == NULL) {
    546             for (psS32 d = 0 ; d < NUM_DATA ; d++) {
    547                 B->data.F64[i] += (y->data.F64[d] * tMatrix->data.F64[i][d]);
    548             }
    549         } else {
    550             psS32 dPtr = 0;
    551             for (psS32 d = 0 ; d < mask->n ; d++) {
    552                 if (!(maskValue & mask->data.U8[d])) {
    553                     B->data.F64[i] += (y->data.F64[d] * tMatrix->data.F64[i][dPtr++]);
    554                 }
    555             }
    556         }
    557     }
    558 
    559     if (USE_GAUSS_JORDAN) {
    560         // GaussJordan version
    561         if (false == psGaussJordan(A, B)) {
    562             psError(PS_ERR_UNKNOWN, false, "Could not solve linear equations.  Returning NULL.\n");
    563             psFree(myPoly);
    564             myPoly = NULL;
    565         } else {
    566             // the first nTerm entries in B correspond directly to the desired
    567             // polynomial coefficients.  this is only true for the 1D case
    568             for (psS32 k = 0; k < NUM_POLY; k++) {
    569                 myPoly->coeff[k] = B->data.F64[k];
    570             }
    571         }
    572     } else {
    573         // LUD version of the fit
    574         psImage *ALUD = NULL;
    575         psVector* outPerm = NULL;
    576         psVector* coeffs = NULL;
    577 
    578         ALUD = psImageAlloc(NUM_POLY, NUM_POLY, PS_TYPE_F64);
    579         ALUD = psMatrixLUD(ALUD, &outPerm, A);
    580         if (ALUD == NULL) {
    581             psError(PS_ERR_UNKNOWN, false, "Could not do LUD decomposition on matrix.  Returning NULL.\n");
    582             psFree(myPoly);
    583             myPoly = NULL;
    584         } else {
    585             coeffs = psMatrixLUSolve(coeffs, ALUD, B, outPerm);
    586             if (coeffs == NULL) {
    587                 psError(PS_ERR_UNKNOWN, false, "Could not solve LUD matrix.  Returning NULL.\n");
    588                 psFree(myPoly);
    589                 myPoly = NULL;
    590             } else {
    591                 for (psS32 k = 0; k < NUM_POLY; k++) {
    592                     myPoly->coeff[k] = coeffs->data.F64[k];
    593                 }
    594             }
    595         }
    596 
    597         psFree(ALUD);
    598         psFree(coeffs);
    599         psFree(outPerm);
    600     }
    601 
    602     psFree(A);
    603     psFree(B);
    604     psFree(tMatrix);
    605     for (psS32 i=0;i<NUM_POLY;i++) {
    606         psFree(chebPolys[i]);
    607     }
    608     psFree(chebPolys);
    609 
    610     return(myPoly);
    611 }
    612 
    613 /******************************************************************************
    614 vectorFitPolynomial1DChebFast():  This routine will fit a Chebyshev polynomial
    615 of degree myPoly to the data points (x, y) and return the coefficients of that
    616 polynomial.
    617  
    618     NOTE: We currently have three algorithms.  This is standard method which
    619     uses the orthogonal properties of the Chebyshev polys, and their known
    620     zero values.  This is significantly faster than the chi-squared
    621     approaches.
    622  
    623     NOTE: This function will not work properly if the x-vector does not fully span
    624     the [-1:1] interval.
    625  
    626     NOTE: mask, maskValue, yErr are ignored with this function.
    627 *****************************************************************************/
    628 static psPolynomial1D *vectorFitPolynomial1DChebyFast(
    629     psPolynomial1D* myPoly,
    630     const psVector *mask,
    631     psMaskType maskValue,
    632     const psVector* y,
    633     const psVector* yErr,
    634     const psVector* x)
    635 {
    636     PS_ASSERT_POLY_NON_NULL(myPoly, NULL);
    637     PS_ASSERT_INT_NONNEGATIVE(myPoly->nX, NULL);
    638     PS_ASSERT_VECTOR_NON_NULL(y, NULL);
    639     PS_ASSERT_VECTOR_TYPE(y, PS_TYPE_F64, NULL);
    640     if (yErr != NULL) {
    641         PS_ASSERT_VECTORS_SIZE_EQUAL(y, yErr, NULL);
    642         PS_ASSERT_VECTOR_TYPE(yErr, PS_TYPE_F64, NULL);
    643     }
    644     if (x != NULL) {
    645         PS_ASSERT_VECTORS_SIZE_EQUAL(y, x, NULL);
    646         PS_ASSERT_VECTOR_TYPE(x, PS_TYPE_F64, NULL);
    647     }
    648 
    649     psS32 j;
    650     psS32 k;
    651     psS32 n = y->n;
    652     psF64 fac;
    653     psF64 sum;
    654     PS_VECTOR_GEN_STATIC_RECYCLED(f, n, PS_TYPE_F64);
    655     psScalar *fScalar;
    656     psScalar tmpScalar;
    657     tmpScalar.type.type = PS_TYPE_F64;
    658 
    659     // These assignments appear too simple to warrant code and
    660     // variable declarations.  I retain them here to maintain coherence
    661     // with the NR code.
    662     psF64 min = -1.0;
    663     psF64 max = 1.0;
    664     psF64 bma = 0.5 * (max-min);  // 1
    665     psF64 bpa = 0.5 * (max+min);  // 0
    666 
    667     // In this loop, we first calculate the values of X for which the
    668     // Chebyshev polynomials are zero (see NR, section 5.4).  Then we
    669     // calculate the value of the function we are fitting the Chebyshev
    670     // polynomials to at those values of X.  This is a bit tricky since
    671     // we don't know that function.  So, we instead do 3-order LaGrange
    672     // interpolation at the point X for the psVectors x,y for which we
    673     // are fitting this ChebyShev polynomial to.
    674 
    675     for (psS32 i=0;i<n;i++) {
    676         // NR 5.8.4
    677         // NR 5.8.4
    678         psF64 Y = cos(M_PI * (0.5 + ((psF32) i)) / ((psF32) n));
    679         psF64 X = (Y + bma + bpa) - 1.0;
    680         tmpScalar.data.F64 = X;
    681 
    682         // We interpolate against the tabluated x,y vectors to determine the
    683         // function value at X.
    684         // XXX: This is somewhat of a hack to handle cases where the x vector does
    685         // not fully span the [-1.0:1.0] interval.  We set the values of f[] to the
    686         // values of y[] at those endpoints.
    687         // XXX: This only works if x[] is increasing.
    688 
    689         if (X <= x->data.F64[0]) {
    690             f->data.F64[i] = y->data.F64[0];
    691         } else if (X >= x->data.F64[x->n-1]) {
    692             f->data.F64[i] = y->data.F64[x->n-1];
    693         } else {
    694             fScalar = p_psVectorInterpolate(NULL, (psVector *) x, (psVector *) y, 3, &tmpScalar);
    695             if (fScalar == NULL) {
    696                 psError(PS_ERR_UNKNOWN, false, "Could not perform vector interpolation.  Returning NULL.\n");
    697                 psFree(myPoly)
    698                 return(NULL);
    699             }
    700 
    701             f->data.F64[i] = fScalar->data.F64;
    702             psFree(fScalar);
    703         }
    704 
    705         psTrace(__func__, 6, "(x, X, y, f(X)) is (%f, %f, %f, %f)\n",
    706                 x->data.F64[i], X, y->data.F64[i], f->data.F64[i]);
    707     }
    708 
    709     // We have the values for f() at the zero points, we now calculate the
    710     // coefficients of the Chebyshev polynomial: NR 5.8.7.
    711 
    712     fac = 2.0/((psF32) n);
    713     for (j=0;j<myPoly->nX+1;j++) {
    714         sum = 0.0;
    715         for (k=0;k<n;k++) {
    716             sum+= f->data.F64[k] *
    717                   cos(M_PI * ((psF32) j) * (0.5 + ((psF32) k)) / ((psF32) n));
    718         }
    719 
    720         myPoly->coeff[j] = fac * sum;
    721     }
    722 
    723     return(myPoly);
    724 }
    725 
    726 
    727455
    728456/******************************************************************************
     
    742470    psTrace(__func__, 4, "---- %s() begin ----\n", __func__);
    743471    PS_ASSERT_POLY_NON_NULL(myPoly, NULL);
    744     PS_ASSERT_INT_NONNEGATIVE(myPoly->nX, NULL);
    745472    PS_ASSERT_VECTOR_NON_NULL(f, NULL);
    746473    PS_ASSERT_VECTOR_TYPE(f, PS_TYPE_F64, NULL);
    747     if (mask != NULL) {
     474    if (mask) {
    748475        PS_ASSERT_VECTORS_SIZE_EQUAL(f, mask, NULL);
    749476        PS_ASSERT_VECTOR_TYPE(mask, PS_TYPE_U8, NULL);
    750477    }
    751     if (x != NULL) {
     478    if (x) {
    752479        PS_ASSERT_VECTORS_SIZE_EQUAL(f, x, NULL);
    753480        PS_ASSERT_VECTOR_TYPE(x, PS_TYPE_F64, NULL);
    754481    }
    755     if (fErr != NULL) {
     482    if (fErr) {
    756483        PS_ASSERT_VECTORS_SIZE_EQUAL(f, fErr, NULL);
    757484        PS_ASSERT_VECTOR_TYPE(fErr, PS_TYPE_F64, NULL);
    758485    }
    759 
    760     psImage* A = NULL;
    761     psVector* B = NULL;
    762     psVector* xSums = NULL;
    763     psS32 nTerm;
    764     psS32 nOrder;
    765     psF64 wt;
    766486
    767487    if (psTraceGetLevel(__func__) >= 6) {
     
    782502    }
    783503
    784     nTerm = 1 + myPoly->nX;
    785     nOrder = nTerm - 1;
    786 
    787     A = psImageAlloc(nTerm, nTerm, PS_TYPE_F64);
    788     B = psVectorAlloc(nTerm, PS_TYPE_F64);
     504    int nTerm = myPoly->nX + 1;         // Number of terms in the equation
     505    int nData = f->n;                   // Number of data points
     506    psImage *A = psImageAlloc(nTerm, nTerm, PS_TYPE_F64); // Least-squares matrix
     507    psVector *B = psVectorAlloc(nTerm, PS_TYPE_F64); // Least-squares vector
    789508    B->n = B->nalloc;
     509
    790510    // Initialize data structures.
    791511    if (!psImageInit(A, 0.0) || !psVectorInit(B, 0.0)) {
     
    798518    }
    799519
    800     // xSums look like: 1, x, x^2, ... x^(2n+1)
    801     // Build the B and A data structs.
    802     // XXX EAM : use temp pointers eg vB = B->data.F64 to save redirects
    803     for (psS32 k = 0; k < f->n; k++) {
    804         if ((mask != NULL) && (mask->data.U8[k] && maskValue)) {
     520    // Build the least squares matrix and vector
     521
     522    // Dereference some pointers for speed in the loop
     523    psU8 *dataMask = NULL;              // Dereferenced version of mask for data points
     524    if (mask) {
     525        dataMask = mask->data.U8;
     526    }
     527    psU8 *termMask = myPoly->mask;      // Dereferenced version of mask for polynomial terms
     528    psF64 *ordinates = NULL;            // Dereferenced version of ordinate data
     529    if (x) {
     530        ordinates = x->data.F64;
     531    }
     532    psF64 *coordinates = f->data.F64;   // Dereferenced version of coordinate data
     533    psF64 *coordErr = NULL;             // Dereferenced version of coordinate errors
     534    if (fErr) {
     535        coordErr = fErr->data.F64;
     536    }
     537    psF64 *vector = B->data.F64;        // Dereferenced version of least-squares vector
     538    psF64 **matrix = A->data.F64;       // Dereferenced version of least-squares matrix
     539
     540    psVector* xSums = NULL;             // Contains 1, x, x^2, x^3, etc, for ease of calculation
     541    for (int k = 0; k < nData; k++) {
     542        if (dataMask && dataMask[k] & maskValue) {
    805543            continue;
    806544        }
    807         if (x != NULL) {
    808             xSums = BuildSums1D(xSums, x->data.F64[k], nTerm);
     545        if (ordinates) {
     546            xSums = BuildSums1D(xSums, ordinates[k], nTerm);
    809547        } else {
    810             xSums = BuildSums1D(xSums, (psF64) k, nTerm);
    811         }
    812 
    813         if (fErr == NULL) {
     548            xSums = BuildSums1D(xSums, (psF64)k, nTerm);
     549        }
     550        psF64 *sums = xSums->data.F64;  // Dereferenced version of sums
     551
     552        double wt;
     553        if (!fErr) {
    814554            wt = 1.0;
    815555        } else {
    816556            // this filters fErr == 0 values
    817             wt = (fErr->data.F64[k] == 0) ? 0.0 : 1.0 / PS_SQR(fErr->data.F64[k]);
    818         }
    819         for (psS32 i = 0; i < nTerm; i++) {
    820             if (myPoly->mask[i])
     557            wt = (coordErr[k] == 0) ? 0.0 : 1.0 / PS_SQR(coordErr[k]);
     558        }
     559
     560        for (int i = 0; i < nTerm; i++) {
     561            if (termMask[i]) {
    821562                continue;
    822             B->data.F64[i] += f->data.F64[k] * xSums->data.F64[i] * wt;
    823         }
    824 
    825         // we could skip half of the array and assign at the end
    826         // we must handle masked orders
    827         for (psS32 i = 0; i < nTerm; i++) {
    828             if (myPoly->mask[i])
    829                 continue;
    830             for (psS32 j = 0; j < nTerm; j++) {
    831                 if (myPoly->mask[j])
     563            }
     564            vector[i] += coordinates[k] * sums[i] * wt;
     565            matrix[i][i] += sums[2 * i] * wt; // The diagonal entry
     566            for (int j = i + 1; j < nTerm; j++) { // The upper diagonal only: we will use symmetry
     567                if (termMask[j]) {
    832568                    continue;
    833                 A->data.F64[i][j] += xSums->data.F64[i + j] * wt;
    834             }
    835         }
    836     }
    837 
     569                }
     570                double value = sums[i + j] * wt; // The value to add to the matrix
     571                matrix[i][j] += value;
     572                matrix[j][i] += value;  // Taking advantage of the symmetry
     573            }
     574        }
     575    }
     576    psFree(xSums);
     577
     578    #if 0
    838579    // set masked elements in A,B appropriately
     580    // PAP: Is this necessary, given we initialise to zero, and we have already masked on the terms?
    839581    for (int i = 0; i < nTerm; i++) {
    840         if (!myPoly->mask[i])
     582        if (!termMask[i])
    841583            continue;
    842         B->data.F64[i] = 0;
    843         for (int j = 0; j < nTerm; j++) {
    844             A->data.F64[i][j] = (i == j) ? 1 : 0;
    845         }
    846     }
    847 
    848 
    849     // XXX: rel10_ifa used psGaussJordan().  However, this failed tests.  So, I'm using psMatrixLUD().
     584        vector[i] = 0;
     585        matrix[i][i] = 1.0;
     586        for (int j = i + 1; j < nTerm; j++) {
     587            matrix[i][j] = 0.0;
     588            matrix[j][i] = 0.0;
     589        }
     590    }
     591    #endif
     592
     593    if (psTraceGetLevel(__func__) >= 4) {
     594        printf("Least-squares vector:\n");
     595        for (int i = 0; i < nTerm; i++) {
     596            printf("%f ", B->data.F64[i]);
     597        }
     598        printf("\n");
     599        printf("Least-squares matrix:\n");
     600        for (int i = 0; i < nTerm; i++) {
     601            for (int j = 0; j < nTerm; j++) {
     602                printf("%f ", A->data.F64[i][j]);
     603            }
     604            printf("\n");
     605        }
     606    }
     607
     608    // XXX: rel10_ifa used psMatrixGJSolve().  However, this failed tests.  So, I'm using psMatrixLUD().
    850609    if (USE_GAUSS_JORDAN) {
    851610        // GaussJordan version
    852         if (false == psGaussJordan(A, B)) {
     611        if (!psMatrixGJSolve(A, B)) {
    853612            psError(PS_ERR_UNKNOWN, false, "Could not solve linear equations.  Returning NULL.\n");
    854613            psFree(myPoly);
     
    897656    psFree(A);
    898657    psFree(B);
    899     psFree(xSums);
    900658
    901659    psTrace(__func__, 4, "---- %s() End ----\n", __func__);
     
    924682
    925683    PS_ASSERT_POLY_NON_NULL(poly, NULL);
    926     PS_ASSERT_INT_NONNEGATIVE(poly->nX, NULL);
     684    //PS_ASSERT_INT_NONNEGATIVE(poly->nX, NULL);
    927685    PS_ASSERT_VECTOR_NON_NULL(f, NULL);
    928686    PS_ASSERT_VECTOR_NON_EMPTY(f, NULL);
     
    981739        }
    982740
    983         if (1) {
    984             poly = vectorFitPolynomial1DCheb(poly, mask, maskValue, f64, fErr64, x64);
    985         } else {
    986             if (0) {
    987                 poly = vectorFitPolynomial1DChebySlow(poly, mask, maskValue, f64, fErr64, x64);
    988                 poly = vectorFitPolynomial1DChebyFast(poly, mask, maskValue, f64, fErr64, x64);
    989             }
    990         }
     741        poly = vectorFitPolynomial1DCheb(poly, mask, maskValue, f64, fErr64, x64);
     742
    991743        if (x == NULL) {
    992744            psFree(x64);
     
    1221973    }
    1222974
    1223     // I think this is 1 dimension down
    1224     psImage*     A = NULL;
    1225     psVector*    B = NULL;
    1226     psImage*   Sums = NULL;
    1227     psF64 wt;
    1228     psS32 nTerm;
    1229 
    1230     psS32 nXterm = 1 + myPoly->nX;
    1231     psS32 nYterm = 1 + myPoly->nY;
    1232     nTerm = nXterm * nYterm;
    1233 
    1234     A = psImageAlloc(nTerm, nTerm, PS_TYPE_F64);
    1235     B = psVectorAlloc(nTerm, PS_TYPE_F64);
     975    // Number of polynomial terms
     976    int nXterm = 1 + myPoly->nX;      // Number of terms in x
     977    int nYterm = 1 + myPoly->nY;      // Number of terms in y
     978    int nTerm = nXterm * nYterm;            // Total number of terms
     979
     980    psImage *A = psImageAlloc(nTerm, nTerm, PS_TYPE_F64); // Least-squares matrix
     981    psVector *B = psVectorAlloc(nTerm, PS_TYPE_F64); // Least-squares vector
    1236982    B->n = B->nalloc;
     983
    1237984    // Initialize data structures.
    1238985    if (!psImageInit(A, 0.0) || !psVectorInit(B, 0.0)) {
     
    1245992    }
    1246993
    1247     // Sums look like: 1, x, x^2, ... x^(2n+1), y, xy, x^2y, ... x^(2n+1)
    1248 
    1249     // Build the B and A data structs.
    1250     for (psS32 k  = 0; k < x->n; k++) {
    1251         if ((mask != NULL) && (mask->data.U8[k] & maskValue)) {
     994    // Dereference stuff, to make the loop go faster
     995    psF64 **matrix = A->data.F64;       // Dereference the least-squares matrix
     996    psF64 *vector = B->data.F64;        // Dereference the least-squares vector
     997    psU8 **termMask = myPoly->mask;     // Dereference mask for polynomial terms
     998    psU8 *dataMask = NULL;              // Dereference mask for data
     999    if (mask) {
     1000        dataMask = mask->data.U8;
     1001    }
     1002    psF64 *xData = x->data.F64;         // Dereference x
     1003    psF64 *yData = y->data.F64;         // Dereference y
     1004    psF64 *fData = f->data.F64;         // Dereference f
     1005    psF64 *fErrData = NULL;             // Dereference fErr
     1006    if (fErr) {
     1007        fErrData = fErr->data.F64;
     1008    }
     1009
     1010    // Build the least-squares matrix and vector
     1011    psImage *xySums = NULL;               // The sums: 1, x, x^2, ... x^(2n+1), y, xy, x^2y, ... x^(2n+1)
     1012    for (int k = 0; k < x->n; k++) {
     1013        if (dataMask && dataMask[k] & maskValue) {
    12521014            continue;
    12531015        }
    1254 
    1255         Sums = BuildSums2D(Sums, x->data.F64[k], y->data.F64[k], nXterm, nYterm);
    1256 
    1257         if (fErr == NULL) {
     1016        xySums = BuildSums2D(xySums, xData[k], yData[k], nXterm, nYterm);
     1017        psF64 **sums = xySums->data.F64;// Dereference sums
     1018
     1019        double wt;                      // Weight
     1020        if (!fErrData) {
    12581021            wt = 1.0;
    12591022        } else {
    12601023            // this filters fErr == 0 values
    1261             wt = (fErr->data.F64[k] == 0.0) ? 0.0 : 1.0 / PS_SQR(fErr->data.F64[k]);
    1262         }
    1263 
    1264         // we could skip half of the array and assign at the end
    1265         // we must handle masked orders
    1266         for (psS32 n = 0; n < nXterm; n++) {
    1267             for (psS32 m = 0; m < nYterm; m++) {
    1268                 if (myPoly->mask[n][m])
     1024            wt = (fErrData[k] == 0.0) ? 0.0 : 1.0 / PS_SQR(fErrData[k]);
     1025        }
     1026
     1027        // Iterating over the matrix
     1028        for (int i = 0; i < nTerm; i++) {
     1029            int l = i % nXterm;         // x index
     1030            int m = i / nXterm;         // y index
     1031            if (termMask[l][m]) {
     1032                continue;
     1033            }
     1034            vector[i] += fData[k] * sums[l][m] * wt;
     1035            matrix[i][i] += sums[2*l][2*m] * wt; // The diagonal entry
     1036            for (int j = i + 1; j < nTerm; j++) { // Doing the upper diagonal only: we will use symmetry
     1037                int p = j % nXterm;     // x index
     1038                int q = j / nXterm;     // y index
     1039                if (termMask[p][q]) {
    12691040                    continue;
    1270                 B->data.F64[n+m*nXterm] += f->data.F64[k] * Sums->data.F64[n][m] * wt;
    1271             }
    1272         }
    1273 
    1274         for (psS32 i = 0; i < nXterm; i++) {
    1275             for (psS32 j = 0; j < nYterm; j++) {
    1276                 if (myPoly->mask[i][j])
    1277                     continue;
    1278                 for (psS32 n = 0; n < nXterm; n++) {
    1279                     for (psS32 m = 0; m < nYterm; m++) {
    1280                         if (myPoly->mask[n][m])
    1281                             continue;
    1282                         A->data.F64[i+j*nXterm][n+m*nXterm] += Sums->data.F64[i+n][j+m] * wt;
    1283                     }
    1284                 }
    1285             }
    1286         }
    1287     }
    1288 
    1289     // set masked elements appropriately
    1290     for (int i = 0; i < nXterm; i++) {
    1291         for (int j = 0; j < nYterm; j++) {
    1292             if (!myPoly->mask[i][j])
    1293                 continue;
    1294             int nx = i+j*nXterm;
    1295             B->data.F64[nx] = 0;
    1296             for (int n = 0; n < nXterm; n++) {
    1297                 for (int m = 0; m < nYterm; m++) {
    1298                     int ny = n+m*nXterm;
    1299                     A->data.F64[nx][ny] = (nx == ny) ? 1 : 0;
    1300                 }
    1301             }
    1302         }
    1303     }
    1304 
    1305     if (false == psGaussJordan(A, B)) {
     1041                }
     1042                double value = sums[l+p][m+q] * wt; // Value to add in
     1043                matrix[i][j] += value;
     1044                matrix[j][i] += value;  // Taking advantage of the symmetry
     1045            }
     1046        }
     1047    }
     1048    psFree(xySums);
     1049
     1050    if (!psMatrixGJSolve(A, B)) {
    13061051        psError(PS_ERR_UNKNOWN, false, "Could not solve linear equations.  Returning NULL.\n");
    13071052        psFree(myPoly);
     
    13091054    } else {
    13101055        // select the appropriate solution entries
    1311         for (psS32 n = 0; n < nXterm; n++) {
    1312             for (psS32 m = 0; m < nYterm; m++) {
     1056        for (int n = 0; n < nXterm; n++) {
     1057            for (int m = 0; m < nYterm; m++) {
    13131058                myPoly->coeff[n][m] = B->data.F64[n+m*nXterm];
    13141059                myPoly->coeffErr[n][m] = sqrt(A->data.F64[n+m*nXterm][n+m*nXterm]);
     
    13191064    psFree(A);
    13201065    psFree(B);
    1321     psFree(Sums);
    13221066
    13231067    psTrace(__func__, 4, "---- %s() end ----\n", __func__);
     
    17671511    }
    17681512
    1769     // XXX: rel10_ifa used psGaussJordan().  However, this failed tests.  So, I'm using psMatrixLUD().
     1513    // XXX: rel10_ifa used psMatrixGJSolve().  However, this failed tests.  So, I'm using psMatrixLUD().
    17701514    if (USE_GAUSS_JORDAN) {
    17711515        // does the solution in place
    17721516        // The matrices were overflowing, so I switched to LUD.
    1773         if (false == psGaussJordan(A, B)) {
     1517        if (!psMatrixGJSolve(A, B)) {
    17741518            psFree(A);
    17751519            psFree(B);
     
    23242068
    23252069
    2326     // XXX: rel10_ifa used psGaussJordan().  However, this failed tests.  So, I'm using psMatrixLUD().
     2070    // XXX: rel10_ifa used psMatrixGJSolve().  However, this failed tests.  So, I'm using psMatrixLUD().
    23272071    if (USE_GAUSS_JORDAN) {
    23282072        // does the solution in place
    23292073        // The GaussJordan version was overflowing, so I'm using LUD.
    2330         if (false == psGaussJordan(A, B)) {
     2074        if (psMatrixGJSolve(A, B)) {
    23312075            psFree(A);
    23322076            psFree(B);
Note: See TracChangeset for help on using the changeset viewer.