Context Navigation

← Previous Change
Next Change →

psMinimizePolyFit.c

Timestamp:

May 10, 2006, 1:38:55 AM (20 years ago)

Author:

Paul Price

Message:

Rewrote vectorFitPolynomial1DCheb to use the standard linear fitting
method (invert least-squares matrix). Tweaked ordinary 1D and 2D
polynomial fitting for speed.

File:

: 1 edited

trunk/psLib/src/math/psMinimizePolyFit.c (modified) (21 diffs)

Legend:

: Unmodified
: Added
: Removed

trunk/psLib/src/math/psMinimizePolyFit.c

-              r6939
+              r7104
  *  @author EAM, IfA
+ *
  *  @version $Revision: 1.10 $ $Name: not supported by cvs2svn $
  *  @date $Date: 2006-04-21 20:59:51 $
+ *  @version $Revision: 1.11 $ $Name: not supported by cvs2svn $
+ *  @date $Date: 2006-05-10 11:38:55 $
+ *
  *  Copyright 2004-2005 Maui High Performance Computing Center, University of Hawaii
 …
 /*****************************************************************************/
 #include <stdio.h>
+#include <string.h>
 #include <float.h>
 #include <math.h>
 …
 polynomial of degree myPoly->nX to the data points (x, y) and return the
 coefficients of that polynomial.
-    NOTE: We currently have implemented three algorithms.  This one is
-    non-standard.  It ignores the orthogonal properties of the Chebyshev
-    polys, and their known zero values.  Instead, we first fit a regular
-    ordinary polynomial to the data.  This produces the coefficients for the
-    various x^i terms.  We then "reverse-engineer" those coefficients to
-    determine the coefficients of the Chebyshev polys: basically for each
-    c_ix^i term in the ordinary polynomial, we sum all x^i terms in the
-    Chebyshev polys: these must be equal.  This creates a set of nOrder+1
-    linear equations which can be easily solved.  The resulting vector is the
-    coefficients of the Chebyshev polys.
-    This method is significantly slower than the standard NR algorithm.  It
-    was explicitly requested that we not use the NR algorithm.
- Matrix A[][]:
-     Element A[i][j] is the coefficient of the x^i term in the j-th cheby poly.
-    XXX: This can be improved significantly, performance-wise.  The second set
-    of linear equations which must be "solved" are already in upper-triangular
-    form.  One must only perform the reverse-substitution, LUD decomposition.
-    XXX: Also, we don't really need to generate the chebPolys data structure.
-    We simply need the matrix which corresponds to a transpose of each Cheby
-    polys coefficients.
 *****************************************************************************/
 static psPolynomial1D *vectorFitPolynomial1DCheb(
 …
+    }
+    psS32 polyOrder = myPoly->nX;
+    psS32 numPoly = polyOrder + 1;
+    //
+    // We first fit an ordinary polynomial to the data.
+    //
+    psPolynomial1D *ordPoly = psPolynomial1DAlloc(PS_POLYNOMIAL_ORD, polyOrder);
+    psPolynomial1D *rc = VectorFitPolynomial1DOrd(ordPoly, mask, maskValue, y, yErr, x);
+    if (rc == NULL) {
+        psError(PS_ERR_UNKNOWN, false, "Could not fit a preliminary polynomial to the data vector.  Returning NULL.\n");
+        psFree(myPoly);
+        return(NULL);
+    }
+    //
+    // Create the A-matrix and B-vector which correspond to the linear equations
+    // which will be solved and will then yield the Cheby poly coefficients.
+    //
+    psPolynomial1D **chebPolys = p_psCreateChebyshevPolys(numPoly);
+    psImage *A = psImageAlloc(numPoly, numPoly, PS_TYPE_F64);
+    psVector *B = psVectorAlloc(numPoly, PS_TYPE_F64);
+    B->n = B->nalloc;
+    for (psS32 i = 0 ; i < numPoly ; i++) {
+        for (psS32 j = 0 ; j < numPoly ; j++) {
+            A->data.F64[i][j] = 0.0;
+        }
+        B->data.F64[i] = ordPoly->coeff[i];
+    }
+    for (psS32 i = 0 ; i < numPoly ; i++) {
+        for (psS32 j = 0 ; j < numPoly ; j++) {
+            if (i <= chebPolys[j]->nX)
+                A->data.F64[i][j]+= chebPolys[j]->coeff[i];
+        }
+    }
+    // The following statement is essential.  It derives from (5.8.8) NR.
+    A->data.F64[0][0] = 0.5;
+    psFree(ordPoly);
+    int numTerms = myPoly->nX + 1;      // Number of polynomial terms
+    int numData = x->n;                 // Number of data elements
+    psImage *A = psImageAlloc(numTerms, numTerms, PS_TYPE_F64); // Least-squares matrix
+    psVector *B = psVectorAlloc(numTerms, PS_TYPE_F64); // Least-squares vector
+    B->n = numTerms;
+    psImageInit(A, 0.0);
+    psVectorInit(B, 0.0);
+    psPolynomial1D **chebPolys = p_psCreateChebyshevPolys(numTerms); // The chebyshev polynomials
+    psImage *sums = psImageAlloc(numData, numTerms, PS_TYPE_F64);
+    for (int i = 0; i < numTerms; i++) {
+        if (myPoly->mask[i]) {
+            continue;
+        }
+        psPolynomial1D *cheb = chebPolys[i];
+        psVector *sum = psPolynomial1DEvalVector(cheb, x);
+        memcpy(sums->data.F64[i], sum->data.F64, numData*sizeof(psF64));
+        psFree(sum);
+        psFree(cheb);
+    }
+    psFree(chebPolys);
+    // Dereference pointers, for speed in the loop
+    psF64 **matrix = A->data.F64;       // Least-squares matrix
+    psF64 *vector = B->data.F64;        // Least-squares vector
+    psU8 *dataMask = NULL;              // Mask for data
+    if (mask) {
+        dataMask = mask->data.U8;
+    }
+    psU8 *termMask = myPoly->mask;      // Mask for polynomial terms
+    psF64 *yData = y->data.F64;         // Coordinate data
+    psF64 *yErrData = NULL;             // Errors in the coordinate
+    if (yErr) {
+        yErrData = yErr->data.F64;
+    }
+    psF64 **sumsData = sums->data.F64;  // Sums
+    for (int k = 0; k < numData; k++) {
+        if (dataMask && dataMask[k]) {
+            continue;
+        }
+        double wt;
+        if (!yErr) {
+            wt = 1.0;
+        } else {
+            // this filters yErr == 0 values
+            wt = (yErrData[k] == 0) ? 0.0 : 1.0 / PS_SQR(yErrData[k]);
+        }
+        for (int i = 0; i < numTerms; i++) {
+            if (termMask[i]) {
+                continue;
+            }
+            vector[i] += yData[k] * sumsData[i][k] * wt;
+            matrix[i][i] += sumsData[i][k] * sumsData[i][k] * wt; // The diagonal entry
+            for (int j = i + 1; j < numTerms; j++) { // The upper diagonal only: we will use symmetry
+                if (termMask[j]) {
+                    continue;
+                }
+                double value = sumsData[i][k] * sumsData[j][k] * wt; // The value to add to the matrix
+                matrix[i][j] += value;
+                matrix[j][i] += value;  // Taking advantage of the symmetry
+            }
+        }
+    }
+    psFree(sums);
     if (psTraceGetLevel(__func__) >= 6) {
         PS_IMAGE_PRINT_F64(A);
 …
     if (USE_GAUSS_JORDAN) {
         // GaussJordan version
         if (false == psGaussJordan(A, B)) {
+        if (!psMatrixGJSolve(A, B)) {
             psError(PS_ERR_UNKNOWN, false, "Could not solve linear equations.  Returning NULL.\n");
             psFree(myPoly);
 …
             // the first nTerm entries in B correspond directly to the desired
             // polynomial coefficients.  this is only true for the 1D case
             for (psS32 k = 0; k < numPoly; k++) {
+            for (psS32 k = 0; k < numTerms; k++) {
                 myPoly->coeff[k] = B->data.F64[k];
+            }
+                myPoly->coeffErr[k] = sqrt(A->data.F64[k][k]);
+            }
+            // The constant needs to be multiplied by 2, because it's half the a_0.
+            myPoly->coeff[0] *= 2.0;
+            myPoly->coeffErr[0] *= 2.0;
+        }
     } else {
 …
         psVector* coeffs = NULL;
         ALUD = psImageAlloc(numPoly, numPoly, PS_TYPE_F64);
+        ALUD = psImageAlloc(numTerms, numTerms, PS_TYPE_F64);
         ALUD = psMatrixLUD(ALUD, &outPerm, A);
         if (ALUD == NULL) {
 …
                 myPoly = NULL;
             } else {
                 for (psS32 k = 0; k < numPoly; k++) {
+                for (psS32 k = 0; k < numTerms; k++) {
                     myPoly->coeff[k] = coeffs->data.F64[k];
+                }
 …
     psFree(A);
     psFree(B);
-    for (psS32 i=0;i<numPoly;i++) {
-        psFree(chebPolys[i]);
+    }
-    psFree(chebPolys);
     return(myPoly);
+}
-/******************************************************************************
-vectorFitPolynomial1DChebSlow():  This routine will fit a Chebyshev polynomial
-of degree myPoly to the data points (x, y) and return the coefficients of that
-polynomial.
-    NOTE: We currently have implemented three algorithms.  This one is
-    non-standard.  It ignores the orthogonal properties of the Chebyshev
-    polys, and their known zero values.  Instead, we do build a system of
-    linear equations based on minimizing the chi-squared for all data points
-    and we then solve those equations.  This method is significantly slower
-    than the other algorithms.  It was explicitly requested that we implement
-    this algorithm.
-*****************************************************************************/
-static psPolynomial1D *vectorFitPolynomial1DChebySlow(
-    psPolynomial1D* myPoly,
-    const psVector *mask,
-    psMaskType maskValue,
-    const psVector* y,
-    const psVector* yErr,
-    const psVector* x)
+{
-    PS_ASSERT_POLY_NON_NULL(myPoly, NULL);
-    PS_ASSERT_INT_LARGER_THAN_OR_EQUAL(myPoly->nX, 0, NULL);
-    PS_ASSERT_VECTOR_NON_NULL(y, NULL);
-    PS_ASSERT_VECTOR_TYPE(y, PS_TYPE_F64, NULL);
-    if (yErr != NULL) {
-        PS_ASSERT_VECTORS_SIZE_EQUAL(y, yErr, NULL);
-        PS_ASSERT_VECTOR_TYPE(yErr, PS_TYPE_F64, NULL);
+    }
-    if (x != NULL) {
-        PS_ASSERT_VECTORS_SIZE_EQUAL(y, x, NULL);
-        PS_ASSERT_VECTOR_TYPE(x, PS_TYPE_F64, NULL);
+    }
-    if (mask != NULL) {
-        PS_ASSERT_VECTORS_SIZE_EQUAL(y, mask, NULL);
-        PS_ASSERT_VECTOR_TYPE(mask, PS_TYPE_U8, NULL);
+    }
-    psS32 NUM_POLY = myPoly->nX+1;
-    psS32 NUM_DATA = 0;
-    if (mask != NULL) {
-        for (psS32 d = 0 ; d < mask->n  ; d++) {
-            if (!(maskValue & mask->data.U8[d])) {
-                NUM_DATA++;
+            }
+        }
-    } else {
-        NUM_DATA = x->n;
+    }
-    psPolynomial1D **chebPolys = p_psCreateChebyshevPolys(NUM_POLY);
-    if (0) {
-        for (psS32 j = 0; j < NUM_POLY; j++) {
-            PS_POLY_PRINT_1D(chebPolys[j]);
+        }
+    }
-    // Pre-compute the various Chebyshev polys T_i(x[j]) for all x[]
-    psImage *tMatrix = psImageAlloc(NUM_DATA, NUM_POLY, PS_TYPE_F64);
-    for (psS32 p = 0 ; p < NUM_POLY ; p++) {
-        if (mask == NULL) {
-            for (psS32 d = 0 ; d < NUM_DATA ; d++) {
-                tMatrix->data.F64[p][d] = psPolynomial1DEval(chebPolys[p], x->data.F64[d]);
+            }
-        } else {
-            psS32 dPtr = 0;
-            for (psS32 d = 0 ; d < mask->n ; d++) {
-                if (!(maskValue & mask->data.U8[d])) {
-                    tMatrix->data.F64[p][dPtr] = psPolynomial1DEval(chebPolys[p], x->data.F64[d]);
-                    dPtr++;
+                }
+            }
+        }
+    }
-    // Compute the A matrix
-    psImage *A = psImageAlloc(NUM_POLY, NUM_POLY, PS_TYPE_F64);
-    for (psS32 i = 0 ; i < NUM_POLY ; i++) {
-        for (psS32 j = 0 ; j < NUM_POLY ; j++) {
-            A->data.F64[i][j] = 0.0;
-            for (psS32 d = 0 ; d < NUM_DATA ; d++) {
-                A->data.F64[i][j]+= (tMatrix->data.F64[j][d] * tMatrix->data.F64[i][d]);
+            }
+        }
-        // This is because of the last term in: f(x) = SUM[c_i * T_i(x)]  -  c_0/2
-        for (psS32 d = 0 ; d < NUM_DATA ; d++) {
-            A->data.F64[i][0] -= (tMatrix->data.F64[i][d]/2.0);
+        }
+    }
-    // Compute the B vector
-    psVector *B = psVectorAlloc(NUM_POLY, PS_TYPE_F64);
-    B->n = B->nalloc;
-    for (psS32 i = 0 ; i < NUM_POLY ; i++) {
-        B->data.F64[i] = 0.0;
-        if (mask == NULL) {
-            for (psS32 d = 0 ; d < NUM_DATA ; d++) {
-                B->data.F64[i] += (y->data.F64[d] * tMatrix->data.F64[i][d]);
+            }
-        } else {
-            psS32 dPtr = 0;
-            for (psS32 d = 0 ; d < mask->n ; d++) {
-                if (!(maskValue & mask->data.U8[d])) {
-                    B->data.F64[i] += (y->data.F64[d] * tMatrix->data.F64[i][dPtr++]);
+                }
+            }
+        }
+    }
-    if (USE_GAUSS_JORDAN) {
-        // GaussJordan version
-        if (false == psGaussJordan(A, B)) {
-            psError(PS_ERR_UNKNOWN, false, "Could not solve linear equations.  Returning NULL.\n");
-            psFree(myPoly);
-            myPoly = NULL;
-        } else {
-            // the first nTerm entries in B correspond directly to the desired
-            // polynomial coefficients.  this is only true for the 1D case
-            for (psS32 k = 0; k < NUM_POLY; k++) {
-                myPoly->coeff[k] = B->data.F64[k];
+            }
+        }
-    } else {
-        // LUD version of the fit
-        psImage *ALUD = NULL;
-        psVector* outPerm = NULL;
-        psVector* coeffs = NULL;
-        ALUD = psImageAlloc(NUM_POLY, NUM_POLY, PS_TYPE_F64);
-        ALUD = psMatrixLUD(ALUD, &outPerm, A);
-        if (ALUD == NULL) {
-            psError(PS_ERR_UNKNOWN, false, "Could not do LUD decomposition on matrix.  Returning NULL.\n");
-            psFree(myPoly);
-            myPoly = NULL;
-        } else {
-            coeffs = psMatrixLUSolve(coeffs, ALUD, B, outPerm);
-            if (coeffs == NULL) {
-                psError(PS_ERR_UNKNOWN, false, "Could not solve LUD matrix.  Returning NULL.\n");
-                psFree(myPoly);
-                myPoly = NULL;
-            } else {
-                for (psS32 k = 0; k < NUM_POLY; k++) {
-                    myPoly->coeff[k] = coeffs->data.F64[k];
+                }
+            }
+        }
-        psFree(ALUD);
-        psFree(coeffs);
-        psFree(outPerm);
+    }
-    psFree(A);
-    psFree(B);
-    psFree(tMatrix);
-    for (psS32 i=0;i<NUM_POLY;i++) {
-        psFree(chebPolys[i]);
+    }
-    psFree(chebPolys);
-    return(myPoly);
+}
-/******************************************************************************
-vectorFitPolynomial1DChebFast():  This routine will fit a Chebyshev polynomial
-of degree myPoly to the data points (x, y) and return the coefficients of that
-polynomial.
-    NOTE: We currently have three algorithms.  This is the standard method which
-    uses the orthogonal properties of the Chebyshev polys, and their known
-    zero values.  This is significantly faster than the chi-squared
-    approaches.
-    NOTE: This function will not work properly if the x-vector does not fully span
-    the [-1:1] interval.
-    NOTE: mask, maskValue, yErr are ignored with this function.
-*****************************************************************************/
-static psPolynomial1D *vectorFitPolynomial1DChebyFast(
-    psPolynomial1D* myPoly,
-    const psVector *mask,
-    psMaskType maskValue,
-    const psVector* y,
-    const psVector* yErr,
-    const psVector* x)
+{
-    PS_ASSERT_POLY_NON_NULL(myPoly, NULL);
-    PS_ASSERT_INT_NONNEGATIVE(myPoly->nX, NULL);
-    PS_ASSERT_VECTOR_NON_NULL(y, NULL);
-    PS_ASSERT_VECTOR_TYPE(y, PS_TYPE_F64, NULL);
-    if (yErr != NULL) {
-        PS_ASSERT_VECTORS_SIZE_EQUAL(y, yErr, NULL);
-        PS_ASSERT_VECTOR_TYPE(yErr, PS_TYPE_F64, NULL);
+    }
-    if (x != NULL) {
-        PS_ASSERT_VECTORS_SIZE_EQUAL(y, x, NULL);
-        PS_ASSERT_VECTOR_TYPE(x, PS_TYPE_F64, NULL);
+    }
-    psS32 j;
-    psS32 k;
-    psS32 n = y->n;
-    psF64 fac;
-    psF64 sum;
-    PS_VECTOR_GEN_STATIC_RECYCLED(f, n, PS_TYPE_F64);
-    psScalar *fScalar;
-    psScalar tmpScalar;
-    tmpScalar.type.type = PS_TYPE_F64;
-    // These assignments appear too simple to warrant code and
-    // variable declarations.  I retain them here to maintain coherence
-    // with the NR code.
-    psF64 min = -1.0;
-    psF64 max = 1.0;
-    psF64 bma = 0.5 * (max-min);  // 1
-    psF64 bpa = 0.5 * (max+min);  // 0
-    // In this loop, we first calculate the values of X for which the
-    // Chebyshev polynomials are zero (see NR, section 5.4).  Then we
-    // calculate the value of the function we are fitting the Chebyshev
-    // polynomials to at those values of X.  This is a bit tricky since
-    // we don't know that function.  So, we instead do third-order Lagrange
-    // interpolation at the point X for the psVectors x,y for which we
-    // are fitting this Chebyshev polynomial to.
-    for (psS32 i=0;i<n;i++) {
-        // NR 5.8.4
-        // NR 5.8.4
-        psF64 Y = cos(M_PI * (0.5 + ((psF32) i)) / ((psF32) n));
-        psF64 X = (Y + bma + bpa) - 1.0;
-        tmpScalar.data.F64 = X;
-        // We interpolate against the tabulated x,y vectors to determine the
-        // function value at X.
-        // XXX: This is somewhat of a hack to handle cases where the x vector does
-        // not fully span the [-1.0:1.0] interval.  We set the values of f[] to the
-        // values of y[] at those endpoints.
-        // XXX: This only works if x[] is increasing.
-        if (X <= x->data.F64[0]) {
-            f->data.F64[i] = y->data.F64[0];
-        } else if (X >= x->data.F64[x->n-1]) {
-            f->data.F64[i] = y->data.F64[x->n-1];
-        } else {
-            fScalar = p_psVectorInterpolate(NULL, (psVector *) x, (psVector *) y, 3, &tmpScalar);
-            if (fScalar == NULL) {
-                psError(PS_ERR_UNKNOWN, false, "Could not perform vector interpolation.  Returning NULL.\n");
-                psFree(myPoly)
-                return(NULL);
+            }
-            f->data.F64[i] = fScalar->data.F64;
-            psFree(fScalar);
+        }
-        psTrace(__func__, 6, "(x, X, y, f(X)) is (%f, %f, %f, %f)\n",
-                x->data.F64[i], X, y->data.F64[i], f->data.F64[i]);
+    }
-    // We have the values for f() at the zero points, we now calculate the
-    // coefficients of the Chebyshev polynomial: NR 5.8.7.
-    fac = 2.0/((psF32) n);
-    for (j=0;j<myPoly->nX+1;j++) {
-        sum = 0.0;
-        for (k=0;k<n;k++) {
-            sum+= f->data.F64[k] *
-                  cos(M_PI * ((psF32) j) * (0.5 + ((psF32) k)) / ((psF32) n));
+        }
-        myPoly->coeff[j] = fac * sum;
+    }
-    return(myPoly);
+}
 /******************************************************************************
 …
     psTrace(__func__, 4, "---- %s() begin ----\n", __func__);
     PS_ASSERT_POLY_NON_NULL(myPoly, NULL);
-    PS_ASSERT_INT_NONNEGATIVE(myPoly->nX, NULL);
     PS_ASSERT_VECTOR_NON_NULL(f, NULL);
     PS_ASSERT_VECTOR_TYPE(f, PS_TYPE_F64, NULL);
     if (mask != NULL) {
+    if (mask) {
         PS_ASSERT_VECTORS_SIZE_EQUAL(f, mask, NULL);
         PS_ASSERT_VECTOR_TYPE(mask, PS_TYPE_U8, NULL);
+    }
     if (x != NULL) {
+    if (x) {
         PS_ASSERT_VECTORS_SIZE_EQUAL(f, x, NULL);
         PS_ASSERT_VECTOR_TYPE(x, PS_TYPE_F64, NULL);
+    }
     if (fErr != NULL) {
+    if (fErr) {
         PS_ASSERT_VECTORS_SIZE_EQUAL(f, fErr, NULL);
         PS_ASSERT_VECTOR_TYPE(fErr, PS_TYPE_F64, NULL);
+    }
-    psImage* A = NULL;
-    psVector* B = NULL;
-    psVector* xSums = NULL;
-    psS32 nTerm;
-    psS32 nOrder;
-    psF64 wt;
     if (psTraceGetLevel(__func__) >= 6) {
 …
+    }
+    nTerm = 1 + myPoly->nX;
+    nOrder = nTerm - 1;
+    A = psImageAlloc(nTerm, nTerm, PS_TYPE_F64);
+    B = psVectorAlloc(nTerm, PS_TYPE_F64);
+    int nTerm = myPoly->nX + 1;         // Number of terms in the equation
+    int nData = f->n;                   // Number of data points
+    psImage *A = psImageAlloc(nTerm, nTerm, PS_TYPE_F64); // Least-squares matrix
+    psVector *B = psVectorAlloc(nTerm, PS_TYPE_F64); // Least-squares vector
     B->n = B->nalloc;
     // Initialize data structures.
     if (!psImageInit(A, 0.0) || !psVectorInit(B, 0.0)) {
 …
+    }
+    // xSums look like: 1, x, x^2, ... x^(2n+1)
+    // Build the B and A data structs.
+    // XXX EAM : use temp pointers eg vB = B->data.F64 to save redirects
+    for (psS32 k = 0; k < f->n; k++) {
+        if ((mask != NULL) && (mask->data.U8[k] && maskValue)) {
+    // Build the least squares matrix and vector
+    // Dereference some pointers for speed in the loop
+    psU8 *dataMask = NULL;              // Dereferenced version of mask for data points
+    if (mask) {
+        dataMask = mask->data.U8;
+    }
+    psU8 *termMask = myPoly->mask;      // Dereferenced version of mask for polynomial terms
+    psF64 *ordinates = NULL;            // Dereferenced version of ordinate data
+    if (x) {
+        ordinates = x->data.F64;
+    }
+    psF64 *coordinates = f->data.F64;   // Dereferenced version of coordinate data
+    psF64 *coordErr = NULL;             // Dereferenced version of coordinate errors
+    if (fErr) {
+        coordErr = fErr->data.F64;
+    }
+    psF64 *vector = B->data.F64;        // Dereferenced version of least-squares vector
+    psF64 **matrix = A->data.F64;       // Dereferenced version of least-squares matrix
+    psVector* xSums = NULL;             // Contains 1, x, x^2, x^3, etc, for ease of calculation
+    for (int k = 0; k < nData; k++) {
+        if (dataMask && dataMask[k] & maskValue) {
             continue;
+        }
         if (x != NULL) {
             xSums = BuildSums1D(xSums, x->data.F64[k], nTerm);
+        if (ordinates) {
+            xSums = BuildSums1D(xSums, ordinates[k], nTerm);
         } else {
+            xSums = BuildSums1D(xSums, (psF64) k, nTerm);
+        }
+        if (fErr == NULL) {
+            xSums = BuildSums1D(xSums, (psF64)k, nTerm);
+        }
+        psF64 *sums = xSums->data.F64;  // Dereferenced version of sums
+        double wt;
+        if (!fErr) {
             wt = 1.0;
         } else {
             // this filters fErr == 0 values
+            wt = (fErr->data.F64[k] == 0) ? 0.0 : 1.0 / PS_SQR(fErr->data.F64[k]);
+        }
+        for (psS32 i = 0; i < nTerm; i++) {
+            if (myPoly->mask[i])
+            wt = (coordErr[k] == 0) ? 0.0 : 1.0 / PS_SQR(coordErr[k]);
+        }
+        for (int i = 0; i < nTerm; i++) {
+            if (termMask[i]) {
                 continue;
+            B->data.F64[i] += f->data.F64[k] * xSums->data.F64[i] * wt;
+        }
+        // we could skip half of the array and assign at the end
+        // we must handle masked orders
+        for (psS32 i = 0; i < nTerm; i++) {
+            if (myPoly->mask[i])
+                continue;
+            for (psS32 j = 0; j < nTerm; j++) {
+                if (myPoly->mask[j])
+            }
+            vector[i] += coordinates[k] * sums[i] * wt;
+            matrix[i][i] += sums[2 * i] * wt; // The diagonal entry
+            for (int j = i + 1; j < nTerm; j++) { // The upper diagonal only: we will use symmetry
+                if (termMask[j]) {
                     continue;
+                A->data.F64[i][j] += xSums->data.F64[i + j] * wt;
+            }
+        }
+    }
+                }
+                double value = sums[i + j] * wt; // The value to add to the matrix
+                matrix[i][j] += value;
+                matrix[j][i] += value;  // Taking advantage of the symmetry
+            }
+        }
+    }
+    psFree(xSums);
+    #if 0
     // set masked elements in A,B appropriately
+    // PAP: Is this necessary, given we initialise to zero, and we have already masked on the terms?
     for (int i = 0; i < nTerm; i++) {
         if (!myPoly->mask[i])
+        if (!termMask[i])
             continue;
+        B->data.F64[i] = 0;
+        for (int j = 0; j < nTerm; j++) {
+            A->data.F64[i][j] = (i == j) ? 1 : 0;
+        }
+    }
+    // XXX: rel10_ifa used psGaussJordan().  However, this failed tests.  So, I'm using psMatrixLUD().
+        vector[i] = 0;
+        matrix[i][i] = 1.0;
+        for (int j = i + 1; j < nTerm; j++) {
+            matrix[i][j] = 0.0;
+            matrix[j][i] = 0.0;
+        }
+    }
+    #endif
+    if (psTraceGetLevel(__func__) >= 4) {
+        printf("Least-squares vector:\n");
+        for (int i = 0; i < nTerm; i++) {
+            printf("%f ", B->data.F64[i]);
+        }
+        printf("\n");
+        printf("Least-squares matrix:\n");
+        for (int i = 0; i < nTerm; i++) {
+            for (int j = 0; j < nTerm; j++) {
+                printf("%f ", A->data.F64[i][j]);
+            }
+            printf("\n");
+        }
+    }
+    // XXX: rel10_ifa used psMatrixGJSolve().  However, this failed tests.  So, I'm using psMatrixLUD().
     if (USE_GAUSS_JORDAN) {
         // GaussJordan version
         if (false == psGaussJordan(A, B)) {
+        if (!psMatrixGJSolve(A, B)) {
             psError(PS_ERR_UNKNOWN, false, "Could not solve linear equations.  Returning NULL.\n");
             psFree(myPoly);
 …
     psFree(A);
     psFree(B);
-    psFree(xSums);
     psTrace(__func__, 4, "---- %s() End ----\n", __func__);
 …
     PS_ASSERT_POLY_NON_NULL(poly, NULL);
     PS_ASSERT_INT_NONNEGATIVE(poly->nX, NULL);
+    //PS_ASSERT_INT_NONNEGATIVE(poly->nX, NULL);
     PS_ASSERT_VECTOR_NON_NULL(f, NULL);
     PS_ASSERT_VECTOR_NON_EMPTY(f, NULL);
 …
+        }
+        if (1) {
+            poly = vectorFitPolynomial1DCheb(poly, mask, maskValue, f64, fErr64, x64);
+        } else {
+            if (0) {
+                poly = vectorFitPolynomial1DChebySlow(poly, mask, maskValue, f64, fErr64, x64);
+                poly = vectorFitPolynomial1DChebyFast(poly, mask, maskValue, f64, fErr64, x64);
+            }
+        }
+        poly = vectorFitPolynomial1DCheb(poly, mask, maskValue, f64, fErr64, x64);
         if (x == NULL) {
             psFree(x64);
 …
+    }
+    // I think this is 1 dimension down
+    psImage*     A = NULL;
+    psVector*    B = NULL;
+    psImage*   Sums = NULL;
+    psF64 wt;
+    psS32 nTerm;
+    psS32 nXterm = 1 + myPoly->nX;
+    psS32 nYterm = 1 + myPoly->nY;
+    nTerm = nXterm * nYterm;
+    A = psImageAlloc(nTerm, nTerm, PS_TYPE_F64);
+    B = psVectorAlloc(nTerm, PS_TYPE_F64);
+    // Number of polynomial terms
+    int nXterm = 1 + myPoly->nX;      // Number of terms in x
+    int nYterm = 1 + myPoly->nY;      // Number of terms in y
+    int nTerm = nXterm * nYterm;            // Total number of terms
+    psImage *A = psImageAlloc(nTerm, nTerm, PS_TYPE_F64); // Least-squares matrix
+    psVector *B = psVectorAlloc(nTerm, PS_TYPE_F64); // Least-squares vector
     B->n = B->nalloc;
     // Initialize data structures.
     if (!psImageInit(A, 0.0) || !psVectorInit(B, 0.0)) {
 …
+    }
+    // Sums look like: 1, x, x^2, ... x^(2n+1), y, xy, x^2y, ... x^(2n+1)
+    // Build the B and A data structs.
+    for (psS32 k  = 0; k < x->n; k++) {
+        if ((mask != NULL) && (mask->data.U8[k] & maskValue)) {
+    // Dereference stuff, to make the loop go faster
+    psF64 **matrix = A->data.F64;       // Dereference the least-squares matrix
+    psF64 *vector = B->data.F64;        // Dereference the least-squares vector
+    psU8 **termMask = myPoly->mask;     // Dereference mask for polynomial terms
+    psU8 *dataMask = NULL;              // Dereference mask for data
+    if (mask) {
+        dataMask = mask->data.U8;
+    }
+    psF64 *xData = x->data.F64;         // Dereference x
+    psF64 *yData = y->data.F64;         // Dereference y
+    psF64 *fData = f->data.F64;         // Dereference f
+    psF64 *fErrData = NULL;             // Dereference fErr
+    if (fErr) {
+        fErrData = fErr->data.F64;
+    }
+    // Build the least-squares matrix and vector
+    psImage *xySums = NULL;               // The sums: 1, x, x^2, ... x^(2n+1), y, xy, x^2y, ... x^(2n+1)
+    for (int k = 0; k < x->n; k++) {
+        if (dataMask && dataMask[k] & maskValue) {
             continue;
+        }
+        Sums = BuildSums2D(Sums, x->data.F64[k], y->data.F64[k], nXterm, nYterm);
+        if (fErr == NULL) {
+        xySums = BuildSums2D(xySums, xData[k], yData[k], nXterm, nYterm);
+        psF64 **sums = xySums->data.F64;// Dereference sums
+        double wt;                      // Weight
+        if (!fErrData) {
             wt = 1.0;
         } else {
             // this filters fErr == 0 values
+            wt = (fErr->data.F64[k] == 0.0) ? 0.0 : 1.0 / PS_SQR(fErr->data.F64[k]);
+        }
+        // we could skip half of the array and assign at the end
+        // we must handle masked orders
+        for (psS32 n = 0; n < nXterm; n++) {
+            for (psS32 m = 0; m < nYterm; m++) {
+                if (myPoly->mask[n][m])
+            wt = (fErrData[k] == 0.0) ? 0.0 : 1.0 / PS_SQR(fErrData[k]);
+        }
+        // Iterating over the matrix
+        for (int i = 0; i < nTerm; i++) {
+            int l = i % nXterm;         // x index
+            int m = i / nXterm;         // y index
+            if (termMask[l][m]) {
+                continue;
+            }
+            vector[i] += fData[k] * sums[l][m] * wt;
+            matrix[i][i] += sums[2*l][2*m] * wt; // The diagonal entry
+            for (int j = i + 1; j < nTerm; j++) { // Doing the upper diagonal only: we will use symmetry
+                int p = j % nXterm;     // x index
+                int q = j / nXterm;     // y index
+                if (termMask[p][q]) {
                     continue;
+                B->data.F64[n+m*nXterm] += f->data.F64[k] * Sums->data.F64[n][m] * wt;
+            }
+        }
+        for (psS32 i = 0; i < nXterm; i++) {
+            for (psS32 j = 0; j < nYterm; j++) {
+                if (myPoly->mask[i][j])
+                    continue;
+                for (psS32 n = 0; n < nXterm; n++) {
+                    for (psS32 m = 0; m < nYterm; m++) {
+                        if (myPoly->mask[n][m])
+                            continue;
+                        A->data.F64[i+j*nXterm][n+m*nXterm] += Sums->data.F64[i+n][j+m] * wt;
+                    }
+                }
+            }
+        }
+    }
+    // set masked elements appropriately
+    for (int i = 0; i < nXterm; i++) {
+        for (int j = 0; j < nYterm; j++) {
+            if (!myPoly->mask[i][j])
+                continue;
+            int nx = i+j*nXterm;
+            B->data.F64[nx] = 0;
+            for (int n = 0; n < nXterm; n++) {
+                for (int m = 0; m < nYterm; m++) {
+                    int ny = n+m*nXterm;
+                    A->data.F64[nx][ny] = (nx == ny) ? 1 : 0;
+                }
+            }
+        }
+    }
+    if (false == psGaussJordan(A, B)) {
+                }
+                double value = sums[l+p][m+q] * wt; // Value to add in
+                matrix[i][j] += value;
+                matrix[j][i] += value;  // Taking advantage of the symmetry
+            }
+        }
+    }
+    psFree(xySums);
+    if (!psMatrixGJSolve(A, B)) {
         psError(PS_ERR_UNKNOWN, false, "Could not solve linear equations.  Returning NULL.\n");
         psFree(myPoly);
 …
     } else {
         // select the appropriate solution entries
         for (psS32 n = 0; n < nXterm; n++) {
             for (psS32 m = 0; m < nYterm; m++) {
+        for (int n = 0; n < nXterm; n++) {
+            for (int m = 0; m < nYterm; m++) {
                 myPoly->coeff[n][m] = B->data.F64[n+m*nXterm];
                 myPoly->coeffErr[n][m] = sqrt(A->data.F64[n+m*nXterm][n+m*nXterm]);
 …
     psFree(A);
     psFree(B);
-    psFree(Sums);
     psTrace(__func__, 4, "---- %s() end ----\n", __func__);
 …
+    }
     // XXX: rel10_ifa used psGaussJordan().  However, this failed tests.  So, I'm using psMatrixLUD().
+    // XXX: rel10_ifa used psMatrixGJSolve().  However, this failed tests.  So, I'm using psMatrixLUD().
     if (USE_GAUSS_JORDAN) {
         // does the solution in place
         // The matrices were overflowing, so I switched to LUD.
         if (false == psGaussJordan(A, B)) {
+        if (!psMatrixGJSolve(A, B)) {
             psFree(A);
             psFree(B);
 …
     // XXX: rel10_ifa used psGaussJordan().  However, this failed tests.  So, I'm using psMatrixLUD().
+    // XXX: rel10_ifa used psMatrixGJSolve().  However, this failed tests.  So, I'm using psMatrixLUD().
     if (USE_GAUSS_JORDAN) {
         // does the solution in place
         // The GaussJordan version was overflowing, so I'm using LUD.
         if (false == psGaussJordan(A, B)) {
+        if (psMatrixGJSolve(A, B)) {
             psFree(A);
             psFree(B);

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 7104 for trunk/psLib/src/math/psMinimizePolyFit.c

Legend:

trunk/psLib/src/math/psMinimizePolyFit.c

Download in other formats: