Index: trunk/psLib/src/imageops/psImageConvolve.c
===================================================================
--- trunk/psLib/src/imageops/psImageConvolve.c	(revision 7077)
+++ trunk/psLib/src/imageops/psImageConvolve.c	(revision 7089)
@@ -5,6 +5,6 @@
  *  @author Robert DeSonia, MHPCC
  *
- *  @version $Revision: 1.34 $ $Name: not supported by cvs2svn $
- *  @date $Date: 2006-05-05 23:55:56 $
+ *  @version $Revision: 1.35 $ $Name: not supported by cvs2svn $
+ *  @date $Date: 2006-05-09 03:16:52 $
  *
  *  Copyright 2004-2005 Maui High Performance Computing Center, University of Hawaii
@@ -588,5 +588,5 @@
 
     // relevant terms
-    int Nrange = sigma*Nsigma + 0.5;    // Number of pixels either side
+    int Nrange = sigma*Nsigma + 0.5;    // Number of pixels either side for convolution kernel
     int Npixel = 2*Nrange + 1;          // Total number of pixels in convolution kernel
     int Nx = image->numCols;            // Number of columns
@@ -596,123 +596,154 @@
 case PS_TYPE_##TYPE: { \
         /* generate normalized gaussian */ \
-        psVector *gaussnorm = psVectorAlloc (Npixel, PS_TYPE_##TYPE); \
+        psVector *gaussnorm = psVectorAlloc(Npixel, PS_TYPE_##TYPE); \
         gaussnorm->n = gaussnorm->nalloc; \
-        double sum = 0.0; \
-        double factor = -0.5/(sigma*sigma); \
-        for (int i = -Nrange; i < Nrange + 1; i++) { \
-            gaussnorm->data.TYPE[i+Nrange] = exp (factor*i*i); \
-            sum += gaussnorm->data.TYPE[i+Nrange]; \
-        } \
-        for (int i = -Nrange; i < Nrange + 1; i++) { \
-            gaussnorm->data.TYPE[i+Nrange] /= sum; \
+        { \
+            double sum = 0.0; \
+            double factor = -0.5/(sigma*sigma); \
+            for (int i = -Nrange; i < Nrange + 1; i++) { \
+                gaussnorm->data.TYPE[i+Nrange] = exp(factor*i*i); \
+                sum += gaussnorm->data.TYPE[i+Nrange]; \
+            } \
+            for (int i = -Nrange; i < Nrange + 1; i++) { \
+                gaussnorm->data.TYPE[i+Nrange] /= sum; \
+            } \
         } \
         ps##TYPE *gauss = &gaussnorm->data.TYPE[Nrange]; \
         \
-        /* smooth in X direction */ \
-        psVector *temp = psVectorAlloc (Nx, PS_TYPE_##TYPE); \
-        temp->n = temp->nalloc; \
-        for (int j = 0; j < Ny; j++) { \
-            ps##TYPE *vi = image->data.TYPE[j]; \
-            ps##TYPE *vo = temp->data.TYPE; \
-            /* smooth first Nrange pixels, with renorm */ \
-            int xMax = PS_MIN(Nrange, Nx); \
-            for (int i = 0; i < xMax; i++, vi++, vo++) { \
-                ps##TYPE *vr = vi - i; \
-                ps##TYPE *vg = gauss - i; \
-                double g = 0; \
-                double s = 0; \
-                for (int n = -i; n < Nrange + 1; n++, vr++, vg++) { \
-                    s += *vg * *vr; \
-                    g += *vg; \
+        /* Smooth in X direction */ \
+        { \
+            psVector *calculation = psVectorAlloc(Nx, PS_TYPE_##TYPE); \
+            calculation->n = calculation->nalloc; \
+            for (int j = 0; j < Ny; j++) { \
+                ps##TYPE *vi = image->data.TYPE[j]; \
+                ps##TYPE *vo = calculation->data.TYPE; \
+                int xMax = PS_MIN(Nrange, Nx); \
+                int convRange = PS_MIN(Nrange + 1, Nx); \
+                /* Smooth first Nrange pixels, with renorm */ \
+                for (int i = 0; i < xMax; i++, vi++, vo++) { \
+                    ps##TYPE *vr = vi - i; \
+                    ps##TYPE *vg = gauss - i; \
+                    double g = 0.0; \
+                    double s = 0.0; \
+                    for (int n = -i; n < convRange; n++, vr++, vg++) { \
+                        s += *vg * *vr; \
+                        g += *vg; \
+                    } \
+                    *vo = s / g; \
                 } \
-                *vo = s / g; \
-            } \
-            /* smooth middle pixels */ \
-            for (int i = Nrange; i < Nx - Nrange; i++, vi++, vo++) { \
-                ps##TYPE *vr = vi - Nrange; \
-                ps##TYPE *vg = gauss - Nrange; \
-                double s = 0; \
-                for (int n = -Nrange; n < Nrange + 1; n++, vr++, vg++) { \
-                    s += *vg * *vr; \
+                /* If that's all the pixels we have, then we're done already */ \
+                if (Nx > Nrange) { \
+                    /* Smooth middle pixels */ \
+                    for (int i = Nrange; i < Nx - Nrange; i++, vi++, vo++) { \
+                        ps##TYPE *vr = vi - Nrange; \
+                        ps##TYPE *vg = gauss - Nrange; \
+                        double s = 0; \
+                        for (int n = -Nrange; n < Nrange + 1; n++, vr++, vg++) { \
+                            s += *vg * *vr; \
+                        } \
+                        *vo = s; \
+                    } \
+                    /* Smooth last Nrange pixels, with renorm */ \
+                    /* XXX does this miss the last column? */ \
+                    for (int i = Nx - Nrange; i < Nx; i++, vi++, vo++) { \
+                        ps##TYPE *vr = vi - Nrange; \
+                        ps##TYPE *vg = gauss - Nrange; \
+                        double g = 0.0; \
+                        double s = 0.0; \
+                        for (int n = -Nrange; n < Nx - i; n++, vr++, vg++) { \
+                            s += *vg * *vr; \
+                            g += *vg; \
+                        } \
+                        *vo = s / g; \
+                    } \
                 } \
-                *vo = s; \
-            } \
-            /* smooth last Nrange pixels, with renorm */ \
-            /* XXX does this miss the last column? */ \
-            for (int i = PS_MAX(Nx - Nrange, 0); i < Nx; i++, vi++, vo++) { \
-                ps##TYPE *vr = vi - Nrange; \
-                ps##TYPE *vg = gauss - Nrange; \
-                double g = 0; \
-                double s = 0; \
-                for (int n = -Nrange; n < Nx - i - 1; n++, vr++, vg++) { \
-                    s += *vg * *vr; \
-                    g += *vg; \
-                } \
-                *vo = s / g; \
-            } \
-            memcpy (image->data.TYPE[j], temp->data.TYPE, Nx*sizeof(ps##TYPE)); \
-        } \
-        psFree (temp); \
-        \
-        /* smooth in Y direction */ \
-        psArray *temprows = psArrayAlloc (Nrange); \
-        temprows->n = Nrange; \
+                memcpy(image->data.TYPE[j], calculation->data.TYPE, Nx*sizeof(ps##TYPE)); \
+            } \
+            psFree(calculation); \
+        } \
+        \
+        /* Smooth in Y direction */ \
+        psArray *rows = psArrayAlloc(Nrange); \
+        rows->n = Nrange; \
+        /* Smooth the first Nrange pixels, with renorm */ \
         int yMax = PS_MIN(Nrange, Ny); \
+        int convRange = PS_MIN(Nrange + 1, Ny); \
         for (int j = 0; j < yMax; j++) { \
-            temp = psVectorAlloc (Nx, PS_TYPE_##TYPE); \
-            temp->n = temp->nalloc; \
-            /* zero the output row */ \
-            memset (temp->data.TYPE, 0, Nx*sizeof(PS_TYPE_##TYPE)); \
-            for (int n = -j, sum = 0; n < Nrange + 1; n++) { sum += gauss[n]; } \
-            for (int n = -j; n < Nrange + 1; n++) { \
+            psVector *calculation = psVectorAlloc(Nx, PS_TYPE_##TYPE); \
+            calculation->n = calculation->nalloc; \
+            /* Zero the output row */ \
+            memset(calculation->data.TYPE, 0, Nx*sizeof(ps##TYPE)); \
+            double sum = 0.0; \
+            for (int n = -j; n < convRange; n++) { sum += gauss[n]; } \
+            for (int n = -j; n < convRange; n++) { \
                 ps##TYPE *vi = image->data.TYPE[j+n]; \
-                ps##TYPE *vo = temp->data.TYPE; \
-                double g = gauss[n]; \
+                ps##TYPE *vo = calculation->data.TYPE; \
+                double g = gauss[n] / sum; \
                 for (int i = 0; i < Nx; i++, vi++, vo++) { \
                     *vo += *vi * g; \
                 } \
             } \
-            /* save output rows on temp array */ \
-            temprows->data[j] = temp; \
-        } \
-        for (int j = Nrange; j < Ny - Nrange; j++) { \
-            /* save the Nrange-offset output row, then zero */ \
-            int Nr = j % Nrange; \
-            temp = temprows->data[Nr]; \
-            memcpy (image->data.TYPE[j-Nrange], temp->data.TYPE, Nx*sizeof(ps##TYPE)); \
-            memset (temp->data.TYPE, 0, Nx*sizeof(PS_TYPE_##TYPE)); \
-            for (int n = -Nrange; n < Nrange + 1; n++) { \
-                ps##TYPE *vi = image->data.TYPE[j+n]; \
-                ps##TYPE *vo = temp->data.TYPE; \
-                double g = gauss[n]; \
-                for (int i = 0; i < Nx; i++, vi++, vo++) { \
-                    *vo += *vi * g; \
+            /* Save output rows on temp array of rows */ \
+            rows->data[j] = calculation; \
+        } \
+        if (Ny < Nrange) { \
+            /* Need to save the first bit, then we're done */ \
+            for (int j = 0; j < Ny; j++) {  \
+                psVector *save = rows->data[j]; \
+                memcpy(image->data.TYPE[j], save->data.TYPE, Nx*sizeof(ps##TYPE)); \
+            } \
+        } else { \
+            /* Smooth middle pixels */ \
+            psVector *calculation = psVectorAlloc(Nx, PS_TYPE_##TYPE); \
+            for (int j = Nrange; j < Ny - Nrange; j++) { \
+                memset(calculation->data.TYPE, 0, Nx*sizeof(ps##TYPE)); \
+                for (int n = -Nrange; n < Nrange + 1; n++) { \
+                    ps##TYPE *vi = image->data.TYPE[j+n]; \
+                    ps##TYPE *vo = calculation->data.TYPE; \
+                    double g = gauss[n]; \
+                    for (int i = 0; i < Nx; i++, vi++, vo++) { \
+                        *vo += *vi * g; \
+                    } \
                 } \
-            } \
-        } \
-        for (int j = PS_MAX(Ny - Nrange, 0); j < Ny; j++) { \
-            /* save the Nrange-offset output row, then zero */ \
-            int Nr = j % Nrange; \
-            temp = temprows->data[Nr]; \
-            memcpy (image->data.TYPE[j-Nrange], temp->data.TYPE, Nx*sizeof(ps##TYPE)); \
-            memset (temp->data.TYPE, 0, Nx*sizeof(PS_TYPE_##TYPE)); \
-            for (int n = -Nrange, sum = 0; n < Ny - j - 1; n++) { sum += gauss[n]; } \
-            for (int n = -Nrange; n < Ny - j - 1; n++) { \
-                ps##TYPE *vi = image->data.TYPE[j+n]; \
-                ps##TYPE *vo = temp->data.TYPE; \
-                double g = gauss[n]; \
-                for (int i = 0; i < Nx; i++, vi++, vo++) { \
-                    *vo += *vi * g; \
+                /* Write the output row */ \
+                int Nr = j % Nrange; \
+                psVector *save = rows->data[Nr]; \
+                memcpy(image->data.TYPE[j-Nrange], save->data.TYPE, Nx*sizeof(ps##TYPE)); \
+                /* Juggle the pointers, so that next run we use the one we just wrote */ \
+                rows->data[Nr] = calculation; \
+                calculation = save; \
+            } \
+            /* Smooth last Nrange pixels, with renorm */ \
+            for (int j = Ny - Nrange; j < Ny; j++) { \
+                /* save the Nrange-offset output row, then zero */ \
+                memset(calculation->data.TYPE, 0, Nx*sizeof(ps##TYPE)); \
+                double sum = 0.0; \
+                for (int n = -Nrange; n < Ny - j; n++) { sum += gauss[n]; } \
+                for (int n = -Nrange; n < Ny - j; n++) { \
+                    ps##TYPE *vi = image->data.TYPE[j+n]; \
+                    ps##TYPE *vo = calculation->data.TYPE; \
+                    double g = gauss[n] / sum; \
+                    for (int i = 0; i < Nx; i++, vi++, vo++) { \
+                        *vo += *vi * g; \
+                    } \
                 } \
-            } \
-        } \
-        for (int j = Ny; j < Ny + Nrange; j++) { \
-            /* save the Nrange-offset output row, then zero */ \
-            int Nr = j % Nrange; \
-            temp = temprows->data[Nr]; \
-            memcpy (image->data.TYPE[j-Nrange], temp->data.TYPE, Nx*sizeof(ps##TYPE)); \
-        } \
-        psFree (temprows); \
-        psFree (gaussnorm); \
+                /* Write the output row */ \
+                int Nr = j % Nrange; \
+                psVector *save = rows->data[Nr]; \
+                memcpy(image->data.TYPE[j-Nrange], save->data.TYPE, Nx*sizeof(ps##TYPE)); \
+                /* Juggle the pointers, so that next run we use the one we just wrote */ \
+                rows->data[Nr] = calculation; \
+                calculation = save; \
+            } \
+            psFree(calculation); \
+            /* Write the remaining rows */ \
+            for (int j = Ny; j < Ny + Nrange; j++) {  \
+                int Nr = j % Nrange; \
+                psVector *save = rows->data[Nr]; \
+                memcpy(image->data.TYPE[j-Nrange], save->data.TYPE, Nx*sizeof(ps##TYPE)); \
+            } \
+        } \
+        psFree(rows); \
+        psFree(gaussnorm); \
         break; \
     }
