Index: trunk/psLib/src/imageops/psImageConvolve.c
===================================================================
--- trunk/psLib/src/imageops/psImageConvolve.c	(revision 20462)
+++ trunk/psLib/src/imageops/psImageConvolve.c	(revision 20830)
@@ -7,6 +7,6 @@
 /// @author Eugene Magnier, IfA
 ///
-/// @version $Revision: 1.80 $ $Name: not supported by cvs2svn $
-/// @date $Date: 2008-10-29 20:43:18 $
+/// @version $Revision: 1.81 $ $Name: not supported by cvs2svn $
+/// @date $Date: 2008-11-26 00:43:12 $
 ///
 /// Copyright 2004-2007 Institute for Astronomy, University of Hawaii
@@ -750,76 +750,76 @@
     switch (image->type.type) {
       case PS_TYPE_F32: {
-	  psImage *calculation = psImageAlloc(numRows, numCols, PS_TYPE_F32); /* Calculation image; BW */
-	  psImage *calcMask = psImageAlloc(numRows, numCols, PS_TYPE_MASK); /* Mask for calculation image; BW */
-   
-	  /** Smooth in X direction **/
-	  for (int j = 0; j < numRows; j++) {
-	      for (int i = 0; i < numCols; i++) {
-		  int xMin = PS_MAX(i - size, 0);
-		  int xMax = PS_MIN(i + size, xLast);
-		  const psMaskType *maskData = &mask->data.PS_TYPE_MASK_DATA[j][xMin];
-		  const psF32 *imageData = &image->data.F32[j][xMin];
-		  int uMin = - PS_MIN(i, size); /* Minimum kernel index */
-		  const psF32 *gaussData = &gauss[uMin];
-		  double sumIG = 0.0, sumG = 0.0; /* Sums for convolution */
-		  for (int x = xMin; x <= xMax; x++, maskData++, imageData++, gaussData++) {
-		      if (*maskData & maskVal) {
-			  continue;
-		      }
-		      // if ((i == 1000) && (j > 10) && (j < 15)) {
-		      // 	  fprintf (stderr, "%d %d  %d  %f  %f   %f %f\n", i, j, x, *imageData, *gaussData, sumIG, sumG);
-		      // }
-		      sumIG += *imageData * *gaussData;
-		      sumG += *gaussData;
-		  }
-		  if (sumG > minGauss) {
-		      /* BW */
-		      calculation->data.F32[i][j] = sumIG / sumG;
-		      calcMask->data.PS_TYPE_MASK_DATA[i][j] = 0;
-		  } else {
-		      /* BW */
-		      calcMask->data.PS_TYPE_MASK_DATA[i][j] = 0xFF;
-		  }
-	      }
-	  }
-   
-	  output = psImageRecycle(output, numCols, numRows, PS_TYPE_F32);
-   
-	  /** Smooth in Y direction  **/
-	  for (int i = 0; i < numCols; i++) {
-	      for (int j = 0; j < numRows; j++) {
-		  int yMin = PS_MAX(j - size, 0);
-		  int yMax = PS_MIN(j + size, yLast);
-		  const psMaskType *maskData = &calcMask->data.PS_TYPE_MASK_DATA[i][yMin]; /* BW */
-		  const psF32 *imageData = &calculation->data.F32[i][yMin]; /* BW */
-		  int vMin = - PS_MIN(j, size); /* Minimum kernel index */
-		  const psF32 *gaussData = &gauss[vMin];
-		  double sumIG = 0.0, sumG = 0.0; /* Sums for convolution */
-		  for (int y = yMin; y <= yMax; y++, maskData++, imageData++, gaussData++) {
-		      if (*maskData) {
-			  continue;
-		      }
-		      sumIG += *imageData * *gaussData;
-		      sumG += *gaussData;
-		  }
-		  output->data.F32[j][i] = (sumG > minGauss) ? sumIG / sumG : NAN;
-	      }
-	  }
-   
-	  psFree(calculation);
-	  psFree(calcMask);
-	  psFree(gaussNorm);
-   
-	  return output;
+          psImage *calculation = psImageAlloc(numRows, numCols, PS_TYPE_F32); /* Calculation image; BW */
+          psImage *calcMask = psImageAlloc(numRows, numCols, PS_TYPE_MASK); /* Mask for calculation image; BW */
+
+          /** Smooth in X direction **/
+          for (int j = 0; j < numRows; j++) {
+              for (int i = 0; i < numCols; i++) {
+                  int xMin = PS_MAX(i - size, 0);
+                  int xMax = PS_MIN(i + size, xLast);
+                  const psMaskType *maskData = &mask->data.PS_TYPE_MASK_DATA[j][xMin];
+                  const psF32 *imageData = &image->data.F32[j][xMin];
+                  int uMin = - PS_MIN(i, size); /* Minimum kernel index */
+                  const psF32 *gaussData = &gauss[uMin];
+                  double sumIG = 0.0, sumG = 0.0; /* Sums for convolution */
+                  for (int x = xMin; x <= xMax; x++, maskData++, imageData++, gaussData++) {
+                      if (*maskData & maskVal) {
+                          continue;
+                      }
+                      // if ((i == 1000) && (j > 10) && (j < 15)) {
+                      //          fprintf (stderr, "%d %d  %d  %f  %f   %f %f\n", i, j, x, *imageData, *gaussData, sumIG, sumG);
+                      // }
+                      sumIG += *imageData * *gaussData;
+                      sumG += *gaussData;
+                  }
+                  if (sumG > minGauss) {
+                      /* BW */
+                      calculation->data.F32[i][j] = sumIG / sumG;
+                      calcMask->data.PS_TYPE_MASK_DATA[i][j] = 0;
+                  } else {
+                      /* BW */
+                      calcMask->data.PS_TYPE_MASK_DATA[i][j] = 0xFF;
+                  }
+              }
+          }
+
+          output = psImageRecycle(output, numCols, numRows, PS_TYPE_F32);
+
+          /** Smooth in Y direction  **/
+          for (int i = 0; i < numCols; i++) {
+              for (int j = 0; j < numRows; j++) {
+                  int yMin = PS_MAX(j - size, 0);
+                  int yMax = PS_MIN(j + size, yLast);
+                  const psMaskType *maskData = &calcMask->data.PS_TYPE_MASK_DATA[i][yMin]; /* BW */
+                  const psF32 *imageData = &calculation->data.F32[i][yMin]; /* BW */
+                  int vMin = - PS_MIN(j, size); /* Minimum kernel index */
+                  const psF32 *gaussData = &gauss[vMin];
+                  double sumIG = 0.0, sumG = 0.0; /* Sums for convolution */
+                  for (int y = yMin; y <= yMax; y++, maskData++, imageData++, gaussData++) {
+                      if (*maskData) {
+                          continue;
+                      }
+                      sumIG += *imageData * *gaussData;
+                      sumG += *gaussData;
+                  }
+                  output->data.F32[j][i] = (sumG > minGauss) ? sumIG / sumG : NAN;
+              }
+          }
+
+          psFree(calculation);
+          psFree(calcMask);
+          psFree(gaussNorm);
+
+          return output;
       }
-	IMAGESMOOTH_MASK_CASE(F64);
+        IMAGESMOOTH_MASK_CASE(F64);
       default:
-	psFree(gaussNorm);
-	char *typeStr;
-	PS_TYPE_NAME(typeStr,image->type.type);
-	psError(PS_ERR_BAD_PARAMETER_TYPE, true,
-		_("Specified psImage type, %s, is not supported."),
-		typeStr);
-	return false;
+        psFree(gaussNorm);
+        char *typeStr;
+        PS_TYPE_NAME(typeStr,image->type.type);
+        psError(PS_ERR_BAD_PARAMETER_TYPE, true,
+                _("Specified psImage type, %s, is not supported."),
+                typeStr);
+        return false;
     }
 
@@ -828,14 +828,14 @@
 }
 
-bool psImageSmoothMask_ScanRows_F32 (psImage *calculation, 
-				     psImage *calcMask, 
-				     const psImage *image, 
-				     const psImage *mask, 
-				     psMaskType maskVal,
-				     psVector *gaussNorm, 
-				     float minGauss, 
-				     int size, 
-				     int rowStart, 
-				     int rowStop) 
+bool psImageSmoothMask_ScanRows_F32 (psImage *calculation,
+                                     psImage *calcMask,
+                                     const psImage *image,
+                                     const psImage *mask,
+                                     psMaskType maskVal,
+                                     psVector *gaussNorm,
+                                     float minGauss,
+                                     int size,
+                                     int rowStart,
+                                     int rowStop)
 {
     const psF32 *gauss = &gaussNorm->data.F32[size]; // Gaussian convolution kernel
@@ -845,28 +845,28 @@
 
     for (int j = rowStart; j < rowStop; j++) {
-	for (int i = 0; i < numCols; i++) {
-	    int xMin = PS_MAX(i - size, 0);
-	    int xMax = PS_MIN(i + size, xLast);
-	    const psMaskType *maskData = &mask->data.PS_TYPE_MASK_DATA[j][xMin];
-	    const psF32 *imageData = &image->data.F32[j][xMin];
-	    int uMin = - PS_MIN(i, size); /* Minimum kernel index */
-	    const psF32 *gaussData = &gauss[uMin];
-	    double sumIG = 0.0, sumG = 0.0; /* Sums for convolution */
-	    for (int x = xMin; x <= xMax; x++, maskData++, imageData++, gaussData++) {
-		if (*maskData & maskVal) {
-		    continue;
-		}
-		sumIG += *imageData * *gaussData;
-		sumG += *gaussData;
-	    }
-	    if (sumG > minGauss) {
-		/* BW */
-		calculation->data.F32[i][j] = sumIG / sumG;
-		calcMask->data.PS_TYPE_MASK_DATA[i][j] = 0;
-	    } else {
-		/* BW */
-		calcMask->data.PS_TYPE_MASK_DATA[i][j] = 0xFF;
-	    }
-	}
+        for (int i = 0; i < numCols; i++) {
+            int xMin = PS_MAX(i - size, 0);
+            int xMax = PS_MIN(i + size, xLast);
+            const psMaskType *maskData = &mask->data.PS_TYPE_MASK_DATA[j][xMin];
+            const psF32 *imageData = &image->data.F32[j][xMin];
+            int uMin = - PS_MIN(i, size); /* Minimum kernel index */
+            const psF32 *gaussData = &gauss[uMin];
+            double sumIG = 0.0, sumG = 0.0; /* Sums for convolution */
+            for (int x = xMin; x <= xMax; x++, maskData++, imageData++, gaussData++) {
+                if (*maskData & maskVal) {
+                    continue;
+                }
+                sumIG += *imageData * *gaussData;
+                sumG += *gaussData;
+            }
+            if (sumG > minGauss) {
+                /* BW */
+                calculation->data.F32[i][j] = sumIG / sumG;
+                calcMask->data.PS_TYPE_MASK_DATA[i][j] = 0;
+            } else {
+                /* BW */
+                calcMask->data.PS_TYPE_MASK_DATA[i][j] = 0xFF;
+            }
+        }
     }
     return true;
@@ -874,12 +874,12 @@
 
 bool psImageSmoothMask_ScanCols_F32 (psImage *output,
-				     psImage *calculation, 
-				     psImage *calcMask, 
-				     psMaskType maskVal,
-				     psVector *gaussNorm, 
-				     float minGauss, 
-				     int size, 
-				     int colStart, 
-				     int colStop) 
+                                     psImage *calculation,
+                                     psImage *calcMask,
+                                     psMaskType maskVal,
+                                     psVector *gaussNorm,
+                                     float minGauss,
+                                     int size,
+                                     int colStart,
+                                     int colStop)
 {
     const psF32 *gauss = &gaussNorm->data.F32[size]; // Gaussian convolution kernel
@@ -889,25 +889,25 @@
 
     for (int i = colStart; i < colStop; i++) {
-	for (int j = 0; j < numRows; j++) {
-	    int yMin = PS_MAX(j - size, 0);
-	    int yMax = PS_MIN(j + size, yLast);
-	    const psMaskType *maskData = &calcMask->data.PS_TYPE_MASK_DATA[i][yMin]; /* BW */
-	    const psF32 *imageData = &calculation->data.F32[i][yMin]; /* BW */
-	    int vMin = - PS_MIN(j, size); /* Minimum kernel index */
-	    const psF32 *gaussData = &gauss[vMin];
-	    double sumIG = 0.0, sumG = 0.0; /* Sums for convolution */
-	    for (int y = yMin; y <= yMax; y++, maskData++, imageData++, gaussData++) {
-		if (*maskData) {
-		    continue;
-		}
-		sumIG += *imageData * *gaussData;
-		sumG += *gaussData;
-	    }
-	    output->data.F32[j][i] = (sumG > minGauss) ? sumIG / sumG : NAN;
-	}
+        for (int j = 0; j < numRows; j++) {
+            int yMin = PS_MAX(j - size, 0);
+            int yMax = PS_MIN(j + size, yLast);
+            const psMaskType *maskData = &calcMask->data.PS_TYPE_MASK_DATA[i][yMin]; /* BW */
+            const psF32 *imageData = &calculation->data.F32[i][yMin]; /* BW */
+            int vMin = - PS_MIN(j, size); /* Minimum kernel index */
+            const psF32 *gaussData = &gauss[vMin];
+            double sumIG = 0.0, sumG = 0.0; /* Sums for convolution */
+            for (int y = yMin; y <= yMax; y++, maskData++, imageData++, gaussData++) {
+                if (*maskData) {
+                    continue;
+                }
+                sumIG += *imageData * *gaussData;
+                sumG += *gaussData;
+            }
+            output->data.F32[j][i] = (sumG > minGauss) ? sumIG / sumG : NAN;
+        }
     }
     return true;
 }
-   
+
 bool psImageSmoothMask_ScanRows_F32_Threaded(psThreadJob *job)
 {
@@ -921,13 +921,13 @@
     const psImage *mask   = job->args->data[3]; // input mask
 
-    psMaskType maskVal 	  = PS_SCALAR_VALUE(job->args->data[4],U8);
-    psVector *gaussNorm	  = job->args->data[5]; // gauss kernel
-    float minGauss    	  = PS_SCALAR_VALUE(job->args->data[6],F32);
-    int size           	  = PS_SCALAR_VALUE(job->args->data[7],S32);
-    int rowStart       	  = PS_SCALAR_VALUE(job->args->data[8],S32);
-    int rowStop        	  = PS_SCALAR_VALUE(job->args->data[9],S32);
-    return psImageSmoothMask_ScanRows_F32(calculation, calcMask, image, mask, maskVal, 
-					  gaussNorm, minGauss, size, 
-					  rowStart, rowStop);
+    psMaskType maskVal    = PS_SCALAR_VALUE(job->args->data[4],U8);
+    psVector *gaussNorm   = job->args->data[5]; // gauss kernel
+    float minGauss        = PS_SCALAR_VALUE(job->args->data[6],F32);
+    int size              = PS_SCALAR_VALUE(job->args->data[7],S32);
+    int rowStart          = PS_SCALAR_VALUE(job->args->data[8],S32);
+    int rowStop           = PS_SCALAR_VALUE(job->args->data[9],S32);
+    return psImageSmoothMask_ScanRows_F32(calculation, calcMask, image, mask, maskVal,
+                                          gaussNorm, minGauss, size,
+                                          rowStart, rowStop);
 }
 
@@ -941,23 +941,23 @@
     psImage *calculation  = job->args->data[1]; // calculation image
     psImage *calcMask     = job->args->data[2]; // calculation mask
-    psMaskType maskVal 	  = PS_SCALAR_VALUE(job->args->data[3],U8);
-
-    psVector *gaussNorm	  = job->args->data[4]; // gauss kernel
-    float minGauss    	  = PS_SCALAR_VALUE(job->args->data[5],F32);
-    int size           	  = PS_SCALAR_VALUE(job->args->data[6],S32);
-    int colStart       	  = PS_SCALAR_VALUE(job->args->data[7],S32);
-    int colStop        	  = PS_SCALAR_VALUE(job->args->data[8],S32);
-    return psImageSmoothMask_ScanCols_F32(output, calculation, calcMask, maskVal, 
-					  gaussNorm, minGauss, size,
-					  colStart, colStop);
+    psMaskType maskVal    = PS_SCALAR_VALUE(job->args->data[3],U8);
+
+    psVector *gaussNorm   = job->args->data[4]; // gauss kernel
+    float minGauss        = PS_SCALAR_VALUE(job->args->data[5],F32);
+    int size              = PS_SCALAR_VALUE(job->args->data[6],S32);
+    int colStart          = PS_SCALAR_VALUE(job->args->data[7],S32);
+    int colStop           = PS_SCALAR_VALUE(job->args->data[8],S32);
+    return psImageSmoothMask_ScanCols_F32(output, calculation, calcMask, maskVal,
+                                          gaussNorm, minGauss, size,
+                                          colStart, colStop);
 }
 
 psImage *psImageSmoothMask_Threaded(psImage *output,
-				    const psImage *image,
-				    const psImage *mask,
-				    psMaskType maskVal,
-				    float sigma,
-				    float numSigma,
-				    float minGauss)
+                                    const psImage *image,
+                                    const psImage *mask,
+                                    psMaskType maskVal,
+                                    float sigma,
+                                    float numSigma,
+                                    float minGauss)
 {
     PS_ASSERT_IMAGE_NON_NULL(image, NULL);
@@ -993,84 +993,84 @@
 
     switch (image->type.type) {
-	// Smooth an image with masked pixels
-	// The calculation and calcMask images are *deliberately* backwards (row,col instead of col,row or
-	// [col][row] instead of [row][col]) to optimise the iteration over rows; such instances are marked "BW"
+        // Smooth an image with masked pixels
+        // The calculation and calcMask images are *deliberately* backwards (row,col instead of col,row or
+        // [col][row] instead of [row][col]) to optimise the iteration over rows; such instances are marked "BW"
 
       case PS_TYPE_F32: {
-	  psImage *calculation = psImageAlloc(numRows, numCols, PS_TYPE_F32); /* Calculation image; BW */
-	  psImage *calcMask = psImageAlloc(numRows, numCols, PS_TYPE_MASK); /* Mask for calculation image; BW */
-   
-	  /** Smooth in X direction **/
-	  for (int rowStart = 0; rowStart < numRows; rowStart+=scanRows) {
-	      int rowStop = PS_MIN (rowStart + scanRows, numRows);
-
-	      // allocate a job, construct the arguments for this job
-	      psThreadJob *job = psThreadJobAlloc("PSLIB_IMAGE_SMOOTHMASK_SCANROWS");
-	      psArrayAdd(job->args, 1, calculation);
-	      psArrayAdd(job->args, 1, calcMask);
-	      psArrayAdd(job->args, 1, (psImage *) image); // cast away const
-	      psArrayAdd(job->args, 1, (psImage *) mask); // cast away const
-	      PS_ARRAY_ADD_SCALAR(job->args, maskVal,  PS_TYPE_U8);
-	      psArrayAdd(job->args, 1, gaussNorm);
-	      PS_ARRAY_ADD_SCALAR(job->args, minGauss, PS_TYPE_F32);
-	      PS_ARRAY_ADD_SCALAR(job->args, size,     PS_TYPE_S32);
-	      PS_ARRAY_ADD_SCALAR(job->args, rowStart, PS_TYPE_S32);
-	      PS_ARRAY_ADD_SCALAR(job->args, rowStop,  PS_TYPE_S32);
-	      // -> psImageSmoothMask_ScanRows_F32 (calculation, calcMask, image, mask, maskVal, gauss, minGauss, size, rowStart, rowStop);
-
-	      // if threading is not active, we simply run the job and return
-	      if (!psThreadJobAddPending(job)) {
-		  psError(PS_ERR_UNKNOWN, false, "Unable to smooth image");
-		  psFree(job);
-		  return false;
-	      }
-	      psFree(job);
-
-	  }
-
-	  // wait here for the threaded jobs to finish (NOP if threading is not active)
-	  if (!psThreadPoolWait(true)) {
-	      psError(PS_ERR_UNKNOWN, false, "Unable to smooth image");
-	      return false;
-	  }
-
-	  output = psImageRecycle(output, numCols, numRows, PS_TYPE_F32);
-   
-	  /** Smooth in Y direction  **/
-	  for (int colStart = 0; colStart < numCols; colStart+=scanCols) {
-	      int colStop = PS_MIN (colStart + scanCols, numCols);
-
-	      // allocate a job, construct the arguments for this job
-	      psThreadJob *job = psThreadJobAlloc("PSLIB_IMAGE_SMOOTHMASK_SCANCOLS");
-	      psArrayAdd(job->args, 1, output);
-	      psArrayAdd(job->args, 1, calculation);
-	      psArrayAdd(job->args, 1, calcMask);
-	      PS_ARRAY_ADD_SCALAR(job->args, maskVal,  PS_TYPE_U8);
-	      psArrayAdd(job->args, 1, gaussNorm);
-	      PS_ARRAY_ADD_SCALAR(job->args, minGauss, PS_TYPE_F32);
-	      PS_ARRAY_ADD_SCALAR(job->args, size,     PS_TYPE_S32);
-	      PS_ARRAY_ADD_SCALAR(job->args, colStart, PS_TYPE_S32);
-	      PS_ARRAY_ADD_SCALAR(job->args, colStop,  PS_TYPE_S32);
-	      // -> psImageSmoothMask_ScanCols_F32 (output, calculation, calcMask, maskVal, gauss, minGauss, size, colStart, colStop);
-
-	      // if threading is not active, we simply run the job and return
-	      if (!psThreadJobAddPending(job)) {
-		  psError(PS_ERR_UNKNOWN, false, "Unable to smooth image");
-		  psFree(job);
-		  return false;
-	      }
-	      psFree(job);
-	  }
-
-	  // wait here for the threaded jobs to finish (NOP if threading is not active)
-	  if (!psThreadPoolWait(true)) {
-	      psError(PS_ERR_UNKNOWN, false, "Unable to smooth image");
-	      return false;
-	  }
-	  psFree(calculation);
-	  psFree(calcMask);
-	  psFree(gaussNorm);
-   
-	  return output;
+          psImage *calculation = psImageAlloc(numRows, numCols, PS_TYPE_F32); /* Calculation image; BW */
+          psImage *calcMask = psImageAlloc(numRows, numCols, PS_TYPE_MASK); /* Mask for calculation image; BW */
+
+          /** Smooth in X direction **/
+          for (int rowStart = 0; rowStart < numRows; rowStart+=scanRows) {
+              int rowStop = PS_MIN (rowStart + scanRows, numRows);
+
+              // allocate a job, construct the arguments for this job
+              psThreadJob *job = psThreadJobAlloc("PSLIB_IMAGE_SMOOTHMASK_SCANROWS");
+              psArrayAdd(job->args, 1, calculation);
+              psArrayAdd(job->args, 1, calcMask);
+              psArrayAdd(job->args, 1, (psImage *) image); // cast away const
+              psArrayAdd(job->args, 1, (psImage *) mask); // cast away const
+              PS_ARRAY_ADD_SCALAR(job->args, maskVal,  PS_TYPE_U8);
+              psArrayAdd(job->args, 1, gaussNorm);
+              PS_ARRAY_ADD_SCALAR(job->args, minGauss, PS_TYPE_F32);
+              PS_ARRAY_ADD_SCALAR(job->args, size,     PS_TYPE_S32);
+              PS_ARRAY_ADD_SCALAR(job->args, rowStart, PS_TYPE_S32);
+              PS_ARRAY_ADD_SCALAR(job->args, rowStop,  PS_TYPE_S32);
+              // -> psImageSmoothMask_ScanRows_F32 (calculation, calcMask, image, mask, maskVal, gauss, minGauss, size, rowStart, rowStop);
+
+              // if threading is not active, we simply run the job and return
+              if (!psThreadJobAddPending(job)) {
+                  psError(PS_ERR_UNKNOWN, false, "Unable to smooth image");
+                  psFree(job);
+                  return false;
+              }
+              psFree(job);
+
+          }
+
+          // wait here for the threaded jobs to finish (NOP if threading is not active)
+          if (!psThreadPoolWait(true)) {
+              psError(PS_ERR_UNKNOWN, false, "Unable to smooth image");
+              return false;
+          }
+
+          output = psImageRecycle(output, numCols, numRows, PS_TYPE_F32);
+
+          /** Smooth in Y direction  **/
+          for (int colStart = 0; colStart < numCols; colStart+=scanCols) {
+              int colStop = PS_MIN (colStart + scanCols, numCols);
+
+              // allocate a job, construct the arguments for this job
+              psThreadJob *job = psThreadJobAlloc("PSLIB_IMAGE_SMOOTHMASK_SCANCOLS");
+              psArrayAdd(job->args, 1, output);
+              psArrayAdd(job->args, 1, calculation);
+              psArrayAdd(job->args, 1, calcMask);
+              PS_ARRAY_ADD_SCALAR(job->args, maskVal,  PS_TYPE_U8);
+              psArrayAdd(job->args, 1, gaussNorm);
+              PS_ARRAY_ADD_SCALAR(job->args, minGauss, PS_TYPE_F32);
+              PS_ARRAY_ADD_SCALAR(job->args, size,     PS_TYPE_S32);
+              PS_ARRAY_ADD_SCALAR(job->args, colStart, PS_TYPE_S32);
+              PS_ARRAY_ADD_SCALAR(job->args, colStop,  PS_TYPE_S32);
+              // -> psImageSmoothMask_ScanCols_F32 (output, calculation, calcMask, maskVal, gauss, minGauss, size, colStart, colStop);
+
+              // if threading is not active, we simply run the job and return
+              if (!psThreadJobAddPending(job)) {
+                  psError(PS_ERR_UNKNOWN, false, "Unable to smooth image");
+                  psFree(job);
+                  return false;
+              }
+              psFree(job);
+          }
+
+          // wait here for the threaded jobs to finish (NOP if threading is not active)
+          if (!psThreadPoolWait(true)) {
+              psError(PS_ERR_UNKNOWN, false, "Unable to smooth image");
+              return false;
+          }
+          psFree(calculation);
+          psFree(calcMask);
+          psFree(gaussNorm);
+
+          return output;
       }
       default:
@@ -1507,6 +1507,7 @@
 // XXX for now, either thread all or none
 // have to call this before calling psImageSmoothMask_Threaded
-void psImageConvolveSetThreads(bool set)
-{
+bool psImageConvolveSetThreads(bool set)
+{
+    bool old = threaded;                // Old value
     if (set && !threaded) {
         {
@@ -1534,4 +1535,5 @@
     }
     threaded = set;
+    return old;
 }
 
