Index: /trunk/psphot/src/Makefile.am
===================================================================
--- /trunk/psphot/src/Makefile.am	(revision 21165)
+++ /trunk/psphot/src/Makefile.am	(revision 21166)
@@ -79,4 +79,5 @@
 	psphotMakeFluxScale.c	       \
 	psphotCheckStarDistribution.c  \
+	psphotThreadTools.c  	       \
 	psphotAddNoise.c
 
Index: /trunk/psphot/src/psphot.h
===================================================================
--- /trunk/psphot/src/psphot.h	(revision 21165)
+++ /trunk/psphot/src/psphot.h	(revision 21166)
@@ -12,13 +12,4 @@
 
 #define PSPHOT_RECIPE_PSF_FAKE_ALLOW "PSF.FAKE.ALLOW" // Name for recipe component permitting fake PSFs
-
-typedef struct {
-    pmReadout *readout;
-    psArray *sources;
-    pmPSF *psf;
-    psRegion *region;
-    psMaskType maskVal;
-    psMaskType markVal;
-} psphotGuessModelForRegionArgs;
 
 // top-level psphot functions
@@ -43,5 +34,4 @@
 pmDetections   *psphotFindDetections (pmDetections *detections, pmReadout *readout, psMetadata *recipe);
 
-psArray        *psphotSourceStats (pmReadout *readout, psMetadata *recipe, pmDetections *detections);
 bool            psphotRoughClass (pmReadout *readout, psArray *sources, psMetadata *recipe, const bool findPsfClump);
 bool            psphotBasicDeblend (psArray *sources, psMetadata *recipe);
@@ -50,16 +40,32 @@
 bool            psphotMomentsStats (pmReadout *readout, psMetadata *recipe, psArray *sources);
 bool            psphotFitSourcesLinear (pmReadout *readout, psArray *sources, psMetadata *recipe, pmPSF *psf, bool final);
-bool            psphotGuessModels (pmConfig *config, pmReadout *readout, psArray *sources, pmPSF *psf);
-bool            psphotGuessModelForRegion (psphotGuessModelForRegionArgs *args);
-bool            psphotBlendFit (pmReadout *readout, psArray *sources, psMetadata *recipe, pmPSF *psf);
 bool            psphotReplaceUnfitSources (psArray *sources, psMaskType maskVal);
 bool            psphotReplaceAllSources (psArray *sources, psMetadata *recipe);
 bool            psphotRemoveAllSources (psArray *sources, psMetadata *recipe);
-bool            psphotApResid (pmReadout *readout, psArray *sources, psMetadata *recipe, pmPSF *psf);
-bool            psphotMagnitudes (pmConfig *config, const pmFPAview *view, psArray *sources, psMetadata *recipe, pmPSF *psf);
+
+bool            psphotBlendFit (pmConfig *config, pmReadout *readout, psArray *sources, pmPSF *psf);
+bool            psphotBlendFit_Threaded (psThreadJob *job);
+
+psArray        *psphotSourceStats (pmConfig *config, pmReadout *readout, pmDetections *detections);
+bool            psphotSourceStats_Threaded (psThreadJob *job);
+
+bool            psphotGuessModels (pmConfig *config, pmReadout *readout, psArray *sources, pmPSF *psf);
+bool            psphotGuessModel_Threaded (psThreadJob *job);
+
+bool            psphotMagnitudes (pmConfig *config, pmReadout *readout, const pmFPAview *view, psArray *sources, pmPSF *psf);
+bool            psphotMagnitudes_Threaded (psThreadJob *job);
+
+bool            psphotApResid (pmConfig *config, pmReadout *readout, psArray *sources, pmPSF *psf);
+bool            psphotApResidMags_Threaded (psThreadJob *job);
+
 bool            psphotSkyReplace (pmConfig *config, const pmFPAview *view);
 bool            psphotExtendedSourceAnalysis (pmReadout *readout, psArray *sources, psMetadata *recipe);
 bool            psphotExtendedSourceFits (pmReadout *readout, psArray *sources, psMetadata *recipe);
+
+// thread-related:
 bool            psphotSetThreads ();
+bool            psphotChooseCellSizes (int *Cx, int *Cy, pmReadout *readout, int nThreads);
+bool            psphotCoordToCell (int *group, int *cell, float x, float y, int Cx, int Cy);
+psArray        *psphotAssignSources (int Cx, int Cy, psArray *sources);
 
 // used by psphotFindDetections
Index: /trunk/psphot/src/psphotApResid.c
===================================================================
--- /trunk/psphot/src/psphotApResid.c	(revision 21165)
+++ /trunk/psphot/src/psphotApResid.c	(revision 21166)
@@ -4,5 +4,5 @@
 // measure the aperture residual statistics and 2D variations
 
-bool psphotApResid (pmReadout *readout, psArray *sources, psMetadata *recipe, pmPSF *psf)
+bool psphotApResid (pmConfig *config, pmReadout *readout, psArray *sources, pmPSF *psf)
 {
     int Nfail = 0;
@@ -13,10 +13,21 @@
     pmSource *source;
 
-    PS_ASSERT_PTR_NON_NULL(psf, false);
+    PS_ASSERT_PTR_NON_NULL(config, false);
     PS_ASSERT_PTR_NON_NULL(readout, false);
     PS_ASSERT_PTR_NON_NULL(sources, false);
-    PS_ASSERT_PTR_NON_NULL(recipe, false);
+    PS_ASSERT_PTR_NON_NULL(psf, false);
 
     psTimerStart ("psphot.apresid");
+
+    // select the appropriate recipe information
+    psMetadata *recipe  = psMetadataLookupPtr (&status, config->recipes, PSPHOT_RECIPE);
+    assert (recipe);
+
+    // determine the number of allowed threads
+    int nThreads = psMetadataLookupS32(&status, config->arguments, "NTHREADS"); // Number of threads
+    if (!status) {
+	nThreads = 0;
+    }
+    // nThreads = 0; // XXX until testing is complete, do not thread this function
 
     bool measureAptrend = psMetadataLookupBool (&status, recipe, "MEASURE.APTREND");
@@ -70,4 +81,62 @@
     pmSourceMagnitudesInit (recipe);
 
+    // threaded measurement of the source magnitudes
+    // choose Cx, Cy (see psphotThreadTools.c for overview of the concepts)
+    int Cx = 1, Cy = 1;
+    psphotChooseCellSizes (&Cx, &Cy, readout, nThreads);
+
+    psArray *cellGroups = psphotAssignSources (Cx, Cy, sources);
+
+    for (int i = 0; i < cellGroups->n; i++) {
+
+	psArray *cells = cellGroups->data[i];
+
+	for (int j = 0; j < cells->n; j++) {
+
+	    // allocate a job -- if threads are not defined, this just runs the job
+	    psThreadJob *job = psThreadJobAlloc ("PSPHOT_APRESID_MAGS");
+
+	    psArrayAdd(job->args, 1, cells->data[j]); // sources
+	    psArrayAdd(job->args, 1, psf);
+	    PS_ARRAY_ADD_SCALAR(job->args, photMode, PS_TYPE_S32);
+	    PS_ARRAY_ADD_SCALAR(job->args, maskVal,  PS_TYPE_U8); // XXX change this to use abstract mask type info
+
+	    PS_ARRAY_ADD_SCALAR(job->args, 0,        PS_TYPE_S32); // this is used as a return value for Nskip
+	    PS_ARRAY_ADD_SCALAR(job->args, 0,        PS_TYPE_S32); // this is used as a return value for Nfail
+
+	    if (!psThreadJobAddPending(job)) {
+		psError(PS_ERR_UNKNOWN, false, "Unable to guess model.");
+		psFree (job);
+		return false;
+	    }
+	    psFree(job);
+
+	}
+
+	// wait for the threads to finish and manage results
+	if (!psThreadPoolWait (false)) {
+	    psError(PS_ERR_UNKNOWN, false, "Unable to guess model.");
+	    return false;
+	}
+
+	// we have only supplied one type of job, so we can assume the types here
+	psThreadJob *job = NULL;
+	while ((job = psThreadJobGetDone()) != NULL) {
+	    if (job->args->n < 1) {
+		fprintf (stderr, "error with job\n");
+	    } else {
+		psScalar *scalar = NULL;
+		scalar = job->args->data[4];
+		Nskip += scalar->data.S32;
+		scalar = job->args->data[5];
+		Nfail += scalar->data.S32;
+	    }
+	    psFree(job);
+	}
+    }
+
+    psFree (cellGroups);
+
+    // gather the stats to assess the aperture residuals
     psVector *mask    = psVectorAllocEmpty (300, PS_TYPE_U8);
     psVector *mag     = psVectorAllocEmpty (300, PS_TYPE_F32);
@@ -78,5 +147,4 @@
     Npsf = 0;
 
-    // select all good PM_SOURCE_TYPE_STAR entries
     for (int i = 0; i < sources->n; i++) {
         source = sources->data[i];
@@ -89,15 +157,5 @@
         if (source->mode &  PM_SOURCE_MODE_POOR) SKIPSTAR ("POOR STAR");
 
-        // get growth-corrected, apTrend-uncorrected magnitudes in scaled apertures
-        // will fail if below S/N threshold or model is missing
-        if (!pmSourceMagnitudes (source, psf, photMode, maskVal)) {
-            Nskip ++;
-            psTrace ("psphot", 3, "skip : bad source mag");
-            continue;
-        }
-
         if (!isfinite(source->apMag) || !isfinite(source->psfMag)) {
-            Nfail ++;
-            psTrace ("psphot", 3, "fail : nan mags : %f %f", source->apMag, source->psfMag);
             continue;
         }
@@ -206,5 +264,5 @@
 
     psLogMsg ("psphot.apresid", PS_LOG_DETAIL, "aperture residual: %f +/- %f\n", psf->ApResid, psf->dApResid);
-    psLogMsg ("psphot.apresid", PS_LOG_INFO, "measure full-frame aperture residuals for: %f sec\n", psTimerMark ("psphot.apresid"));
+    psLogMsg ("psphot.apresid", PS_LOG_INFO, "measure full-frame aperture residuals for %d sources: %f sec\n", Npsf, psTimerMark ("psphot.apresid"));
 
     psFree (mag);
@@ -392,2 +450,45 @@
 }
 
+bool psphotApResidMags_Threaded (psThreadJob *job) {
+
+    int Nskip = 0;
+    int Nfail = 0;
+
+    psScalar *scalar = NULL;
+
+    psArray *sources                = job->args->data[0];
+    pmPSF *psf                      = job->args->data[1];
+    pmSourcePhotometryMode photMode = PS_SCALAR_VALUE(job->args->data[2],S32);
+    psMaskType maskVal              = PS_SCALAR_VALUE(job->args->data[3],U8);
+
+    for (int i = 0; i < sources->n; i++) {
+        pmSource *source = (pmSource *) sources->data[i];
+
+        if (source->type != PM_SOURCE_TYPE_STAR) SKIPSTAR ("NOT STAR");
+        if (source->mode &  PM_SOURCE_MODE_SATSTAR) SKIPSTAR ("SATSTAR");
+        if (source->mode &  PM_SOURCE_MODE_BLEND) SKIPSTAR ("BLEND");
+        if (source->mode &  PM_SOURCE_MODE_FAIL) SKIPSTAR ("FAIL STAR");
+        if (source->mode &  PM_SOURCE_MODE_POOR) SKIPSTAR ("POOR STAR");
+
+        if (!pmSourceMagnitudes (source, psf, photMode, maskVal)) {
+            Nskip ++;
+            psTrace ("psphot", 3, "skip : bad source mag");
+            continue;
+        }
+
+        if (!isfinite(source->apMag) || !isfinite(source->psfMag)) {
+            Nfail ++;
+            psTrace ("psphot", 3, "fail : nan mags : %f %f", source->apMag, source->psfMag);
+            continue;
+        }
+    }
+
+    // change the value of a scalar on the array (wrap this and put it in psArray.h)
+    scalar = job->args->data[4];
+    scalar->data.S32 = Nskip;
+
+    scalar = job->args->data[5];
+    scalar->data.S32 = Nfail;
+
+    return true;
+}
Index: /trunk/psphot/src/psphotArguments.c
===================================================================
--- /trunk/psphot/src/psphotArguments.c	(revision 21165)
+++ /trunk/psphot/src/psphotArguments.c	(revision 21166)
@@ -38,5 +38,4 @@
 
 	// create the thread pool with number of desired threads, supplying our thread launcher function
-	// XXX need to determine the number of threads from the config data
 	psThreadPoolInit (nThreads);
     }
Index: /trunk/psphot/src/psphotBlendFit.c
===================================================================
--- /trunk/psphot/src/psphotBlendFit.c	(revision 21165)
+++ /trunk/psphot/src/psphotBlendFit.c	(revision 21166)
@@ -2,5 +2,5 @@
 
 // XXX I don't like this name
-bool psphotBlendFit (pmReadout *readout, psArray *sources, psMetadata *recipe, pmPSF *psf) {
+bool psphotBlendFit (pmConfig *config, pmReadout *readout, psArray *sources, pmPSF *psf) {
 
     int Nfit = 0;
@@ -12,4 +12,124 @@
     psTimerStart ("psphot.fit.nonlinear");
 
+    // select the appropriate recipe information
+    psMetadata *recipe  = psMetadataLookupPtr (&status, config->recipes, PSPHOT_RECIPE);
+    assert (recipe);
+
+    // determine the number of allowed threads
+    int nThreads = psMetadataLookupS32(&status, config->arguments, "NTHREADS"); // Number of threads
+    if (!status) {
+	nThreads = 0;
+    }
+    // nThreads = 0; // XXX until testing is complete, do not thread this function
+
+    psphotInitLimitsPSF (recipe, readout);
+    psphotInitLimitsEXT (recipe);
+    psphotInitRadiusPSF (recipe, psf->type);
+
+    // starts the timer, sets up the array of fitSets 
+    psphotFitInit (nThreads);
+
+    // source analysis is done in S/N order (brightest first)
+    sources = psArraySort (sources, pmSourceSortBySN);
+
+    // choose Cx, Cy (see psphotThreadTools.c for overview of the concepts)
+    int Cx = 1, Cy = 1;
+    psphotChooseCellSizes (&Cx, &Cy, readout, nThreads);
+
+    psArray *cellGroups = psphotAssignSources (Cx, Cy, sources);
+
+    fprintf (stderr, "starting with %ld sources\n", sources->n);
+
+    for (int i = 0; i < cellGroups->n; i++) {
+
+	psArray *cells = cellGroups->data[i];
+
+	for (int j = 0; j < cells->n; j++) {
+
+	    // allocate a job -- if threads are not defined, this just runs the job
+	    psThreadJob *job = psThreadJobAlloc ("PSPHOT_BLEND_FIT");
+	    psArray *newSources = psArrayAllocEmpty(16);
+
+	    psArrayAdd(job->args, 1, readout);
+	    psArrayAdd(job->args, 1, recipe);
+	    psArrayAdd(job->args, 1, cells->data[j]); // sources
+	    psArrayAdd(job->args, 1, psf);
+	    psArrayAdd(job->args, 1, newSources); // return for new sources
+	    psFree (newSources);
+
+	    PS_ARRAY_ADD_SCALAR(job->args, 0, PS_TYPE_S32); // this is used as a return value for Nfit
+	    PS_ARRAY_ADD_SCALAR(job->args, 0, PS_TYPE_S32); // this is used as a return value for Npsf
+	    PS_ARRAY_ADD_SCALAR(job->args, 0, PS_TYPE_S32); // this is used as a return value for Next
+	    PS_ARRAY_ADD_SCALAR(job->args, 0, PS_TYPE_S32); // this is used as a return value for Nfail
+
+	    if (!psThreadJobAddPending(job)) {
+		psError(PS_ERR_UNKNOWN, false, "Unable to guess model.");
+		psFree (job);
+		return NULL;
+	    }
+	    psFree(job);
+
+	}
+
+	// wait for the threads to finish and manage results
+	if (!psThreadPoolWait (false)) {
+	    psError(PS_ERR_UNKNOWN, false, "Unable to guess model.");
+	    return NULL;
+	}
+
+	// we have only supplied one type of job, so we can assume the types here
+	psThreadJob *job = NULL;
+	while ((job = psThreadJobGetDone()) != NULL) {
+	    if (job->args->n < 1) {
+		fprintf (stderr, "error with job\n");
+	    } else {
+		psScalar *scalar = NULL;
+		scalar = job->args->data[5];
+		Nfit += scalar->data.S32;
+		scalar = job->args->data[6];
+		Npsf += scalar->data.S32;
+		scalar = job->args->data[7];
+		Next += scalar->data.S32;
+		scalar = job->args->data[8];
+		Nfail += scalar->data.S32;
+
+		// add these back onto sources
+		psArray *newSources = job->args->data[4];
+		for (int j = 0; j < newSources->n; j++) {
+		    psArrayAdd (sources, 16, newSources->data[j]);
+		}
+	    }
+	    psFree(job);
+	}
+    }
+    psFree (cellGroups);
+
+    if (psTraceGetLevel("psphot") >= 6) {
+      psphotSaveImage (NULL, readout->image,  "image.v2.fits");
+    }
+
+    psLogMsg ("psphot.psphotBlendFit", PS_LOG_INFO, "fit models: %f sec for %d objects (%d psf, %d ext, %d failed, %ld skipped)\n", psTimerMark ("psphot.fit.nonlinear"), Nfit, Npsf, Next, Nfail, sources->n - Nfit);
+
+    psphotVisualShowResidualImage (readout);
+    psphotVisualShowFlags (sources);
+
+    return true;
+}
+
+bool psphotBlendFit_Threaded (psThreadJob *job) {
+
+    bool status = false;
+    int Nfit = 0;
+    int Npsf = 0;
+    int Next = 0;
+    int Nfail = 0;
+    psScalar *scalar = NULL;
+
+    pmReadout *readout  = job->args->data[0];
+    psMetadata *recipe  = job->args->data[1];
+    psArray *sources    = job->args->data[2];
+    pmPSF *psf          = job->args->data[3];
+    psArray *newSources = job->args->data[4];
+
     // bit-masks to test for good/bad pixels
     psMaskType maskVal = psMetadataLookupU8(&status, recipe, "MASK.PSPHOT");
@@ -23,15 +143,6 @@
     maskVal |= markVal;
 
-    // source analysis is done in S/N order (brightest first)
-    sources = psArraySort (sources, pmSourceSortBySN);
-
     // S/N limit to perform full non-linear fits
     float FIT_SN_LIM = psMetadataLookupF32 (&status, recipe, "FULL_FIT_SN_LIM");
-
-    psphotInitLimitsPSF (recipe, readout);
-    psphotInitLimitsEXT (recipe);
-    psphotInitRadiusPSF (recipe, psf->type);
-
-    psphotFitInit ();
 
     // option to limit analysis to a specific region
@@ -41,6 +152,4 @@
 
     for (int i = 0; i < sources->n; i++) {
-        // if (i%100 == 0) psphotFitSummary ();
-
         pmSource *source = sources->data[i];
 
@@ -57,5 +166,5 @@
         if (source->peak->SN < FIT_SN_LIM) continue;
 
-        // XXX this should use peak?
+	// exclude sources outside optional analysis region
         if (source->peak->xf < AnalysisRegion.x0) continue;
         if (source->peak->yf < AnalysisRegion.y0) continue;
@@ -66,5 +175,6 @@
         if (source->modelPSF == NULL) continue;
 
-        // skip sources which are insignificant flux?
+        // skip sources which are insignificant flux? 
+	// XXX this is somewhat ad-hoc
         if (source->modelPSF->params->data.F32[1] < 0.1) {
             psTrace ("psphot", 5, "skipping near-zero source: %f, %f : %f\n",
@@ -81,15 +191,8 @@
         Nfit ++;
 
-	// XXX TEST
-	// if ((fabs(source->peak->x - 1202) < 2) && (fabs(source->peak->y - 1065) < 2)) {
-	// psTraceSetLevel ("psLib.math.psMinimizeLMChi2", 6);
-	// }
-
-        // try fitting PSFs, then try extended sources
-        // these functions subtract the resulting fitted source (XXX and update the modelFlux?)
-	// XXX re-consider conditions under which the source has EXT fit:
-	// I should try EXT if the source size measurement says it is large
+        // try fitting PSFs or extended sources depending on source->mode
+        // these functions subtract the resulting fitted source
 	if (source->mode & PM_SOURCE_MODE_EXT_LIMIT) {
-	    if (psphotFitBlob (readout, source, sources, psf, maskVal, markVal)) {
+	    if (psphotFitBlob (readout, source, newSources, psf, maskVal, markVal)) {
 		source->type = PM_SOURCE_TYPE_EXTENDED;
 		psTrace ("psphot", 5, "source at %7.1f, %7.1f is ext", source->peak->xf, source->peak->yf);
@@ -115,12 +218,16 @@
     }
 
-    if (psTraceGetLevel("psphot") >= 6) {
-      psphotSaveImage (NULL, readout->image,  "image.v2.fits");
-    }
-
-    psLogMsg ("psphot.psphotBlendFit", PS_LOG_INFO, "fit models: %f sec for %d objects (%d psf, %d ext, %d failed, %ld skipped)\n", psTimerMark ("psphot.fit.nonlinear"), Nfit, Npsf, Next, Nfail, sources->n - Nfit);
-
-    psphotVisualShowResidualImage (readout);
-    psphotVisualShowFlags (sources);
+    // change the value of a scalar on the array (wrap this and put it in psArray.h)
+    scalar = job->args->data[5];
+    scalar->data.S32 = Nfit;
+
+    scalar = job->args->data[6];
+    scalar->data.S32 = Npsf;
+
+    scalar = job->args->data[7];
+    scalar->data.S32 = Next;
+
+    scalar = job->args->data[8];
+    scalar->data.S32 = Nfail;
 
     return true;
Index: /trunk/psphot/src/psphotCleanup.c
===================================================================
--- /trunk/psphot/src/psphotCleanup.c	(revision 21165)
+++ /trunk/psphot/src/psphotCleanup.c	(revision 21166)
@@ -12,4 +12,5 @@
     pmConceptsDone ();
     pmConfigDone ();
+    pmSourceFitSetDone ();
     // fprintf (stderr, "found %d leaks at %s\n", psMemCheckLeaks (0, NULL, NULL, false), "psphot");
     fprintf (stderr, "found %d leaks at %s\n", psMemCheckLeaks (0, NULL, stdout, false), "psphot");
Index: /trunk/psphot/src/psphotGuessModels.c
===================================================================
--- /trunk/psphot/src/psphotGuessModels.c	(revision 21165)
+++ /trunk/psphot/src/psphotGuessModels.c	(revision 21166)
@@ -1,7 +1,9 @@
 # include "psphotInternal.h"
 
-bool psphotChooseCellSizes (int *Cx, int *Cy, pmReadout *readout, int nThreads);
-psphotGuessModelForRegionArgs *psphotGuessModelForRegionArgsAlloc();
-bool psphotGuessModelForRegion (psphotGuessModelForRegionArgs *args);
+// XXX : the threading here is not great.  this may be due to blocks between elements, but
+// the selection of the objects in a cell is not optimal.  To fix:
+// 1) define the boundaries of the cells up front 
+// 2) loop over the sources once and associate them with their cell
+// 3) define the threaded function to work with sources for a given cell
 
 // A guess for when the moments aren't available
@@ -20,6 +22,4 @@
 }
 
-# define NFILL 4
-
 // construct an initial PSF model for each object
 bool psphotGuessModels (pmConfig *config, pmReadout *readout, psArray *sources, pmPSF *psf) {
@@ -33,9 +33,10 @@
     assert (recipe);
 
-    // int nThreads = psMetadataLookupS32(&status, config->arguments, "NTHREADS"); // Number of threads
-    // if (!status) {
-    // nThreads = 0;
-    // }
-    int nThreads = 0;
+    // determine the number of allowed threads
+    int nThreads = psMetadataLookupS32(&status, config->arguments, "NTHREADS"); // Number of threads
+    if (!status) {
+	nThreads = 0;
+    }
+    // nThreads = 0; // XXX until testing is complete, do not thread this function
 
     // bit-masks to test for good/bad pixels
@@ -53,130 +54,82 @@
     psphotInitRadiusPSF (recipe, psf->type);
 
-    // the strategy here is to divide the image into 2x2 blocks of cells and cycle through
-    // the four discontiguous sets of cells, threading all within a set and blocking between
-    // sets 
-
-    // we divide the image region into 2*2 blocks of size Nx*Ny, the image will have 
-    // Cx*Cy blocks so that (2Nx)Cx = numCols, (2Ny)Cy = numRows.  We want to choose Cx and
-    // Cy so that (2Nx)Cx * (2Ny)Cy = 4 * NFILL * nThreads -- each of the four sets of cells
-    // has enough cells to allow NFILL cells for each thread (to better distribute heavy and
-    // light load cells
-    
-    // choose Cx, Cy:
+    // choose Cx, Cy (see psphotThreadTools.c for overview of the concepts)
     int Cx = 1, Cy = 1;
     psphotChooseCellSizes (&Cx, &Cy, readout, nThreads);
 
-    // the array runs from readout->image->col0 to readout->image->col0 + readout->image->numCols 
-    int Xo = readout->image->col0;
-    int Yo = readout->image->row0;
-    int Nx = readout->image->numCols / (2*Cx);
-    int Ny = readout->image->numRows / (2*Cy);
-
-    // we can thread all of the cells within a set, but need to block between sets
-    for (int jy = 0; jy < 2; jy++) {
-	for (int jx = 0; jx < 2; jx++) {
-
-	    // generate jobs for all of the Cx*Cy cells in this set
-	    for (int ix = 0; ix < Cx; ix++) {
-		for (int iy = 0; iy < Cy; iy++) {
-		
-		    int x0 = (2*ix + jx)*Nx + Xo;
-		    int x1 = x0 + Nx;
-
-		    int y0 = (2*iy + jy)*Ny + Yo;
-		    int y1 = y0 + Ny;
-
-		    if (readout->image->numCols + Xo - x1 < Nx) {
-			x1 = readout->image->numCols + Xo;
-		    }
-		    if (readout->image->numRows + Yo - y1 < Ny) {
-			y1 = readout->image->numRows + Yo;
-		    }
-
-		    // fprintf (stderr, "launch %d,%d - %d,%d\n", x0, y0, x1, y1);
-		    
-		    psRegion *cellRegion = psRegionAlloc (x0, x1, y0, y1);
-		    *cellRegion = psRegionForImage (readout->image, *cellRegion);
-		    // if we got the math above right, this should be a NOP
-									     
-		    psphotGuessModelForRegionArgs *args = psphotGuessModelForRegionArgsAlloc();
-		    args->readout = psMemIncrRefCounter(readout);
-		    args->sources = psMemIncrRefCounter(sources);
-		    args->psf     = psMemIncrRefCounter(psf);
-		    args->region  = psMemIncrRefCounter(cellRegion);
-
-		    args->maskVal = maskVal;
-		    args->markVal = markVal;
-
-		    // allocate a job -- if threads are not defined, this just runs the job
-		    psThreadJob *job = psThreadJobAlloc ("PSPHOT_GUESS_MODEL");
-		    psArrayAdd(job->args, 1, args);
-		    if (!psThreadJobAddPending(job)) {
-			psError(PS_ERR_UNKNOWN, false, "Unable to guess model.");
-			return false;
-		    }
-		    psFree(cellRegion);
-		    psFree(job);
-		    psFree(args);
-		}
-	    }
-
-	    // wait for the threads to finish and manage results
-	    // wait here for the threaded jobs to finish
-	    // fprintf (stderr, "wait for threads (%d, %d)\n", jx, jy);
-	    if (!psThreadPoolWait (false)) {
+    psArray *cellGroups = psphotAssignSources (Cx, Cy, sources);
+
+    for (int i = 0; i < cellGroups->n; i++) {
+
+	psArray *cells = cellGroups->data[i];
+
+	for (int j = 0; j < cells->n; j++) {
+
+	    // allocate a job -- if threads are not defined, this just runs the job
+	    psThreadJob *job = psThreadJobAlloc ("PSPHOT_GUESS_MODEL");
+	    psArrayAdd(job->args, 1, readout);
+	    psArrayAdd(job->args, 1, cells->data[j]); // sources
+	    psArrayAdd(job->args, 1, psf);
+
+	    // XXX change these to use abstract mask type info
+	    PS_ARRAY_ADD_SCALAR(job->args, maskVal,  PS_TYPE_U8);
+	    PS_ARRAY_ADD_SCALAR(job->args, markVal,  PS_TYPE_U8);
+
+	    if (!psThreadJobAddPending(job)) {
 		psError(PS_ERR_UNKNOWN, false, "Unable to guess model.");
+		psFree (job);
 		return false;
 	    }
-
-	    // we have only supplied one type of job, so we can assume the types here
-	    psThreadJob *job = NULL;
-	    while ((job = psThreadJobGetDone()) != NULL) {
-		// we have no returned data from this operation
-		if (job->args->n < 1) {
-		    fprintf (stderr, "error with job\n");
-		}
-		psFree(job);
-	    }
-	}
-    }
-    
+	    psFree(job);
+	}
+
+	// wait for the threads to finish and manage results
+	// wait here for the threaded jobs to finish
+	// fprintf (stderr, "wait for threads (%d, %d)\n", jx, jy);
+	if (!psThreadPoolWait (false)) {
+	    psError(PS_ERR_UNKNOWN, false, "Unable to guess model.");
+	    return false;
+	}
+
+	// we have only supplied one type of job, so we can assume the types here
+	psThreadJob *job = NULL;
+	while ((job = psThreadJobGetDone()) != NULL) {
+	    // we have no returned data from this operation
+	    if (job->args->n < 1) {
+		fprintf (stderr, "error with job\n");
+	    }
+	    psFree(job);
+	}
+    }
+
+    // XXX check that all sources that should have models
+    int nMiss = 0;
+    for (int i = 0; i < sources->n; i++) {
+
+	pmSource *source = sources->data[i];
+	if (source->mode & PM_SOURCE_MODE_EXT_LIMIT) {
+	    source->mode &= ~PM_SOURCE_MODE_EXT_LIMIT;
+	    continue;
+	}
+
+	nMiss ++;
+    }
+    psLogMsg ("psphot.models", 4, "failed to build models for %d objects\n", nMiss);
+
+    psFree (cellGroups);
+
     psLogMsg ("psphot.models", 4, "built models for %ld objects: %f sec\n", sources->n, psTimerMark ("psphot.models"));
     return true;
 }
 
-static void psphotGuessModelForRegionArgsFree(psphotGuessModelForRegionArgs *args)
-{
-    psFree(args->readout);
-    psFree(args->sources);
-    psFree(args->psf);
-    psFree(args->region);
-    return;
-}
-
-psphotGuessModelForRegionArgs *psphotGuessModelForRegionArgsAlloc()
-{
-    psphotGuessModelForRegionArgs *args = psAlloc(sizeof(psphotGuessModelForRegionArgs));
-    psMemSetDeallocator(args, (psFreeFunc)psphotGuessModelForRegionArgsFree);
-
-    args->readout = NULL;
-    args->sources = NULL;
-    args->psf = NULL;
-    args->region = NULL;
-
-    args->maskVal = 0;
-    args->markVal = 0;
-    return args;
-}
-
 // construct models only for sources in the specified region
-bool psphotGuessModelForRegion (psphotGuessModelForRegionArgs *args) {
-
-    pmReadout *readout = args->readout;
-    psArray *sources = args->sources;
-    pmPSF *psf = args->psf;
-    psRegion *region = args->region;
-    psMaskType maskVal = args->maskVal;
-    psMaskType markVal = args->markVal;
+bool psphotGuessModel_Threaded (psThreadJob *job) {
+
+    pmReadout *readout = job->args->data[0];
+    psArray *sources   = job->args->data[1];
+    pmPSF *psf         = job->args->data[2];
+    
+    psMaskType maskVal = PS_SCALAR_VALUE(job->args->data[3],U8);
+    psMaskType markVal = PS_SCALAR_VALUE(job->args->data[4],U8);
 
     int nSrc = 0;
@@ -184,4 +137,8 @@
     for (int i = 0; i < sources->n; i++) {
 	pmSource *source = sources->data[i];
+
+	// XXXX this is just for a test: use this to mark sources for which the model is measured
+	// check later that all are used.
+	source->mode |= PM_SOURCE_MODE_EXT_LIMIT;
 
 	// skip non-astronomical objects (very likely defects)
@@ -189,9 +146,4 @@
 	if (source->type == PM_SOURCE_TYPE_SATURATED) continue;
 	if (!source->peak) continue;
-
-	if (source->peak->xf <  region->x0) continue;
-	if (source->peak->yf <  region->y0) continue;
-	if (source->peak->xf >= region->x1) continue;
-	if (source->peak->yf >= region->y1) continue;
 
 	nSrc ++;
@@ -256,32 +208,5 @@
     }
 
-    // fprintf (stderr, "%d for region %lf,%lf - %lf,%lf\n", nSrc, region->x0, region->y0, region->x1, region->y1);
     return true;
 }
 
-bool psphotChooseCellSizes (int *Cx, int *Cy, pmReadout *readout, int nThreads) {
-
-    int nCells = nThreads * NFILL; // number of cells in a single set
-    int C = sqrt(nCells) + 0.5;
-    
-    // we need to assign Cx and Cy based on the dimensionality of the image
-    // crude way to find most evenly balanced factors of nCells:
-    for (int i = C; i >= 1; i--) {
-	int C1 = nCells / C;
-	int C2 = nCells / C1;
-	if (C1*C2 != nCells) continue;
-
-	if (readout->image->numRows > readout->image->numCols) {
-	    *Cx = PS_MAX (C1, C2);
-	    *Cy = PS_MIN (C1, C2);
-	} else {
-	    *Cx = PS_MAX (C1, C2);
-	    *Cy = PS_MIN (C1, C2);
-	}
-	return true;
-    }
-    *Cx = 1;
-    *Cy = 1; 
-
-    return true;
-}
Index: /trunk/psphot/src/psphotMagnitudes.c
===================================================================
--- /trunk/psphot/src/psphotMagnitudes.c	(revision 21165)
+++ /trunk/psphot/src/psphotMagnitudes.c	(revision 21166)
@@ -1,5 +1,5 @@
 # include "psphotInternal.h"
 
-bool psphotMagnitudes(pmConfig *config, const pmFPAview *view, psArray *sources, psMetadata *recipe, pmPSF *psf) {
+bool psphotMagnitudes(pmConfig *config, pmReadout *readout, const pmFPAview *view, psArray *sources, pmPSF *psf) {
 
     bool status = false;
@@ -7,4 +7,15 @@
 
     psTimerStart ("psphot.mags");
+
+    // select the appropriate recipe information
+    psMetadata *recipe  = psMetadataLookupPtr (&status, config->recipes, PSPHOT_RECIPE);
+    assert (recipe);
+
+    // determine the number of allowed threads
+    int nThreads = psMetadataLookupS32(&status, config->arguments, "NTHREADS"); // Number of threads
+    if (!status) {
+	nThreads = 0;
+    }
+    // nThreads = 0; // XXX until testing is complete, do not thread this function
 
     pmReadout *backModel = psphotSelectBackground (config, view);
@@ -40,4 +51,76 @@
     if (INTERPOLATE_AP) photMode |= PM_SOURCE_PHOT_INTERP;
 
+    // choose Cx, Cy (see psphotThreadTools.c for overview of the concepts)
+    int Cx = 1, Cy = 1;
+    psphotChooseCellSizes (&Cx, &Cy, readout, nThreads);
+
+    psArray *cellGroups = psphotAssignSources (Cx, Cy, sources);
+
+    for (int i = 0; i < cellGroups->n; i++) {
+
+	psArray *cells = cellGroups->data[i];
+
+	for (int j = 0; j < cells->n; j++) {
+
+	    // allocate a job -- if threads are not defined, this just runs the job
+	    psThreadJob *job = psThreadJobAlloc ("PSPHOT_MAGNITUDES");
+
+	    psArrayAdd(job->args, 1, cells->data[j]); // sources
+	    psArrayAdd(job->args, 1, psf);
+	    psArrayAdd(job->args, 1, binning);
+	    psArrayAdd(job->args, 1, backModel);
+	    psArrayAdd(job->args, 1, backStdev);
+
+	    PS_ARRAY_ADD_SCALAR(job->args, photMode, PS_TYPE_S32);
+	    PS_ARRAY_ADD_SCALAR(job->args, maskVal,  PS_TYPE_U8); // XXX change this to use abstract mask type info
+	    PS_ARRAY_ADD_SCALAR(job->args, 0,        PS_TYPE_S32); // this is used as a return value for nAp
+
+	    if (!psThreadJobAddPending(job)) {
+		psError(PS_ERR_UNKNOWN, false, "Unable to guess model.");
+		psFree (job);
+		return false;
+	    }
+	    psFree(job);
+
+	}
+
+	// wait for the threads to finish and manage results
+	if (!psThreadPoolWait (false)) {
+	    psError(PS_ERR_UNKNOWN, false, "Unable to guess model.");
+	    return false;
+	}
+
+	// we have only supplied one type of job, so we can assume the types here
+	psThreadJob *job = NULL;
+	while ((job = psThreadJobGetDone()) != NULL) {
+	    if (job->args->n < 1) {
+		fprintf (stderr, "error with job\n");
+	    } else {
+		psScalar *scalar = job->args->data[7];
+		Nap += scalar->data.S32;
+	    }
+	    psFree(job);
+	}
+    }
+
+    psFree (cellGroups);
+
+    psLogMsg ("psphot.magnitudes", PS_LOG_DETAIL, "measure magnitudes : %f sec for %ld objects (%d with apertures)\n", psTimerMark ("psphot.mags"), sources->n, Nap);
+    return true;
+}
+
+bool psphotMagnitudes_Threaded (psThreadJob *job) {
+
+    bool status;
+    int Nap = 0;
+
+    psArray *sources                = job->args->data[0];
+    pmPSF *psf                      = job->args->data[1];
+    psImageBinning *binning         = job->args->data[2];
+    pmReadout *backModel            = job->args->data[3];
+    pmReadout *backStdev            = job->args->data[4];
+    pmSourcePhotometryMode photMode = PS_SCALAR_VALUE(job->args->data[5],S32);
+    psMaskType maskVal              = PS_SCALAR_VALUE(job->args->data[6],U8);
+
     for (int i = 0; i < sources->n; i++) {
         pmSource *source = (pmSource *) sources->data[i];
@@ -47,21 +130,17 @@
         source->sky = psImageUnbinPixel(source->peak->x, source->peak->y, backModel->image, binning);
         if (isnan(source->sky) && false) {
-          psError(PSPHOT_ERR_SKY, false, "Setting pmSource.sky");
-          psErrorStackPrint(NULL, " ");
-          psErrorClear();
+	    psLogMsg ("psphot.magnitudes", PS_LOG_DETAIL, "error setting pmSource.sky");
         }
 
         source->skyErr = psImageUnbinPixel(source->peak->x, source->peak->y, backStdev->image, binning);
         if (isnan(source->skyErr) && false) {
-          psError(PSPHOT_ERR_SKY, false, "Setting pmSource.sky");
-          psErrorStackPrint(NULL, " ");
-          psErrorClear();
+	    psLogMsg ("psphot.magnitudes", PS_LOG_DETAIL, "error setting pmSource.skyErr");
         }
     }
 
-    psLogMsg ("psphot.magnitudes", PS_LOG_DETAIL, "measure magnitudes : %f sec for %ld objects (%d with apertures)\n", psTimerMark ("psphot.mags"), sources->n, Nap);
+    // change the value of a scalar on the array (wrap this and put it in psArray.h)
+    psScalar *scalar = job->args->data[7];
+    scalar->data.S32 = Nap;
+
     return true;
 }
-
-// XXX add in a measurement of the bright and faint completeness values
-// XXX push these into the recipe
Index: /trunk/psphot/src/psphotReadout.c
===================================================================
--- /trunk/psphot/src/psphotReadout.c	(revision 21165)
+++ /trunk/psphot/src/psphotReadout.c	(revision 21166)
@@ -79,5 +79,5 @@
 
     // construct sources and measure basic stats
-    psArray *sources = psphotSourceStats (readout, recipe, detections);
+    psArray *sources = psphotSourceStats (config, readout, detections);
     if (!sources) return false;
     if (!strcasecmp (breakPt, "PEAKS")) {
@@ -178,5 +178,5 @@
 
     // non-linear PSF and EXT fit to brighter sources
-    psphotBlendFit (readout, sources, recipe, psf);
+    psphotBlendFit (config, readout, sources, psf);
 
     // replace all sources
@@ -207,5 +207,5 @@
 
     // define new sources based on only the new peaks
-    psArray *newSources = psphotSourceStats (readout, recipe, detections);
+    psArray *newSources = psphotSourceStats (config, readout, detections);
 
     // set source type
@@ -243,5 +243,5 @@
 
     // measure aperture photometry corrections
-    if (!psphotApResid (readout, sources, recipe, psf)) {
+    if (!psphotApResid (config, readout, sources, psf)) {
         psLogMsg ("psphot", 3, "failed on psphotApResid");
         return psphotReadoutCleanup (config, readout, recipe, detections, psf, sources);
@@ -249,5 +249,5 @@
 
     // calculate source magnitudes
-    psphotMagnitudes(config, view, sources, recipe, psf);
+    psphotMagnitudes(config, readout, view, sources, psf);
 
     // replace failed sources?
Index: /trunk/psphot/src/psphotReadoutFindPSF.c
===================================================================
--- /trunk/psphot/src/psphotReadoutFindPSF.c	(revision 21165)
+++ /trunk/psphot/src/psphotReadoutFindPSF.c	(revision 21166)
@@ -37,5 +37,5 @@
 
     // construct sources and measure basic stats (moments, local sky)
-    psArray *sources = psphotSourceStats(readout, recipe, detections);
+    psArray *sources = psphotSourceStats(config, readout, detections);
     if (!sources) return false;
 
Index: /trunk/psphot/src/psphotSetThreads.c
===================================================================
--- /trunk/psphot/src/psphotSetThreads.c	(revision 21165)
+++ /trunk/psphot/src/psphotSetThreads.c	(revision 21166)
@@ -1,12 +1,3 @@
 # include "psphot.h"
-
-// each thread runs this function, starting a new job when it finished with an old one
-// it is called with a (void *) pointer to its own thread pointer
-bool psphotThread_psphotGuessModel(psThreadJob *job)
-{
-    psphotGuessModelForRegionArgs *args = job->args->data[0];
-    bool status = psphotGuessModelForRegion (args);
-    return status;
-}
 
 bool psphotSetThreads () {
@@ -14,6 +5,26 @@
     psThreadTask *task = NULL;
 
-    task = psThreadTaskAlloc("PSPHOT_GUESS_MODEL", 1);
-    task->function = &psphotThread_psphotGuessModel;
+    task = psThreadTaskAlloc("PSPHOT_GUESS_MODEL", 5);
+    task->function = &psphotGuessModel_Threaded;
+    psThreadTaskAdd(task);
+    psFree(task);
+
+    task = psThreadTaskAlloc("PSPHOT_MAGNITUDES", 8);
+    task->function = &psphotMagnitudes_Threaded;
+    psThreadTaskAdd(task);
+    psFree(task);
+
+    task = psThreadTaskAlloc("PSPHOT_APRESID_MAGS", 6);
+    task->function = &psphotApResidMags_Threaded;
+    psThreadTaskAdd(task);
+    psFree(task);
+
+    task = psThreadTaskAlloc("PSPHOT_SOURCE_STATS", 4);
+    task->function = &psphotSourceStats_Threaded;
+    psThreadTaskAdd(task);
+    psFree(task);
+
+    task = psThreadTaskAlloc("PSPHOT_BLEND_FIT", 9);
+    task->function = &psphotBlendFit_Threaded;
     psThreadTaskAdd(task);
     psFree(task);
Index: /trunk/psphot/src/psphotSourceFits.c
===================================================================
--- /trunk/psphot/src/psphotSourceFits.c	(revision 21165)
+++ /trunk/psphot/src/psphotSourceFits.c	(revision 21166)
@@ -9,6 +9,7 @@
 static int NfitEXT = 0;
 
-bool psphotFitInit () {
+bool psphotFitInit (int nThreads) {
     psTimerStart ("psphot.fits");
+    pmSourceFitSetInit (nThreads);
     return true;
 }
@@ -207,5 +208,5 @@
 }
 
-bool psphotFitBlob (pmReadout *readout, pmSource *source, psArray *sources, pmPSF *psf, psMaskType maskVal, psMaskType markVal) {
+bool psphotFitBlob (pmReadout *readout, pmSource *source, psArray *newSources, pmPSF *psf, psMaskType maskVal, psMaskType markVal) {
 
     bool okEXT, okDBL;
@@ -310,5 +311,5 @@
     psTrace ("psphot", 5, "blob as DBL: %f %f\n", ONE->params->data.F32[PM_PAR_XPOS], ONE->params->data.F32[PM_PAR_YPOS]);
 
-    psArrayAdd (sources, 100, newSrc);
+    psArrayAdd (newSources, 100, newSrc);
     psFree (newSrc);
     psFree (DBL);
Index: /trunk/psphot/src/psphotSourceStats.c
===================================================================
--- /trunk/psphot/src/psphotSourceStats.c	(revision 21165)
+++ /trunk/psphot/src/psphotSourceStats.c	(revision 21166)
@@ -1,31 +1,24 @@
 # include "psphotInternal.h"
 
-psArray *psphotSourceStats (pmReadout *readout, psMetadata *recipe, pmDetections *detections)
-{
-    bool     status  = false;
+psArray *psphotSourceStats (pmConfig *config, pmReadout *readout, pmDetections *detections) {
+
+    bool status = false;
     psArray *sources = NULL;
-    float BIG_RADIUS;
 
     psTimerStart ("psphot.stats");
 
-    // bit-masks to test for good/bad pixels
-    psMaskType maskVal = psMetadataLookupU8(&status, recipe, "MASK.PSPHOT");
-    assert (maskVal);
-
-    // bit-mask to mark pixels not used in analysis
-    psMaskType markVal = psMetadataLookupU8(&status, recipe, "MARK.PSPHOT");
-    assert (markVal);
-
-    // maskVal is used to test for rejected pixels, and must include markVal
-    maskVal |= markVal;
+    // select the appropriate recipe information
+    psMetadata *recipe  = psMetadataLookupPtr (&status, config->recipes, PSPHOT_RECIPE);
+    assert (recipe);
+
+    // determine the number of allowed threads
+    int nThreads = psMetadataLookupS32(&status, config->arguments, "NTHREADS"); // Number of threads
+    if (!status) {
+	nThreads = 0;
+    }
+    // nThreads = 0; // XXX until testing is complete, do not thread this function
 
     // determine properties (sky, moments) of initial sources
-    float INNER    = psMetadataLookupF32 (&status, recipe, "SKY_INNER_RADIUS");
-    if (!status) return NULL;
     float OUTER    = psMetadataLookupF32 (&status, recipe, "SKY_OUTER_RADIUS");
-    if (!status) return NULL;
-    float RADIUS   = psMetadataLookupF32 (&status, recipe, "PSF_MOMENTS_RADIUS");
-    if (!status) return NULL;
-    float MIN_SN   = psMetadataLookupF32 (&status, recipe, "MOMENTS_SN_MIN");
     if (!status) return NULL;
     char *breakPt  = psMetadataLookupStr (&status, recipe, "BREAK_POINT");
@@ -38,31 +31,135 @@
     }
 
+    // generate the array of sources, define the associated pixel
     sources = psArrayAllocEmpty (peaks->n);
 
+    for (int i = 0; i < peaks->n; i++) {
+
+        pmPeak *peak = peaks->data[i];
+        if (peak->assigned) continue;
+
+        // create a new source
+        pmSource *source = pmSourceAlloc();
+
+	// add the peak
+        source->peak = psMemIncrRefCounter(peak);
+
+	// allocate space for moments
+        source->moments = pmMomentsAlloc();
+
+        // allocate image, weight, mask arrays for each peak (square of radius OUTER)
+        pmSourceDefinePixels (source, readout, source->peak->x, source->peak->y, OUTER);
+
+	peak->assigned = true;
+	psArrayAdd (sources, 100, source);
+	psFree (source);
+    }
+
+    if (!strcasecmp (breakPt, "PEAKS")) {
+	psLogMsg ("psphot", PS_LOG_INFO, "%ld sources : %f sec\n", sources->n, psTimerMark ("psphot.stats"));
+	psLogMsg ("psphot", PS_LOG_INFO, "break point PEAKS, skipping MOMENTS\n");
+	psphotVisualShowMoments (sources);
+	return sources;
+    }
+
+    // threaded measurement of the source magnitudes
     int Nfail = 0;
     int Nmoments = 0;
-    for (int i = 0; i < peaks->n; i++) {
-
-        pmPeak *peak = peaks->data[i];
-        if (peak->assigned) continue;
-
-        // create a new source, add peak
-        pmSource *source = pmSourceAlloc();
-        source->peak = psMemIncrRefCounter(peak);
-
-        // allocate image, weight, mask arrays for each peak (square of radius OUTER)
-        pmSourceDefinePixels (source, readout, source->peak->x, source->peak->y, OUTER);
-        if (!strcasecmp (breakPt, "PEAKS")) {
-            peak->assigned = true;
-            psArrayAdd (sources, 100, source);
-            psFree (source);
-            continue;
-        }
+
+    // choose Cx, Cy (see psphotThreadTools.c for overview of the concepts)
+    int Cx = 1, Cy = 1;
+    psphotChooseCellSizes (&Cx, &Cy, readout, nThreads);
+
+    psArray *cellGroups = psphotAssignSources (Cx, Cy, sources);
+
+    for (int i = 0; i < cellGroups->n; i++) {
+
+	psArray *cells = cellGroups->data[i];
+
+	for (int j = 0; j < cells->n; j++) {
+
+	    // allocate a job -- if threads are not defined, this just runs the job
+	    psThreadJob *job = psThreadJobAlloc ("PSPHOT_SOURCE_STATS");
+
+	    psArrayAdd(job->args, 1, cells->data[j]); // sources
+	    psArrayAdd(job->args, 1, recipe);
+	    PS_ARRAY_ADD_SCALAR(job->args, 0, PS_TYPE_S32); // this is used as a return value for Nmoments
+	    PS_ARRAY_ADD_SCALAR(job->args, 0, PS_TYPE_S32); // this is used as a return value for Nfail
+
+	    if (!psThreadJobAddPending(job)) {
+		psError(PS_ERR_UNKNOWN, false, "Unable to guess model.");
+		psFree (job);
+		return NULL;
+	    }
+	    psFree(job);
+
+	}
+
+	// wait for the threads to finish and manage results
+	if (!psThreadPoolWait (false)) {
+	    psError(PS_ERR_UNKNOWN, false, "Unable to guess model.");
+	    return NULL;
+	}
+
+	// we have only supplied one type of job, so we can assume the types here
+	psThreadJob *job = NULL;
+	while ((job = psThreadJobGetDone()) != NULL) {
+	    if (job->args->n < 1) {
+		fprintf (stderr, "error with job\n");
+	    } else {
+		psScalar *scalar = NULL;
+		scalar = job->args->data[2];
+		Nmoments += scalar->data.S32;
+		scalar = job->args->data[3];
+		Nfail += scalar->data.S32;
+	    }
+	    psFree(job);
+	}
+    }
+
+    psFree (cellGroups);
+
+    psLogMsg ("psphot", PS_LOG_INFO, "%ld sources, %d moments, %d failed: %f sec\n", sources->n, Nmoments, Nfail, psTimerMark ("psphot.stats"));
+
+    psphotVisualShowMoments (sources);
+
+    return (sources);
+}
+
+bool psphotSourceStats_Threaded (psThreadJob *job) {
+
+    bool status = false;
+    float BIG_RADIUS;
+    psScalar *scalar = NULL;
+
+    psArray *sources                = job->args->data[0];
+    psMetadata *recipe              = job->args->data[1];
+
+    float INNER    = psMetadataLookupF32 (&status, recipe, "SKY_INNER_RADIUS");
+    if (!status) return false;
+    float MIN_SN   = psMetadataLookupF32 (&status, recipe, "MOMENTS_SN_MIN");
+    if (!status) return false;
+    float RADIUS   = psMetadataLookupF32 (&status, recipe, "PSF_MOMENTS_RADIUS");
+    if (!status) return false;
+
+    // bit-masks to test for good/bad pixels
+    psMaskType maskVal = psMetadataLookupU8(&status, recipe, "MASK.PSPHOT");
+    assert (maskVal);
+
+    // bit-mask to mark pixels not used in analysis
+    psMaskType markVal = psMetadataLookupU8(&status, recipe, "MARK.PSPHOT");
+    assert (markVal);
+
+    // maskVal is used to test for rejected pixels, and must include markVal
+    maskVal |= markVal;
+
+    // threaded measurement of the sources moments
+    int Nfail = 0;
+    int Nmoments = 0;
+    for (int i = 0; i < sources->n; i++) {
+        pmSource *source = sources->data[i];
 
         // skip faint sources for moments measurement
         if (source->peak->SN < MIN_SN) {
-            peak->assigned = true;
-            psArrayAdd (sources, 100, source);
-            psFree (source);
             continue;
         }
@@ -72,8 +169,7 @@
         status = pmSourceLocalSky (source, PS_STAT_SAMPLE_MEDIAN, INNER, maskVal, markVal);
         if (!status) {
-          psFree (source);
-          Nfail ++;
-          psErrorClear();
-          continue;
+	    psErrorClear(); // XXX re-consider the errors raised here
+	    Nfail ++;
+	    continue;
         }
 
@@ -82,8 +178,7 @@
         status = pmSourceLocalSkyVariance (source, PS_STAT_SAMPLE_MEDIAN, INNER, maskVal, markVal);
         if (!status) {
-          psFree (source);
-          Nfail ++;
-          psErrorClear();
-          continue;
+	    Nfail ++;
+	    psErrorClear();
+	    continue;
         }
 
@@ -91,8 +186,4 @@
         status = pmSourceMoments (source, RADIUS);
         if (status) {
-            // add to the source array
-            peak->assigned = true;
-            psArrayAdd (sources, 100, source);
-            psFree (source);
             Nmoments ++;
             continue;
@@ -105,13 +196,8 @@
         status = pmSourceMoments (source, BIG_RADIUS);
         if (status) {
-            // add to the source array
-            peak->assigned = true;
-            psArrayAdd (sources, 100, source);
-            psFree (source);
             Nmoments ++;
             continue;
         }
 
-        psFree (source);
         Nfail ++;
         psErrorClear();
@@ -119,10 +205,11 @@
     }
 
-    psLogMsg ("psphot", PS_LOG_INFO, "%ld sources, %d moments, %d failed: %f sec\n", sources->n, Nmoments, Nfail, psTimerMark ("psphot.stats"));
-
-    psphotVisualShowMoments (sources);
-
-    return (sources);
+    // change the value of a scalar on the array (wrap this and put it in psArray.h)
+    scalar = job->args->data[2];
+    scalar->data.S32 = Nmoments;
+
+    scalar = job->args->data[3];
+    scalar->data.S32 = Nfail;
+    
+    return true;
 }
-
-// XXX EAM : filter out bad peaks (eg, no valid pixels available for sky)
Index: /trunk/psphot/src/psphotThreadTools.c
===================================================================
--- /trunk/psphot/src/psphotThreadTools.c	(revision 21166)
+++ /trunk/psphot/src/psphotThreadTools.c	(revision 21166)
@@ -0,0 +1,116 @@
+# include "psphotInternal.h"
+
+// the strategy here is to divide the image into 2x2 blocks of cells and cycle through
+// the four discontiguous sets of cells, threading all within a set and blocking between
+// sets 
+
+// we divide the image region into 2*2 blocks of size Nx*Ny, the image will have 
+// Cx*Cy blocks so that (2Nx)Cx = numCols, (2Ny)Cy = numRows.  We want to choose Cx and
+// Cy so that (2Nx)Cx * (2Ny)Cy = 4 * NFILL * nThreads -- each of the four sets of cells
+// has enough cells to allow NFILL cells for each thread (to better distribute heavy and
+// light load cells
+    
+// the array runs from readout->image->col0 to readout->image->col0 + readout->image->numCols 
+
+// we save these for threaded analysis runs
+static int Xo = 0;
+static int Yo = 0;
+static int Nx = 1;
+static int Ny = 1;
+
+bool psphotChooseCellSizes (int *Cx, int *Cy, pmReadout *readout, int nThreads) {
+
+    int nCells = nThreads * 2*2; // number of cells in a single set
+    int C = sqrt(nCells) + 0.5;
+    
+    // we need to assign Cx and Cy based on the dimensionality of the image
+    // crude way to find most evenly balanced factors of nCells:
+    for (int i = C; i >= 1; i--) {
+	int C1 = nCells / C;
+	int C2 = nCells / C1;
+	if (C1*C2 != nCells) continue;
+
+	if (readout->image->numRows > readout->image->numCols) {
+	    *Cx = PS_MAX (C1, C2);
+	    *Cy = PS_MIN (C1, C2);
+	} else {
+	    *Cx = PS_MAX (C1, C2);
+	    *Cy = PS_MIN (C1, C2);
+	}
+
+	Xo = readout->image->col0;
+	Yo = readout->image->row0;
+	Nx = readout->image->numCols / (*Cx*2);
+	Ny = readout->image->numRows / (*Cy*2);
+
+	return true;
+    }
+    *Cx = 1;
+    *Cy = 1; 
+
+    Xo = readout->image->col0;
+    Yo = readout->image->row0;
+    Nx = readout->image->numCols / (*Cx*2);
+    Ny = readout->image->numRows / (*Cy*2);
+
+    return true;
+}
+
+
+bool psphotCoordToCell (int *group, int *cell, float x, float y, int Cx, int Cy) {
+  
+    // XXX need to handle edges
+    int ix = (x - Xo)/(2*Nx);
+    ix = PS_MAX (0, PS_MIN (ix, Cx - 1));
+
+    int iy = (y - Yo)/(2*Ny);
+    iy = PS_MAX (0, PS_MIN (iy, Cy - 1));
+
+    int jx = (((int)(x - Xo))%(2*Nx))/Nx;
+    jx = PS_MAX (0, PS_MIN (jx, Nx - 1));
+
+    int jy = (((int)(y - Yo))%(2*Ny))/Ny;
+    jy = PS_MAX (0, PS_MIN (jy, Ny - 1));
+
+    *group = jx + 2*jy;
+    *cell  = ix + Cx*iy;
+
+    return true;
+}
+
+
+// we have 2x2 * Cx*Cy cells for the image.  we need a function to convert an x,y
+// coordinate pair into the index for these cells
+
+// first, how shall we number them?  is there one index for all cells, or one set of
+// indices for each of the 2x2 cell groups?  
+
+psArray *psphotAssignSources (int Cx, int Cy, psArray *sources) {
+
+    psArray *cellGroups = psArrayAlloc (4);
+    for (int i = 0; i < cellGroups->n; i++) {
+	psArray *cells = psArrayAlloc (Cx*Cy);
+	cellGroups->data[i] = cells;
+	for (int j = 0; j < cells->n; j++) {
+	    psArray *sources = psArrayAllocEmpty (50);
+	    cells->data[j] = sources;
+	}
+    }
+
+    for (int i = 0; i < sources->n; i++) {
+    
+	int group = 0;
+	int cell = 0;
+
+	pmSource *source = sources->data[i];
+
+	psphotCoordToCell (&group, &cell, source->peak->xf, source->peak->yf, Cx, Cy);
+       
+	psArray *cells = cellGroups->data[group];
+	psArray *sources = cells->data[cell];
+	
+	psArrayAdd (sources, 100, source);
+    }
+	
+    return cellGroups;
+}
