Index: trunk/Ohana/src/opihi/pcontrol/CheckSystem.c
===================================================================
--- trunk/Ohana/src/opihi/pcontrol/CheckSystem.c	(revision 7917)
+++ trunk/Ohana/src/opihi/pcontrol/CheckSystem.c	(revision 8296)
@@ -13,4 +13,5 @@
   CheckBusyJobs(0.020);  /* get job status */
   CheckDoneJobs(0.020);  /* harvest job stdout/stderr */
+  CheckKillJobs(0.020);  /* kill jobs queued on the kill stack */
 
   CheckDoneHosts(0.020); /* reset the host */
@@ -51,12 +52,28 @@
   float dtime;
 
-  stack = GetJobStack (PCONTROL_JOB_BUSY);
-  Nobject = stack[0].Nobject;
-
-  /* always allow at least one test */
-  gettimeofday (&start, (void *) NULL);
-  dtime = 0.0;
-  for (i = 0; (i < Nobject) && (dtime < MaxDelay); i++) {
-    job = GetStack (stack, STACK_TOP);
+  /* Loop through objects on the stack, no more than once.  Note that it is not important if the
+     stack size is modified by other threads or is changed by any of the actions performed during
+     this loop: the Nobject value is only used to get a rough number for the number of iterations.
+   */
+
+  hoststack = GetHostStack (PCONTROL_HOST_BUSY);
+  jobstack  = GetJobStack (PCONTROL_JOB_BUSY);
+  Nobject   = jobstack[0].Nobject;
+
+  /* always allow at least one test */
+  gettimeofday (&start, (void *) NULL);
+  dtime = 0.0;
+  for (i = 0; (i < Nobject) && (dtime < MaxDelay); i++) {
+    /* pull both job and host from their stacks */
+    /* XXX is this subject to the Deadly Embrace (deadlock)?  hoststack is held locked while jobstack is accessed */
+    LockStack (hoststack);
+    job = PullStackByLocation (jobstack, STACK_TOP);
+    if (job == NULL) {
+      UnlockStack (hoststack);
+      break;
+    }
+    host = RemoveStackByID (hoststack, job[0].host[0].HostID);
+    UnlockStack (hoststack);
+
     CheckBusyJob (job);
     gettimeofday (&stop, (void *) NULL);
@@ -75,4 +92,5 @@
   float dtime;
 
+  /* Loop through objects on the stack, no more than once. see note above */
   stack = GetJobStack (PCONTROL_JOB_DONE);
   Nobject = stack[0].Nobject;
@@ -82,5 +100,6 @@
   dtime = 0.0;
   for (i = 0; (i < Nobject) && (dtime < MaxDelay); i++) {
-    job = GetStack (stack, STACK_TOP);
+    job = PullStackByLocation (stack, STACK_TOP);
+    if (job == NULL) break;
     CheckDoneJob (job);
     gettimeofday (&stop, (void *) NULL);
@@ -91,4 +110,30 @@
 }
 
+int CheckKillJobs (float MaxDelay) {
+  /* Take jobs from the top of the PCONTROL_JOB_KILL stack and pass each to KillJob(), within the MaxDelay time budget. */
+  struct timeval start, stop;
+  int i, Nobject;   /* i counts jobs handled; Nobject is the stack size sampled once before the loop */
+  Stack *stack;
+  Job   *job;
+  float dtime;      /* elapsed time since 'start' per DTIME(); units presumably seconds -- TODO confirm against DTIME */
+
+  /* Loop through objects on the stack, no more than once: Nobject is read a single time and only bounds the iteration count. */
+  stack = GetJobStack (PCONTROL_JOB_KILL);
+  Nobject = stack[0].Nobject;
+
+  /* always allow at least one test (dtime starts at 0.0, so this holds only when MaxDelay > 0) */
+  gettimeofday (&start, (void *) NULL);
+  dtime = 0.0;
+  for (i = 0; (i < Nobject) && (dtime < MaxDelay); i++) {
+    job = PullStackByLocation (stack, STACK_TOP);
+    if (job == NULL) break;   /* nothing returned; presumably the stack is already empty -- stop early */
+    KillJob (job);
+    gettimeofday (&stop, (void *) NULL);
+    dtime = DTIME (stop, start);   /* refresh the elapsed time checked by the loop condition */
+  }
+  if (0 && (Nobject > 0)) gprint (GP_ERR, "checked %d of %d jobs\n", i, Nobject);   /* debug report, disabled by the constant 0 */
+  return (TRUE);   /* unconditional: no failure is reported to the caller */
+}
+
 int CheckDoneHosts (float MaxDelay) {
 
@@ -99,4 +144,5 @@
   float dtime;
 
+  /* Loop through objects on the stack, no more than once. see note above */
   stack = GetHostStack (PCONTROL_HOST_DONE);
   Nobject = stack[0].Nobject;
@@ -106,5 +152,6 @@
   dtime = 0.0;
   for (i = 0; (i < Nobject) && (dtime < MaxDelay); i++) {
-    host = GetStack (stack, STACK_TOP);
+    host = PullStackByLocation (stack, STACK_TOP);
+    if (host == NULL) break;
     CheckDoneHost (host);
     gettimeofday (&stop, (void *) NULL);
@@ -123,4 +170,5 @@
   float dtime;
 
+  /* Loop through objects on the stack, no more than once. see note above */
   stack = GetHostStack (PCONTROL_HOST_DOWN);
   Nobject = stack[0].Nobject;
@@ -130,5 +178,6 @@
   dtime = 0.0;
   for (i = 0; (i < Nobject) && (dtime < MaxDelay); i++) {
-    host = GetStack (stack, STACK_TOP);
+    host = PullStackByLocation (stack, STACK_TOP);
+    if (host == NULL) break;
     dtime = DTIME (host[0].nexttry, start);
     if (dtime > 0) {
@@ -156,4 +205,5 @@
   if (!stack[0].Nobject) return (TRUE);
 
+  /* Loop through objects on the stack, no more than once. see note above */
   stack = GetHostStack (PCONTROL_HOST_IDLE);
   Nobject = stack[0].Nobject;
@@ -163,5 +213,6 @@
   dtime = 0.0;
   for (i = 0; (i < Nobject) && (dtime < MaxDelay); i++) {
-    host = GetStack (stack, STACK_TOP);
+    host = PullStackByLocation (stack, STACK_TOP);
+    if (host == NULL) break;
     CheckIdleHost (host);
     gettimeofday (&stop, (void *) NULL);
@@ -181,4 +232,5 @@
   float dtime;
 
+  /* Loop through objects on the stack, no more than once. see note above */
   stack = GetHostStack (PCONTROL_HOST_IDLE);
   Nobject = stack[0].Nobject;
@@ -188,5 +240,6 @@
   dtime = 0.0;
   for (i = 0; (i < Nobject) && (dtime < MaxDelay); i++) {
-    host = GetStack (stack, STACK_TOP);
+    host = PullStackByLocation (stack, STACK_TOP);
+    if (host == NULL) break;
     CheckHost (host);
     gettimeofday (&stop, (void *) NULL);
@@ -195,4 +248,5 @@
   if (0) gprint (GP_ERR, "checked %d idle hosts\n", i);
 
+  /* Loop through objects on the stack, no more than once. see note above */
   stack = GetHostStack (PCONTROL_HOST_BUSY);
   Nobject = stack[0].Nobject;
@@ -200,5 +254,6 @@
   dtime = 0.0;
   for (i = 0; (i < Nobject) && (dtime < MaxDelay); i++) {
-    host = GetStack (stack, STACK_TOP);
+    host = PullStackByLocation (stack, STACK_TOP);
+    if (host == NULL) break;
     CheckHost (host);
     gettimeofday (&stop, (void *) NULL);
