Index: trunk/ippScripts/scripts/ipp_cleanup.pl
===================================================================
--- trunk/ippScripts/scripts/ipp_cleanup.pl	(revision 27959)
+++ trunk/ippScripts/scripts/ipp_cleanup.pl	(revision 29420)
@@ -53,8 +53,14 @@
 }
 
+# set this to 1 to enable checking for files on dead nodes
+# it is off for now because the implementation is a hack
+# See comments below.
+my $check_for_gone = 0;
+
 my $error_state;
-if ($mode eq "goto_cleaned")  { $error_state = "error_cleaned";  }
-if ($mode eq "goto_scrubbed") { $error_state = "error_scrubbed"; }
-if ($mode eq "goto_purged")   { $error_state = "error_purged";   }
+my $done_state;
+if ($mode eq "goto_cleaned")  { $error_state = "error_cleaned"; $done_state = "cleaned"; }
+if ($mode eq "goto_scrubbed") { $error_state = "error_scrubbed"; $done_state = "scrubbed";}
+if ($mode eq "goto_purged")   { $error_state = "error_purged";   $done_state = "purged";}
 
 
@@ -92,8 +98,9 @@
     }
 
-    # if there are no chipProcessedImfiles (@$stdout_buf == 0), the reset the state to 'new'
-    # XXX Why? This could just mean there's nothing to cleanup, or that we're trying to rerun an errored run.
+    # if there are no chipProcessedImfiles (@$stdout_buf == 0) then assume that we're done
+    # it could be that there are no chipProcessedImfiles at all if, say, a run was changed from drop to goto_cleaned
+    # or if a run was set to update and then back to goto_cleaned before any images were processed
     if (@$stdout_buf == 0)  {
-        my $command = "$chiptool -chip_id $stage_id -updaterun -set_state $error_state";
+        my $command = "$chiptool -chip_id $stage_id -updaterun -set_state $done_state";
         $command .= " -dbname $dbname" if defined $dbname;
 
@@ -126,7 +133,12 @@
 
                 unless ($ipprc->file_exists($config_file)) {
-                    print STDERR "skipping cleanup for chipRun $stage_id $class_id "
-                        . " because config file ($config_file) is missing\n";
-                    $status = 0;
+                    if (file_gone($config_file)) {
+                        print STDERR "forcing cleanup for chipRun $stage_id $class_id "
+                            . " because config file ($config_file) is gone\n";
+                    } else {
+                        print STDERR "skipping cleanup for chipRun $stage_id $class_id "
+                            . " because config file ($config_file) is missing\n";
+                        $status = 0;
+                    }
                 }
             }
@@ -354,7 +366,9 @@
 
     if (@$stdout_buf == 0) {
-        # No skycells were found for some reason.
-        # Not technically an "error," but a "you told me to do X, and I can't. Please fix this yourself."
-        my $command = "$warptool -updaterun -warp_id $stage_id -set_state $error_state";
+        # No skycells were found for some reason.
+        # it could be that there are no warpSkyfiles at all if, say, a run was changed from drop to goto_cleaned
+        # or if a run was cleaned, set to update, and then back to goto_cleaned before any images were successfully
+        # updated
+        my $command = "$warptool -updaterun -warp_id $stage_id -set_state $done_state";
         $command .= " -dbname $dbname" if defined $dbname;
 
@@ -384,7 +398,12 @@
 
                 unless ($ipprc->file_exists($config_file)) {
-                    print STDERR "skipping cleanup for warpRun $stage_id $skycell_id" .
-                        " because config file is missing\n";
-                    $status = 0;
+                    if (file_gone($config_file)) {
+                        print STDERR "forcing cleanup for warpRun $stage_id $skycell_id" .
+                            " because config file is gone\n";
+                    } else {
+                        print STDERR "skipping cleanup for warpRun $stage_id $skycell_id" .
+                            " because config file is missing\n";
+                        $status = 0;
+                    }
                 }
             }
@@ -624,7 +643,8 @@
 
     if (@$stdout_buf == 0) {
-        # No skycells were found for some reason.
-        # Not technically an "error," but a "you told me to do X, and I can't. Please fix this yourself."
-        my $command = "$difftool -updaterun -diff_id $stage_id -set_state $error_state";
+        # No skycells were found for some reason.
+        # it could be that there are no skyfiles for this run at all if, say, a run was changed from drop to goto_cleaned
+        # or if a run was cleaned, set to update, and then back to goto_cleaned before any images were successfully updated
+        my $command = "$difftool -updaterun -diff_id $stage_id -set_state $done_state";
         $command .= " -dbname $dbname" if defined $dbname;
 
@@ -658,7 +678,12 @@
 
                 unless ($ipprc->file_exists($config_file)) {
-                    print STDERR "skipping cleanup for diffRun $stage_id $skycell_id" .
-                        " because config file ($config_file) is missing\n";
-                    $status = 0;
+                    if (file_gone($config_file)) {
+                        print STDERR "forcing cleanup for diffRun $stage_id $skycell_id" .
+                            " because config file ($config_file) is gone\n";
+                    } else {
+                        print STDERR "skipping cleanup for diffRun $stage_id $skycell_id" .
+                            " because config file ($config_file) is missing\n";
+                        $status = 0;
+                    }
                 }
             }
@@ -1665,4 +1690,59 @@
 }
 
+my $whichnode;
+sub file_gone
+{
+    # Return 1 only when every known instance of $file is on a lost ("gone")
+    # volume; return 0 otherwise, or whenever $check_for_gone is disabled.
+    # XXX: there is no proper interface for this; for now we use Bill's hack,
+    # the script 'whichnode'.  NOTE: errors are always reported as stage "chip".
+    return 0 if !$check_for_gone;
+
+    my $file = shift;
+
+    if (!$whichnode) {
+        $whichnode = can_run('whichnode') or
+            &my_die("Can't find whichnode", "chip", $stage_id, $PS_EXIT_CONFIG_ERROR);
+    }
+
+    my $command = "$whichnode $file";
+
+    my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) =
+        run(command => $command, verbose => $verbose);
+    unless ($success) {
+        $error_code = (($error_code >> 8) or $PS_EXIT_PROG_ERROR);
+        &my_die("Unable to perform whichnode: $error_code", "chip", $stage_id, $error_code);
+    }
+
+    my $numGone = 0;
+    my $numNotGone = 0;
+    foreach my $line (split "\n", (join "", @$stdout_buf)) {
+        # output lines are either "volume available" or "volume not available"
+        my ($volume, $answer) = split " ", $line;
+        next if !defined $volume;               # skip blank lines
+        $answer = '' if !defined $answer;       # line had a volume but no status
+
+        # our hack is if the volume has an X in the name it's gone
+        if ($volume =~ /X/) {
+            print STDERR "$file is on $volume which is gone\n";
+            $numGone++;
+        } elsif ($answer eq 'not') {
+            print STDERR "$file is on $volume which is not available\n";
+            $numNotGone++;
+        } elsif ($answer eq 'available') {
+            # a copy on a live volume means the file is definitely not gone
+            $numNotGone++;
+        } else {
+            print STDERR "unexpected output from whichnode: $line\n";
+        }
+    }
+    # if there are any instances that are not on a gone node return 0
+    if ($numNotGone == 0 and $numGone > 0) {
+        return 1;
+    } else {
+        return 0;
+    }
+}
+
 sub addFilename
 {
