Index: trunk/ippScripts/scripts/ipp_cleanup.pl
===================================================================
--- trunk/ippScripts/scripts/ipp_cleanup.pl	(revision 33664)
+++ trunk/ippScripts/scripts/ipp_cleanup.pl	(revision 34773)
@@ -18,5 +18,12 @@
 
 # Parse the command-line arguments
-my ($stage, $camera, $stage_id, $mode, $path_base, $dbname, $verbose, $no_op, $helplist, $logfile);
+my ($stage, $camera, $stage_id, $mode, $path_base, $dbname, $verbose, $no_op, $helplist, $logfile, $check_all);
+my $very_verbose = 0;
+
+# this gets set to 1 the first time we set the corresponding destreak run to be cleaned
+#my $ds_done = 0;
+# magic is dead
+my $ds_done = 1;
+
 GetOptions('stage=s'        => \$stage,     # which analysis stage to clean?
            'camera|i=s'     => \$camera,    # user-supplied camera name
@@ -24,4 +31,5 @@
            'mode|m=s'       => \$mode,      # cleanup mode (clean / purge)
            'path_base=s'    => \$path_base, # basename for files
+           'check-all'      => \$check_all, # if set clean all chips regardless of data_state
            'dbname|d=s'     => \$dbname,    # Database name
            'verbose'        => \$verbose,   # Print to stdout
@@ -56,4 +64,7 @@
 $ipprc->redirect_output($logfile) or 
         &my_die("Unable to redirect ouput", $stage, $stage_id, $PS_EXIT_UNKNOWN_ERROR) if $logfile;
+
+
+my $bzip2 = can_run('bzip2') or die "cannot find bzip2\n";  # used by bzip2_file()
 
 # set this to 1 to enable checking for files on dead nodes
@@ -91,4 +102,5 @@
     # this stage uses 'chiptool'
     my $chiptool = can_run('chiptool') or die "Can't find chiptool";
+    my $censorObjects = can_run('censorObjects') or die "Can't find censorObjects";
 
     # Get list of component imfiles
@@ -96,6 +108,9 @@
     my $imfiles;                      # Array of component files
     my $command = "$chiptool -pendingcleanupimfile -chip_id $stage_id"; # Command to run
+    $command .= ' -all' if ($check_all);
     $command .= " -dbname $dbname" if defined $dbname;
-    my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) = run(command => $command, verbose => $verbose);
+
+    my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) 
+        = run(command => $command, verbose => $very_verbose);
     unless ($success) {
         $error_code = (($error_code >> 8) or $PS_EXIT_PROG_ERROR);
@@ -114,5 +129,5 @@
         unless ($success) {
             $error_code = (($error_code >> 8) or $PS_EXIT_PROG_ERROR);
-            &my_die("Unable to perform chiptool: $error_code", "chip", $stage_id, $error_code);
+            &my_die("Unable to perform chiptool -processedimfile: $error_code", "chip", $stage_id, $error_code);
         }
         exit 0;
@@ -123,28 +138,104 @@
         &my_die("Unable to parse metadata config doc", "chip", $stage_id, $PS_EXIT_PROG_ERROR);
 
+    my $numchips = scalar @$imfiles;
+    print "Found $numchips to clean\n";
+
+    my $clean_sources = 0;
+    if ((scalar @$imfiles > 0) and ($mode eq 'goto_cleaned')) {
+        # go and find the smf file(s) for the associated camRun and check the status of the file
+        # if a good one is found we have the sources for this chipRun and thus can clean the cmfs
+        my $command = "$chiptool -listrun -chip_id $stage_id";
+        $command .= " -dbname $dbname" if defined $dbname;
+        my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) = run(command => $command, verbose => $very_verbose);
+        unless ($success) {
+            $error_code = (($error_code >> 8) or $PS_EXIT_PROG_ERROR);
+            &my_die("Unable to perform chiptool -listrun: $error_code", "chip", $stage_id, $error_code);
+        }
+        my $entries = $mdcParser->parse_list(join "", @$stdout_buf) or
+            &my_die("Unable to parse metadata config doc", "chip", $stage_id, $PS_EXIT_PROG_ERROR);
+        my $good_smf = 0;
+        foreach my $entry (@$entries) {
+            my $camRun_state = $entry->{camRun_state};
+            next if $camRun_state  ne 'full';
+            my $cam_id = $entry->{cam_id};
+            if (!$cam_id) {
+                carp('no cam_id for listrun entry');
+                next;
+            }
+            my $cam_path_base = $entry->{cam_path_base};
+            if ( !defined $cam_path_base ) {
+                carp("no path_base for $cam_id\n");
+                next;
+            }
+
+            # XXX: This assumes that the filerules are filerules-split
+            my $smf =  $ipprc->filename("PSASTRO.OUTPUT", $cam_path_base);
+            if (!$ipprc->file_exists($smf)) {
+                carp("smf for $cam_path_base not found");
+                next;
+            }
+            # we run the program censorObjects in the check mode
+            # If this program succeeds the smf is a valid fits file and each of the
+            # extensions was successfully read.
+            # XXX: create a new program outside of magic that performs this check
+
+            my $command = "$censorObjects -checkinputonly -file $smf";
+            my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) = 
+                                        run(command => $command, verbose => $very_verbose);
+            unless ($success) {
+                $error_code = (($error_code >> 8) or $PS_EXIT_PROG_ERROR);
+                print STDERR "censorObjects failed:\n";
+                print STDERR "\nSTDOUT:\n" . join "", @$stdout_buf;
+                print STDERR "\nSTDERR:\n" . join "", @$stderr_buf;
+                &my_die("Unable to perform censorObjects -checkinputonly: $error_code", "chip", $stage_id, $error_code);
+            }
+            $good_smf++;
+        }
+        if ($good_smf) {
+            # we have a good one so we can clean the sources
+            $clean_sources = 1;
+            print "Found $good_smf good smf files will clean sources\n";
+        } else {
+            print "Unable to find good smf file will NOT clean sources\n";
+        }
+    }
+
     # loop over all of the imfiles, determine the path_base and class_id for each
+    my $num_errors = 0;
     foreach my $imfile (@$imfiles) {
         my $class_id = $imfile->{class_id};
         my $path_base = $imfile->{path_base};
+        my $data_state = $imfile->{data_state};
         my $status = 1;
         $status = 0 unless defined $path_base and $path_base ne "NULL";
 
-        my $poor_quality = $imfile->{quality} > 0;
+        my $quality = $imfile->{quality};
+        my $good_quality = ($quality == 0);
+
+        print "Starting cleanup for $class_id\n";
 
         # don't clean up unless the data needed to update is available
         # modes goto_purged and goto_scrubbed will remove files even if the config is non-existent
         # goto_scrubbed now requires the config file to not exist.
-        if ($status and !$poor_quality) {
+        if ($status and $good_quality) {
             if ($mode eq "goto_cleaned") {
                 my $config_file = $ipprc->filename("PPIMAGE.CONFIG", $path_base, $class_id);
 
                 unless ($ipprc->file_exists($config_file)) {
+                    my $fault = $imfile->{fault};
+
                     if (file_gone($config_file)) {
-                        print STDERR "forcing cleanup for chipRun $stage_id $class_id "
+                        # config file was lost. Clean up. If the chip is ever updated a new config
+                        # file will be created
+                        print STDERR "forcing cleanup chip $stage_id $class_id fault: $fault quality: $quality"
                             . " because config file ($config_file) is gone\n";
+                    } elsif ($fault == 0 and $quality == 0) {
+                            print STDERR "skipping cleaning up chip $stage_id $class_id fault: $fault quality: $quality"
+                                . " because config file ($config_file) is missing\n";
+                            $status = 0;
                     } else {
-                        print STDERR "skipping cleanup for chipRun $stage_id $class_id "
-                            . " because config file ($config_file) is missing\n";
-                        $status = 0;
+                            # config file is missing but this is a bad chip anyways so clean it
+                            print STDERR "cleaning up chip $stage_id $class_id fault: $fault quality: $quality"
+                                . " even though config file ($config_file) is missing\n";
                     }
                 }
@@ -154,5 +245,5 @@
 
                 if ($ipprc->file_exists($config_file)) {
-                    print STDERR "skipping scrubbed for chipRun $stage_id $class_id "
+                    print STDERR "skipping scrubbed for chip $stage_id $class_id "
                         . " because config file ($config_file) is present\n";
                     $status = 0;
@@ -165,21 +256,30 @@
             my @files = ();
 
-            # delete the temporary image datafiles
-#            addFilename (\@files, "PPIMAGE.OUTPUT", $path_base, $class_id);
-#            addFilename (\@files, "PPIMAGE.OUTPUT.MASK", $path_base, $class_id);
-#            addFilename (\@files, "PPIMAGE.OUTPUT.VARIANCE", $path_base, $class_id);
             addFilename (\@files, "PPIMAGE.CHIP", $path_base, $class_id, 1);
             addFilename (\@files, "PPIMAGE.CHIP.MASK", $path_base, $class_id, 1);
             addFilename (\@files, "PPIMAGE.CHIP.VARIANCE", $path_base, $class_id, 1);
 	    addFilename (\@files, "PPIMAGE.PATTERN", $path_base, $class_id, 0);
+            if ($clean_sources) {
+                addFilename (\@files, "PSPHOT.OUTPUT", $path_base, $class_id);
+                addFilename (\@files, "PPIMAGE.BIN1", $path_base, $class_id);
+            }
             if ($mode eq "goto_purged") {
                 # additional files to remove for 'purge' mode
+                if (!$clean_sources) {
+                    # these weren't added above but we do want to clean them
+                    addFilename (\@files, "PSPHOT.OUTPUT", $path_base, $class_id);
+                    addFilename (\@files, "PPIMAGE.BIN1", $path_base, $class_id);
+                }
+                
+                # background model is needed to build stack background images so we do not remove it
+                # addFilename (\@files, "PSPHOT.BACKMDL", $path_base, $class_id);
+
+                addFilename (\@files, "PSPHOT.PSF.SAVE", $path_base, $class_id);
                 addFilename (\@files, "PPIMAGE.OUTPUT.FPA1", $path_base, $class_id);
                 addFilename (\@files, "PPIMAGE.OUTPUT.FPA2", $path_base, $class_id);
-                addFilename (\@files, "PPIMAGE.BIN1", $path_base, $class_id);          # clean?
-                addFilename (\@files, "PPIMAGE.BIN2", $path_base, $class_id);          # clean?
+                addFilename (\@files, "PPIMAGE.BIN2", $path_base, $class_id);
                 addFilename (\@files, "PPIMAGE.JPEG1", $path_base, $class_id);
                 addFilename (\@files, "PPIMAGE.JPEG2", $path_base, $class_id);
-                addFilename (\@files, "PPIMAGE.STATS", $path_base, $class_id);         #clean?
+                addFilename (\@files, "PPIMAGE.STATS", $path_base, $class_id);
                 addFilename (\@files, "PPIMAGE.CONFIG", $path_base, $class_id);
             }
@@ -188,29 +288,42 @@
             $status = &delete_files (\@files);
         }
+        bzip2_file("LOG.IMFILE", $path_base, $class_id);
+        bzip2_file("LOG.IMFILE.UPDATE", $path_base, $class_id);
 
         if ($status)  {
+            my $update_chip = 1;
             my $command = "$chiptool -chip_id $stage_id -class_id $class_id";
             if ($mode eq "goto_purged") {
                 $command .= " -topurgedimfile";
+                if ($data_state eq 'purged') {
+                    $update_chip = 0;
+                }
             }
             elsif ($mode eq "goto_cleaned") {
                 $command .= " -tocleanedimfile";
+                if ($data_state eq 'cleaned') {
+                    $update_chip = 0;
+                }
             }
             elsif ($mode eq "goto_scrubbed") {
                 $command .= " -toscrubbedimfile";
-            }
-
-            $command .= " -dbname $dbname" if defined $dbname;
-
-            my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) =
-                    run(command => $command, verbose => $verbose);
-            unless ($success) {
-                $error_code = (($error_code >> 8) or $PS_EXIT_PROG_ERROR);
-                &my_die("Unable to perform chiptool: $error_code", "chip", $stage_id, $error_code);
-            }
-
-            set_destreak_goto_cleaned();
-
+                if ($data_state eq 'scrubbed') {
+                    $update_chip = 0;
+                }
+            }
+
+            if ($update_chip) {
+                $command .= " -dbname $dbname" if defined $dbname;
+
+                my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) =
+                        run(command => $command, verbose => $verbose);
+                unless ($success) {
+                    $error_code = (($error_code >> 8) or $PS_EXIT_PROG_ERROR);
+                    &my_die("Unable to perform chiptool: $error_code", "chip", $stage_id, $error_code);
+                }
+                set_destreak_goto_cleaned();
+            }
         } else {
+            $num_errors++;
 
             # if an error happens for one chip, the chipRun will stay in goto_*, but the chips will go to error_* (matching the goto_*)
@@ -218,6 +331,4 @@
             $command .= " -dbname $dbname" if defined $dbname;
 
-if (0) {
-        # XXX Don't set components to error cleaned anymore
             my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) =
                     run(command => $command, verbose => $verbose);
@@ -226,5 +337,4 @@
                 &my_die("Unable to perform chiptool: $error_code", "chip", $stage_id, $error_code);
             }
-}
 
             # We want to flag the run as well, to avoid attempting to reprocess the same data over and over again.
@@ -241,4 +351,7 @@
         }
     }
+    print "Cleanup completed for chip_id $stage_id.";
+    print " num_errors: $num_errors" if $num_errors;
+    print "\n";
     exit 0;
 }
@@ -350,7 +463,8 @@
     my $skyfiles;                      # Array of component files
     my $command = "$warptool -pendingcleanupskyfile -warp_id $stage_id"; # Command to run
+    $command .= ' -all' if $check_all;
     $command .= " -dbname $dbname" if defined $dbname;
     my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) =
-            run(command => $command, verbose => $verbose);
+            run(command => $command, verbose => $very_verbose);
     unless ($success) {
         $error_code = (($error_code >> 8) or $PS_EXIT_PROG_ERROR);
@@ -378,8 +492,13 @@
         &my_die("Unable to parse metadata config doc", "warp", $stage_id, $PS_EXIT_PROG_ERROR);
 
+    my $numskycells = scalar @$skyfiles;
+    print "Found $numskycells to clean\n";
+
     my @files = ();
+    my $num_errors = 0;
     foreach my $skyfile (@$skyfiles) {
         my $path_base = $skyfile->{path_base};
         my $skycell_id = $skyfile->{skycell_id};
+        my $data_state = $skyfile->{data_state};
 
         my $status = 1;
@@ -391,11 +510,17 @@
 
                 unless ($ipprc->file_exists($config_file)) {
+                    my $fault = $skyfile->{fault};
+                    my $quality = $skyfile->{quality};
                     if (file_gone($config_file)) {
-                        print STDERR "forcing cleanup for warpRun $stage_id $skycell_id" .
-                            " because config file is gone\n";
+                        print STDERR "forcing cleanup warp $stage_id $skycell_id fault: $fault quality: $quality"
+                            . " because config file ($config_file) is gone\n";
+                    } elsif ($fault == 0 and $quality == 0) {
+                            print STDERR "skipping cleaning up warp $stage_id $skycell_id fault: $fault quality: $quality"
+                                . " because config file ($config_file) is missing\n";
+                            $status = 0;
                     } else {
-                        print STDERR "skipping cleanup for warpRun $stage_id $skycell_id" .
-                            " because config file is missing\n";
-                        $status = 0;
+                            # config file is missing but this is a bad warp anyways so clean it
+                            print STDERR "cleaning up warp $stage_id $skycell_id fault: $fault quality: $quality"
+                                . " even though config file ($config_file) is missing\n";
                     }
                 }
@@ -413,5 +538,6 @@
 
         if ($status) {
-            if ($skyfile->{quality} != 8007) {
+            # XXX: what is special about quality == 8007?
+            if ($skyfile->{quality} != 8007 || $check_all) {
                 my @files = ();
 
@@ -420,5 +546,7 @@
                 addFilename(\@files, "PSWARP.OUTPUT.MASK", $path_base, $skycell_id, 1);
                 addFilename(\@files, "PSWARP.OUTPUT.VARIANCE", $path_base, $skycell_id, 1);
-#            addFilename(\@files, "PSWARP.OUTPUT.SOURCES", $path_base, $skycell_id);
+                # these are rebuilt during update so we can delete them here
+                addFilename(\@files, "PSWARP.OUTPUT.SOURCES", $path_base, $skycell_id);
+                addFilename(\@files, "SKYCELL.TEMPLATE", $path_base, $skycell_id );
                 if ($mode eq "goto_purged") {
                     # additional files to remove for 'purge' mode
@@ -426,30 +554,58 @@
                     addFilename(\@files, "PSWARP.BIN2", $path_base, $skycell_id );
                     addFilename(\@files, "SKYCELL.STATS", $path_base, $skycell_id );
-                    # addFilename(\@files, "PSPHOT.PSF.SKY.SAVE", $path_base);
-
-                    # XXX: do we want to delete these?
+                    addFilename(\@files, "SKYCELL.STATS.UPDATE", $path_base, $skycell_id );
+                    addFilename(\@files, "PSWARP.CONFIG", $path_base, $skycell_id);
+
+                    # XXX: do we want to delete these? trace file is empty
                     # addFilename(\@files, "TRACE.EXP", $path_base, $skycell_id);
-                    # addFilename(\@files, "PSWARP.CONFIG", $path_base, $skycell_id);
-                }
-            # actual command to delete the files
+                }
+                # actual command to delete the files
                 $status = &delete_files (\@files);
             }
         }
+        bzip2_file("LOG.EXP", $path_base, $skycell_id);
+        bzip2_file("LOG.EXP.UPDATE", $path_base, $skycell_id);
 
         if ($status)  {
+            my $update_skyfile = 1;
             my $command = "$warptool -warp_id $stage_id -skycell_id $skycell_id";
             if ($mode eq "goto_purged") {
                 $command .= " -topurgedskyfile";
+                if ($data_state eq 'purged') {
+                    $update_skyfile = 0;
+                }
             }
             elsif ($mode eq "goto_cleaned") {
                 $command .= " -tocleanedskyfile";
+                if ($data_state eq 'cleaned') {
+                    $update_skyfile = 0;
+                }
             }
             elsif ($mode eq "goto_scrubbed") {
                 $command .= " -toscrubbedskyfile";
-            }
-            $command .= " -dbname $dbname" if defined $dbname;
-
-            my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) =
-                    run(command => $command, verbose => $verbose);
+                if ($data_state eq 'scrubbed') {
+                    $update_skyfile = 0;
+                }
+            }
+            $command .= " -dbname $dbname" if defined $dbname;
+
+            if ($update_skyfile) {
+                my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) =
+                        run(command => $command, verbose => $verbose);
+                unless ($success) {
+                    $error_code = (($error_code >> 8) or $PS_EXIT_PROG_ERROR);
+                    &my_die("Unable to perform warptool: $error_code", "warp", $stage_id, $error_code);
+                }
+
+                set_destreak_goto_cleaned();
+            }
+
+         } else {
+            $num_errors++;
+            my $command = "$warptool -updateskyfile -warp_id $stage_id -skycell_id $skycell_id -set_state $error_state";
+            $command .= " -dbname $dbname" if defined $dbname;
+
+            my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) =
+                run(command => $command, verbose => $verbose);
             unless ($success) {
                 $error_code = (($error_code >> 8) or $PS_EXIT_PROG_ERROR);
@@ -457,11 +613,9 @@
             }
 
-            set_destreak_goto_cleaned();
-
-         } else {
-            my $command = "$warptool -updateskyfile -warp_id $stage_id -skycell_id $skycell_id -set_state $error_state";
-            $command .= " -dbname $dbname" if defined $dbname;
-
-            my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) =
+            # We want to flag the run as well, to avoid attempting to reprocess the same data over and over again.
+            $command = "$warptool -warp_id $stage_id -updaterun -set_state $error_state";
+            $command .= " -dbname $dbname" if defined $dbname;
+
+            ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) =
                 run(command => $command, verbose => $verbose);
             unless ($success) {
@@ -469,8 +623,9 @@
                 &my_die("Unable to perform warptool: $error_code", "warp", $stage_id, $error_code);
             }
-
-            #            exit $PS_EXIT_UNKNOWN_ERROR;
-        }
-    }
+        }
+    }
+    print "Cleanup completed for warp_id $stage_id.";
+    print " num_errors: $num_errors" if $num_errors;
+    print "\n";
     exit 0;
 }
@@ -603,5 +758,4 @@
                 &my_die("Unable to perform stacktool: $error_code", "stack", $stage_id, $error_code);
             }
-#           exit $PS_EXIT_UNKNOWN_ERROR;
         }
     }
@@ -619,6 +773,7 @@
     my $skyfiles;                  # Array reference of component files
     my $command = "difftool -pendingcleanupskyfile -diff_id $stage_id"; # Command to run
+    $command .= ' -all' if $check_all;
     $command .= " -dbname $dbname" if defined $dbname;
-    my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) = run(command => $command, verbose => $verbose);
+    my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) = run(command => $command, verbose => $very_verbose);
     unless ($success) {
         $error_code = (($error_code >> 8) or $PS_EXIT_PROG_ERROR);
@@ -646,8 +801,10 @@
         &my_die("Unable to parse metadata config doc", "diff", $stage_id, $PS_EXIT_PROG_ERROR);
 
+    my $num_errors = 0;
     my @files = ();
     foreach my $skyfile (@{ $skyfiles }) {
         my $path_base = $skyfile->{path_base};
         my $skycell_id = $skyfile->{skycell_id};
+        my $data_state = $skyfile->{data_state};
 
         my $status = 1;
@@ -661,4 +818,5 @@
                 my $config_file = $ipprc->filename("PPSUB.CONFIG", $path_base, $skycell_id);
 
+            if (0) {
                 unless ($ipprc->file_exists($config_file)) {
                     if (file_gone($config_file)) {
@@ -672,4 +830,21 @@
                 }
             }
+                unless ($ipprc->file_exists($config_file)) {
+                    my $fault = $skyfile->{fault};
+                    my $quality = $skyfile->{quality};
+                    if (file_gone($config_file)) {
+                        print STDERR "forcing cleanup diff $stage_id $skycell_id fault: $fault quality: $quality"
+                            . " because config file ($config_file) is gone\n";
+                    } elsif ($fault == 0 and $quality == 0) {
+                            print STDERR "skipping cleaning up diff $stage_id $skycell_id fault: $fault quality: $quality"
+                                . " because config file ($config_file) is missing\n";
+                            $status = 0;
+                    } else {
+                            # config file is missing but this is a bad diff anyways so clean it
+                            print STDERR "cleaning up diff $stage_id $skycell_id fault: $fault quality: $quality"
+                                . " even though config file ($config_file) is missing\n";
+                    }
+                }
+            }
             elsif ($mode eq "goto_scrubbed") {
                 my $config_file = $ipprc->filename("PPSUB.CONFIG", $path_base, $skycell_id);
@@ -712,21 +887,49 @@
 
             }
-#           print STDERR "MY FILES: @files\n";
             $status = &delete_files(\@files);
         }
-#       print STDERR "MY STATUS: $status\n";
+
+        bzip2_file("LOG.EXP", $path_base, $skycell_id);
+        bzip2_file("LOG.EXP.UPDATE", $path_base, $skycell_id);
+
         if ($status) {
             my $command = "$difftool -diff_id $stage_id -skycell_id $skycell_id";
+            my $update_skyfile = 1;
 
             if ($mode eq "goto_purged") {
                 $command .= " -topurgedskyfile";
+                if ($data_state eq 'purged') {
+                    $update_skyfile = 0;
+                }
             }
             elsif ($mode eq "goto_cleaned") {
                 $command .= " -tocleanedskyfile";
+                if ($data_state eq 'cleaned') {
+                    $update_skyfile = 0;
+                }
             }
             elsif ($mode eq "goto_scrubbed") {
                 $command .= " -toscrubbedskyfile";
-            }
-
+                if ($data_state eq 'scrubbed') {
+                    $update_skyfile = 0;
+                }
+            }
+
+            $command .= " -dbname $dbname" if defined $dbname;
+
+            if ($update_skyfile) {
+                my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) =
+                    run(command => $command, verbose => $verbose);
+                unless ($success) {
+                    $error_code = (($error_code >> 8) or $PS_EXIT_PROG_ERROR);
+                    &my_die("Unable to perform difftool: $error_code", "diff", $stage_id, $error_code);
+                }
+                set_destreak_goto_cleaned();
+            }
+
+
+        } else {
+            $num_errors++;
+            my $command = "$difftool -updatediffskyfile -diff_id $stage_id -skycell_id $skycell_id -set_state $error_state";
             $command .= " -dbname $dbname" if defined $dbname;
 
@@ -738,12 +941,9 @@
             }
 
-            set_destreak_goto_cleaned();
-
-        } else {
-            my $command = "$difftool -updaterun -diff_id $stage_id -set_state $error_state";
-
-            $command .= " -dbname $dbname" if defined $dbname;
-
-            my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) =
+            $command = "$difftool -updaterun -diff_id $stage_id -set_state $error_state";
+
+            $command .= " -dbname $dbname" if defined $dbname;
+
+            ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) =
                 run(command => $command, verbose => $verbose);
             unless ($success) {
@@ -751,7 +951,9 @@
                 &my_die("Unable to perform difftool: $error_code", "diff", $stage_id, $error_code);
             }
-#           exit $PS_EXIT_UNKNOWN_ERROR;
-        }
-    }
+        }
+    }
+    print "Cleanup completed for diff_id $stage_id.";
+    print " num_errors: $num_errors" if $num_errors;
+    print "\n";
     exit 0;
 }
@@ -1917,5 +2119,5 @@
 
     foreach my $file (@$files) {
-        print STDERR "unlinking $stage $stage_id $file\n";
+        print STDERR "unlinking $stage $stage_id $file\n" if $very_verbose;
 
         my $error_code = $ipprc->kill_file($file);
@@ -2008,6 +2210,4 @@
 }
 
-# this gets set to 1 the first time we set the corresponding destreak run to be cleaned
-my $ds_done = 0;
 sub set_destreak_goto_cleaned {
 
@@ -2027,4 +2227,37 @@
 }
 
+# Compress the file named by ($filerule, $path_base, $component) to <name>.bz2 and
+# remove the original. Returns 1 on success or if there is nothing to do, 0 if bzip2 fails.
+sub bzip2_file {
+    my ($filerule, $path_base, $component) = @_;
+
+    my $filename = $ipprc->filename($filerule, $path_base, $component);
+    return 1 unless $ipprc->file_exists($filename);
+
+    if (my $resolved = $ipprc->file_resolve($filename)) {
+        my $bzip2_filename = $filename . '.bz2';
+        # remove any stale compressed copy before creating a fresh one
+        if ($ipprc->file_exists($bzip2_filename)) {
+            $ipprc->kill_file($bzip2_filename);
+        }
+        my $bzip2_file = $ipprc->file_create($bzip2_filename);
+        # my_die takes (message, stage, stage_id, exit_code), as at its other call sites
+        my_die("Unable to create $bzip2_filename", $stage, $stage_id, $PS_EXIT_SYS_ERROR) unless $bzip2_file;
+
+        my $command = "$bzip2 < $resolved > $bzip2_file";
+        my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) =
+            run(command => $command, verbose => $very_verbose);
+        unless ($success) {
+            # bzip2 failed: carry on, but keep the original file
+            $error_code = (($error_code >> 8) or $PS_EXIT_PROG_ERROR);
+            print STDERR "Failed to bzip2 $filename: $error_code\n";
+            return 0;
+        }
+        # compression succeeded, so the uncompressed original can go
+        $ipprc->kill_file($filename);
+    }
+    return 1;
+}
+
 # XXX we currently do not set the error state in the db on my_die
 sub my_die
