IPP Software Navigation Tools IPP Links Communication Pan-STARRS Links

Ignore:
Timestamp:
Oct 14, 2010, 2:55:20 PM (16 years ago)
Author:
bills
Message:

Add hook (not enabled) to check for files on lost nodes.
Don't fail if there are no files left to clean up; this isn't an error.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/ippScripts/scripts/ipp_cleanup.pl

    r27959 r29420  
    5353}
    5454
     55# set this to 1 to enable checking for files on dead nodes
     56# it is off for now because the implementation is a hack
     57# See comments below.
     58my $check_for_gone = 0;
     59
    5560my $error_state;
    56 if ($mode eq "goto_cleaned")  { $error_state = "error_cleaned";  }
    57 if ($mode eq "goto_scrubbed") { $error_state = "error_scrubbed"; }
    58 if ($mode eq "goto_purged")   { $error_state = "error_purged";   }
     61my $done_state;
     62if ($mode eq "goto_cleaned")  { $error_state = "error_cleaned"; $done_state = "cleaned"; }
     63if ($mode eq "goto_scrubbed") { $error_state = "error_scrubbed"; $done_state = "scrubbed";}
     64if ($mode eq "goto_purged")   { $error_state = "error_purged";   $done_state = "purged";}
    5965
    6066
     
    9298    }
    9399
    94     # if there are no chipProcessedImfiles (@$stdout_buf == 0), the reset the state to 'new'
    95     # XXX Why? This could just mean there's nothing to cleanup, or that we're trying to rerun an errored run.
     100    # if there are no chipProcessedImfiles (@$stdout_buf == 0) then assume that we're done
     101    # it could be that there are no chipProcessedImfiles at all if, say, a run was changed from drop to goto_cleaned
     102    # or if a run was set to update and then back to goto_cleaned before any images were processed
    96103    if (@$stdout_buf == 0)  {
    97         my $command = "$chiptool -chip_id $stage_id -updaterun -set_state $error_state";
     104        my $command = "$chiptool -chip_id $stage_id -updaterun -set_state $done_state";
    98105        $command .= " -dbname $dbname" if defined $dbname;
    99106
     
    126133
    127134                unless ($ipprc->file_exists($config_file)) {
    128                     print STDERR "skipping cleanup for chipRun $stage_id $class_id "
    129                         . " because config file ($config_file) is missing\n";
    130                     $status = 0;
     135                    if (file_gone($config_file)) {
     136                        print STDERR "forcing cleanup for chipRun $stage_id $class_id "
     137                            . " because config file ($config_file) is gone\n";
     138                    } else {
     139                        print STDERR "skipping cleanup for chipRun $stage_id $class_id "
     140                            . " because config file ($config_file) is missing\n";
     141                        $status = 0;
     142                    }
    131143                }
    132144            }
     
    354366
    355367    if (@$stdout_buf == 0) {
    356         # No skycells were found for some reason.
    357         # Not technically an "error," but a "you told me to do X, and I can't. Please fix this yourself."
    358         my $command = "$warptool -updaterun -warp_id $stage_id -set_state $error_state";
     368        # No skycells were found for some reason.
     369        # it could be that there are no warpSkyfiles at all if, say, a run was changed from drop to goto_cleaned
     370        # or if a run was cleaned, set to update, and then back to goto_cleaned before any images were successfully
     371        # updated
     372        my $command = "$warptool -updaterun -warp_id $stage_id -set_state $done_state";
    359373        $command .= " -dbname $dbname" if defined $dbname;
    360374
     
    384398
    385399                unless ($ipprc->file_exists($config_file)) {
    386                     print STDERR "skipping cleanup for warpRun $stage_id $skycell_id" .
    387                         " because config file is missing\n";
    388                     $status = 0;
     400                    if (file_gone($config_file)) {
     401                        print STDERR "forcing cleanup for warpRun $stage_id $skycell_id" .
     402                            " because config file is gone\n";
     403                    } else {
     404                        print STDERR "skipping cleanup for warpRun $stage_id $skycell_id" .
     405                            " because config file is missing\n";
     406                        $status = 0;
     407                    }
    389408                }
    390409            }
     
    624643
    625644    if (@$stdout_buf == 0) {
    626         # No skycells were found for some reason.
    627         # Not technically an "error," but a "you told me to do X, and I can't. Please fix this yourself."
    628         my $command = "$difftool -updaterun -diff_id $stage_id -set_state $error_state";
     645        # No skycells were found for some reason.
     646        # it could be that there are no warpSkyfiles at all if, say, a run was changed from drop to goto_cleaned
     647        # or if a run was cleaned, set to update, and then back to goto_cleaned before any images were successfully updated
     648        my $command = "$difftool -updaterun -diff_id $stage_id -set_state $done_state";
    629649        $command .= " -dbname $dbname" if defined $dbname;
    630650
     
    658678
    659679                unless ($ipprc->file_exists($config_file)) {
    660                     print STDERR "skipping cleanup for diffRun $stage_id $skycell_id" .
    661                         " because config file ($config_file) is missing\n";
    662                     $status = 0;
     680                    if (file_gone($config_file)) {
     681                        print STDERR "forcing cleanup for diffRun $stage_id $skycell_id" .
     682                            " because config file ($config_file) is gone\n";
     683                    } else {
     684                        print STDERR "skipping cleanup for diffRun $stage_id $skycell_id" .
     685                            " because config file ($config_file) is missing\n";
     686                        $status = 0;
     687                    }
    663688                }
    664689            }
     
    16651690}
    16661691
     1692my $whichnode;  # cache for the result of can_run('whichnode'); filled in on the first call to file_gone
     1693sub file_gone
     1694{
     1695    # If $check_for_gone is set, check whether the only instances of the given file are on a lost volume.
     1696    # Takes one argument (the file name); returns 1 if the file is considered gone, 0 otherwise.
     1697    # XXX: we don't have a proper interface for this, so for now use Bill's hack, the script 'whichnode'.
     1698    return 0 if !$check_for_gone;  # check disabled: never report a file as gone
     1699
     1700    my $file = shift;  # name of the file to look up
     1701
     1702    if (!$whichnode) {  # look the program up only once; later calls reuse the cached value
     1703        $whichnode = can_run('whichnode') or
     1704            &my_die("Can't find whichnode", "chip", $stage_id, $PS_EXIT_CONFIG_ERROR);  # NOTE(review): stage is hard-coded to "chip" although the warp and diff hunks also call file_gone; $stage_id is not an argument here - confirm it is in scope
     1705    }
     1706
     1707    my $command = "$whichnode $file";  # NOTE(review): $file is interpolated unquoted into the command string
     1708
     1709    my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) =
     1710        run(command => $command, verbose => $verbose);
     1711    unless ($success) {
     1712        $error_code = (($error_code >> 8) or $PS_EXIT_PROG_ERROR);  # shift the code right by 8 bits; if that is zero fall back to $PS_EXIT_PROG_ERROR
     1713        &my_die("Unable to perform whichnode: $error_code", "chip", $stage_id, $error_code);
     1714    }
     1715
     1716    my @lines = split "\n", (join "", @$stdout_buf);  # glue the captured stdout chunks together, then split into lines
     1717    my $numGone = 0;  # instances found on a volume treated as gone
     1718    my $numNotGone = 0;  # instances found on a volume reported as "not available" but not treated as gone
     1719    foreach my $line (@lines) {
     1720        chomp $line;
     1721
     1722        # expected output lines are either
     1723        #   "volume available"
     1724        # or
     1725        #   "volume not available"
     1726
     1727        my ($volume, $answer, undef) = split " ", $line;  # first two whitespace-separated words; both are undef for an empty line
     1728        # our hack is: if the volume has an X in its name it's gone
     1729        if ($volume =~ /X/) {
     1730            print STDERR "$file is on $volume which is gone\n";
     1731            $numGone++;
     1732        } elsif ($answer eq 'not') {  # NOTE(review): $answer is undef when the line has only one word
     1733            print STDERR "$file is on $volume which is not available\n";
     1734            $numNotGone++;
     1735        } else {  # NOTE(review): a "volume available" line also lands here, so it is reported as unexpected and not counted in $numNotGone - confirm this is intended
     1736            print STDERR "unexpected output from whichnode: $line\n";
     1737        }
     1738    }
     1739    # return 1 only if at least one instance is on a gone volume and no instance was counted as not gone; otherwise return 0
     1740    if ($numNotGone == 0 and $numGone > 0) {
     1741        return 1;
     1742    } else {
     1743        return 0;
     1744    }
     1745}
     1746
    16671747sub addFilename
    16681748{
Note: See TracChangeset for help on using the changeset viewer.