IPP Software Navigation Tools IPP Links Communication Pan-STARRS Links

Ignore:
Timestamp:
Oct 27, 2010, 12:02:32 PM (16 years ago)
Author:
rhenders
Message:

Now checks that processing stages are not stuck; sends a single combined email per cycle and does not repeat an unchanged message

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/tools/roboczar.pl

    r29374 r29581  
    2525$czarDb->setDateFormat("%Y%m%d-%H%i%s");
    2626
    27 my @stages = ("burntool", "chip", "cam", "fake", "warp", "stack", "diff", "magic", "magicDS", "dist");
    28 my @serversWeCareAbout = ("stdscience", "distribution", "summitcopy", "registration");
     27my @stages = ("burntool", "chip", "cam", "fake", "warp", "stack", "diff", "magic", "magicDS", "dist"); # TODO get from Pantasks
     28my @serversWeCareAbout = ("stdscience", "distribution", "summitcopy", "registration", "pstamp");
    2929
    3030
     31my $stuckMessage;
     32my $serversMessage;
     33my $message;
     34my $lastMessage = "";
     35my $anythingToReport;
    3136while(1) {
    3237
    33     checkServers("20 MINUTE");
     38    $anythingToReport = 0;
     39
     40    $message = "";
     41
     42    if (anyStoppedServers("20 MINUTE", \$serversMessage)) {
     43
     44        $message .= "\n\n" . $serversMessage;
     45        $anythingToReport = 1;
     46    }
     47    if (anyStuckStages("2 HOUR", \$stuckMessage)) {
     48
     49        $message .= "\n\n" . $stuckMessage;
     50        $anythingToReport = 1;
     51    }
     52
     53    if ($anythingToReport && $message ne $lastMessage) {
     54   
     55print "\n\n$message\n\n";
     56
     57        sendEmail(
     58                "roydhenderson\@gmail.com",
     59                "Roboczar warnings",
     60                "$message\n");
     61    }
     62
     63    $lastMessage = $message;
     64
     65    print "* Going to sleep\n";
    3466    sleep(1200);
     67}
     68
     69
     70###########################################################################
     71#
     72# Checks if anything is stuck
     73#
     74###########################################################################
     75sub anyStuckStages {
     76    my ($interval, $message) = @_;
     77
     78    my $end = $czarDb->getNowTimestamp();
     79    #$end = '2010-10-24 15';
     80    my $begin = $czarDb->subtractInterval($end, $interval);
     81    my $anyStuckStages = 0;
     82
     83    # exception - we don't care if burntool is stalled before 6:35am
     84    my $burntime = strftime('%Y-%m-%d 06:35', localtime);
     85    my $worryAboutBurntool = $czarDb->isBefore($burntime, $end);
     86
     87    ${$message} = "Processing stages:\n";
     88
     89    my $stage;
     90    foreach $stage (@stages) {
     91        my ($started, $finished, $stuck, $processed, $pending, $faults, $totalTime);
     92        $czarDb->runAnalysis(
     93                "all_stdscience_labels",
     94                $stage,
     95                $begin,
     96                $end,
     97                \$started,
     98                \$finished,
     99                \$stuck,
     100                \$processed,
     101                \$pending,
     102                \$faults,
     103                \$totalTime);
     104
     105        print "*     $stage $end $begin:\n";
     106        if (defined $started) {print "* Processing started at $started\n";}
     107        if (defined $finished) {print "* Processing finished at $finished and took $totalTime\n";}
     108        else {print "* Processing has not finished\n";}
     109        if (defined $stuck) {print "* Processing has been stuck since $stuck\n";}
     110        print "* $processed exposures have been processed, with $pending pending and $faults faults\n";
     111
     112        print "*******************************************************************************\n";
     113
     114        if ($stuck && $stage eq "burntool" && !$worryAboutBurntool) {next;}
     115
     116        if ($stuck) {
     117       
     118            ${$message} = ${$message} ."\n - '$stage' is stuck with $pending pending exposures (and $faults faults)";
     119            $anyStuckStages = 1;
     120        }
     121    }
     122
     123    return $anyStuckStages;
    35124}
    36125
    37126###########################################################################
    38127#
    39 # Checks tha the important servers are running
     128# Checks that the important servers are running
    40129#
    41130###########################################################################
    42 sub checkServers {
    43     my ($interval) = @_;
     131sub anyStoppedServers {
     132    my ($interval, $message) = @_;
    44133
     134    my $anythingToReport = 0;
    45135    my $server;
     136    my $since;
     137
     138    ${$message} = "Pantasks servers:\n";
     139
    46140    foreach $server (@serversWeCareAbout) {
    47141
    48         if ($czarDb->isServerDown($server, $interval)) {
     142        # is server alive?
     143        if ($czarDb->isServerDown($server, $interval, \$since)) {
    49144
    50             print "$server has been down for the last $interval\n";
    51             sendEmail(
    52                     "roydhenderson\@gmail.com",
    53                     "roboczar\@ipp.com",
    54                     "Roboczar update",
    55                     "\n\n* '$server' server has been down for the last $interval\n\n");
     145            ${$message} = ${$message} . "\n - '$server' has been DOWN since '$since'";
     146            $anythingToReport = 1;
    56147        }
    57         else {
    58             #print "$server has been running for some of the last $interval\n";
     148        # is it running?
     149        elsif ($czarDb->isServerStopped($server, $interval, \$since)) {
     150
     151            ${$message} = ${$message} . "\n - '$server' has been stopped since '$since'";
     152            $anythingToReport = 1;
    59153        }
    60154    }
     155
     156    return $anythingToReport;
    61157}
    62158
     
    67163###########################################################################
    68164sub sendEmail {
    69     my ($to, $from, $subject, $message) = @_;
     165    my ($to, $subject, $message) = @_;
    70166
    71167    my $sendmail = '/usr/lib/sendmail';
    72168    open(MAIL, "|$sendmail -oi -t");
    73     print MAIL "From: $from\n";
     169    print MAIL "From: roboczar\@ipp.org\n";
    74170    print MAIL "To: $to\n";
    75171    print MAIL "Subject: $subject\n\n";
Note: See TracChangeset for help on using the changeset viewer.