Changeset 29581
- Timestamp:
- Oct 27, 2010, 12:02:32 PM (16 years ago)
- File:
-
- 1 edited
-
trunk/tools/roboczar.pl (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/tools/roboczar.pl
r29374 r29581 25 25 $czarDb->setDateFormat("%Y%m%d-%H%i%s"); 26 26 27 my @stages = ("burntool", "chip", "cam", "fake", "warp", "stack", "diff", "magic", "magicDS", "dist"); 28 my @serversWeCareAbout = ("stdscience", "distribution", "summitcopy", "registration" );27 my @stages = ("burntool", "chip", "cam", "fake", "warp", "stack", "diff", "magic", "magicDS", "dist"); # TODO get from Pantasks 28 my @serversWeCareAbout = ("stdscience", "distribution", "summitcopy", "registration", "pstamp"); 29 29 30 30 31 my $stuckMessage; 32 my $serversMessage; 33 my $message; 34 my $lastMessage = ""; 35 my $anythingToReport; 31 36 while(1) { 32 37 33 checkServers("20 MINUTE"); 38 $anythingToReport = 0; 39 40 $message = ""; 41 42 if (anyStoppedServers("20 MINUTE", \$serversMessage)) { 43 44 $message .= "\n\n" . $serversMessage; 45 $anythingToReport = 1; 46 } 47 if (anyStuckStages("2 HOUR", \$stuckMessage)) { 48 49 $message .= "\n\n" . $stuckMessage; 50 $anythingToReport = 1; 51 } 52 53 if ($anythingToReport && $message ne $lastMessage) { 54 55 print "\n\n$message\n\n"; 56 57 sendEmail( 58 "roydhenderson\@gmail.com", 59 "Roboczar warnings", 60 "$message\n"); 61 } 62 63 $lastMessage = $message; 64 65 print "* Going to sleep\n"; 34 66 sleep(1200); 67 } 68 69 70 ########################################################################### 71 # 72 # Checks if anything is stuck 73 # 74 ########################################################################### 75 sub anyStuckStages { 76 my ($interval, $message) = @_; 77 78 my $end = $czarDb->getNowTimestamp(); 79 #$end = '2010-10-24 15'; 80 my $begin = $czarDb->subtractInterval($end, $interval); 81 my $anyStuckStages = 0; 82 83 # exception - we don't care if burntool is stalled before 6:30am 84 my $burntime = strftime('%Y-%m-%d 06:35', localtime); 85 my $worryAboutBurntool = $czarDb->isBefore($burntime, $end); 86 87 ${$message} = "Processing stages:\n"; 88 89 my $stage; 90 foreach $stage (@stages) { 91 my ($started, $finished, $stuck, $processed, $pending, $faults, $totalTime); 92 $czarDb->runAnalysis( 93 "all_stdscience_labels", 94 $stage, 95 $begin, 96 $end, 97 \$started, 98 \$finished, 99 \$stuck, 100 \$processed, 101 \$pending, 102 \$faults, 103 \$totalTime); 104 105 print "* $stage $end $begin:\n"; 106 if (defined $started) {print "* Processing started at $started\n";} 107 if (defined $finished) {print "* Processing finished at $finished and took $totalTime\n";} 108 else {print "* Processing has not finished\n";} 109 if (defined $stuck) {print "* Processing has been stuck since $stuck\n";} 110 print "* $processed exposures have been processed, with $pending pending and $faults faults\n"; 111 112 print "*******************************************************************************\n"; 113 114 if ($stuck && $stage eq "burntool" && !$worryAboutBurntool) {next;} 115 116 if ($stuck) { 117 118 ${$message} = ${$message} ."\n - '$stage' is stuck with $pending pending exposures (and $faults faults)"; 119 $anyStuckStages = 1; 120 } 121 } 122 123 return $anyStuckStages; 35 124 } 36 125 37 126 ########################################################################### 38 127 # 39 # Checks tha the important servers are running128 # Checks that the important servers are running 40 129 # 41 130 ########################################################################### 42 sub checkServers {43 my ($interval ) = @_;131 sub anyStoppedServers { 132 my ($interval, $message) = @_; 44 133 134 my $anythingToReport = 0; 45 135 my $server; 136 my $since; 137 138 ${$message} = "Pantasks servers:\n"; 139 46 140 foreach $server (@serversWeCareAbout) { 47 141 48 if ($czarDb->isServerDown($server, $interval)) { 142 # is server alice? 143 if ($czarDb->isServerDown($server, $interval, \$since)) { 49 144 50 print "$server has been down for the last $interval\n"; 51 sendEmail( 52 "roydhenderson\@gmail.com", 53 "roboczar\@ipp.com", 54 "Roboczar update", 55 "\n\n* '$server' server has been down for the last $interval\n\n"); 145 ${$message} = ${$message} . "\n - '$server' has been DOWN since '$since'"; 146 $anythingToReport = 1; 56 147 } 57 else { 58 #print "$server has been running for some of the last $interval\n"; 148 # is it running? 149 elsif ($czarDb->isServerStopped($server, $interval, \$since)) { 150 151 ${$message} = ${$message} . "\n - '$server' has been stopped since '$since'"; 152 $anythingToReport = 1; 59 153 } 60 154 } 155 156 return $anythingToReport; 61 157 } 62 158 … … 67 163 ########################################################################### 68 164 sub sendEmail { 69 my ($to, $ from, $subject, $message) = @_;165 my ($to, $subject, $message) = @_; 70 166 71 167 my $sendmail = '/usr/lib/sendmail'; 72 168 open(MAIL, "|$sendmail -oi -t"); 73 print MAIL "From: $from\n";169 print MAIL "From: roboczar\@ipp.org\n"; 74 170 print MAIL "To: $to\n"; 75 171 print MAIL "Subject: $subject\n\n";
Note:
See TracChangeset
for help on using the changeset viewer.
