IPP Software Navigation Tools IPP Links Communication Pan-STARRS Links

Ignore:
Timestamp:
Sep 30, 2010, 9:20:25 AM (16 years ago)
Author:
rhenders
Message:

New roboczar tool to inform users when processing goes awry

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/tools/roboczar.pl

    r29119 r29279  
    1414use czartool::Burntool;
    1515
    16 my $period = 60;
    1716my $czarDbName = "czardb"; # TODO variables for other Db stuff, host etc
    1817my $save_temps = 0;
    1918
    2019GetOptions (
    21         "period|p=s" => \$period, # TODO more Db args
    2220        "dbname|d=s" => \$czarDbName,
    2321        );
    2422
    2523my $czarDb = new czartool::CzarDb($czarDbName, "ippdb01", "ipp", "ipp", 0, $save_temps); # TODO last arg here is save_temps, should get as arg
    26 my $gpc1Db = new czartool::Gpc1Db("gpc1", "ippdb01", "ippuser", "ippuser");
    27 my $nebulous = new czartool::Nebulous($czarDb);
    28 my $pantasks = new czartool::Pantasks();
    29 my $plotter = new czartool::Plotter($czarDb, "%Y%m%d-%H%M%S", "png font \"/usr/share/fonts/corefonts/arial.ttf\" 8", "/tmp", $save_temps); # TODO hardcoded font path
    30 my $burntool = new czartool::Burntool();
    3124
    3225$czarDb->setDateFormat("%Y%m%d-%H%i%s");
    3326
    3427my @stages = ("burntool", "chip", "cam", "fake", "warp", "stack", "diff", "magic", "magicDS", "dist");
     28my @serversWeCareAbout = ("stdscience", "distribution", "summitcopy", "registration");
    3529
    3630
    37 timePoll($period);
     31while(1) {
    3832
    39 ###########################################################################
    40 #
    41 # Updates the labels from pantasks for all interested servers
    42 #
    43 ###########################################################################
    44 sub updateLabels {
    45 
    46     print "* Updating labels\n";
    47     my @servers = ("stdscience", "distribution", "publishing", "update");
    48 
    49     my $server = undef;
    50     foreach $server (@servers) {
    51 
    52         my @labels = @{$pantasks->getLabels($server)};
    53         if (@labels) {
    54        
    55             $czarDb->updateCurrentLabels($server, \@labels);
    56         }
    57         else {
    58        
    59              print "WARNING: No labels to update for '$server'\n";
    60         }
    61     }
     33    checkServers("20 MINUTE");
     34    sleep(1200);
    6235}
    6336
    6437###########################################################################
    6538#
    66 # Updates pantasks server status TODO should really get info for all servers at once
     39# Checks that the important servers are running
    6740#
    6841###########################################################################
    69 sub updateServerStatus {
    70     print "* Checking all pantasks servers\n";
     42sub checkServers {
     43    my ($interval) = @_;
    7144
    72     my $servers = $pantasks->getServerList();
     45    my $server;
     46    foreach $server (@serversWeCareAbout) {
    7347
    74     my $server = undef;
    75     my $alive = undef;
    76     my $running = undef;
    77     foreach $server (@{$servers}) {
     48        if ($czarDb->isServerDown($server, $interval)) {
    7849
    79         $pantasks->getServerStatus($server, \$alive, \$running);
    80         $czarDb->updateServerStatus($server, $alive, $running);
     50            print "$server has been down for the last $interval\n";
     51            sendEmail(
     52                    "roydhenderson\@gmail.com",
     53                    "roboczar\@ipp.com",
     54                    "Roboczar update",
     55                    "\n\n* '$server' server has been down for the last $interval\n\n");
     56        }
     57        else {
     58            #print "$server has been running for some of the last $interval\n";
     59        }
    8160    }
    82 }
     61} 
    8362
    8463###########################################################################
    8564#
    86 # Polls with provided period (seconds)
     65#  Checks the status of a given label
    8766#
    8867###########################################################################
    89 sub timePoll {
    90     my ($period) = @_;
    91 
    92     my $label;
    93     my $new;
    94     my $full;
    95     my $faults;
    96     my $stage;
    97     my $query = undef;
    98     my $str = undef;
    99     my $labels = undef;
    100     my $updateLabels = undef;
    101     my $row = undef;
    102     my $begin = undef;
    103     my $end = undef;
    104     my $priority = undef;
    105     my $newState = undef;
    106     my $nsStatus = undef;
    107 
    108     while (1) {
    109 
    110         # sort out times
    111         $begin =  strftime('%Y-%m-%d 06:35',localtime);
    112         $end = $czarDb->getNowTimestamp();
    113 
    114         if ($czarDb->isBefore($end, $begin)) {
    115 
    116             $begin = $czarDb->subtractInterval($begin, "1 DAY");
    117         }
    118 
    119         # check nightly science status
    120         print "* Checking nightly science status\n";
    121         if (!$pantasks->getNightlyScienceStatus(\$nsStatus)) {$nsStatus = "Unknown";}
    122         $czarDb->updateNightlyScience($nsStatus);
    123 
    124         # check nebulous
    125         print "* Checking Nebulous\n";
    126         $nebulous->updateClusterSpaceInfo();
    127         $plotter->plotDiskUsageHistogram();
    128         updateServerStatus();
    129 
    130         # check labels
    131         updateLabels();
    132 
    133         # servers to check
    134         my @serversToCheck = ("stdscience", "update");
    135 
    136         my $thisServer = undef;
    137         foreach $thisServer (@serversToCheck) {
    138 
    139             if ($thisServer eq "update") {$newState = "update";}
    140             else {$newState = "new";}
    141 
    142             # deal with stdscience labels
    143             if (!$czarDb->getCurrentLabels($thisServer, \$labels)) {next;}
    144             my $size = @{$labels};
    145             if($size > 0) {
    146 
    147                 # get priority
    148                 foreach $row ( @{$labels} ) {
    149                     my ($label) = @{$row};
    150                     $priority = $gpc1Db->getPriority($label);
    151                     $czarDb->setLabelPriority($label, $priority);
    152                 }
    153 
    154                 updateAllStages($thisServer, $newState, $labels, $begin, $end);
    155                 createPlots($thisServer, $labels, $begin, $end);
    156             }
    157             else { print "* WARNING: no $thisServer labels found in Db\n";}
    158         }
    159 
    160         print "--------------------------------------------------------------------------\n";
    161         print "* Going to sleep\n";
    162         sleep($period);
    163         print "* Waking up\n";
    164 
    165         #sendEmail("roydhenderson\@gmail.com", "roboczar\@ipp.com", "Roboczar update", "Some content");
    166     };
    167 }
    168 
    169 ###########################################################################
    170 #
    171 # Loops through labels and creates time series and histogram plots
    172 #
    173 ###########################################################################
    174 sub createPlots {
    175     my ($server, $rows, $begin, $end) = @_;
    176 
    177     my $stage = undef;
    178     my $row = undef;
    179 
    180     print "* Generating plots\n";
    181 
    182     # create plots for each label for each stage
    183     foreach $stage (@stages) {
    184         foreach $row ( @{$rows} ) {
    185             my ($label) = @{$row};
    186 
    187             chomp($label);
    188             $plotter->createLogAndLinearTimeSeries($label,  $stage, $begin, $end);
    189         }
    190     }
    191 
    192     # create plots for each label for all stages
    193     foreach $row ( @{$rows} ) {
    194         my ($label) = @{$row};
    195 
    196         $plotter->createLogAndLinearTimeSeries($label, undef, $begin, $end);
    197         $plotter->createHistogram($label, $begin, $end);
    198 
    199         #routineChecks($label, "1 HOUR");
    200     }
    201     $plotter->createLogAndLinearTimeSeries("all_".$server."_labels", undef, $begin, $end);
    202     $plotter->createHistogram("all_".$server."_labels", $begin, $end);
    203     foreach $stage (@stages) {
    204 
    205         $plotter->createLogAndLinearTimeSeries("all_".$server."_labels",  $stage, $begin, $end); # TODO must be a neater way...
    206     }
    207 }
    208 
    209 ###########################################################################
    210 #
    211 # Loops through some labels and updates processed/pending/faults in the Db
    212 #
    213 ###########################################################################
    214 sub updateAllStages {
    215     my ($labelServer, $newState, $rows, $begin, $end) = @_;
    216 
    217     print "* Updating stage data\n";
    218     my $totalNew = undef;
    219     my $totalFaults = undef;
    220     my $totalFull = undef;
    221     my $stage = undef;
    222     my $reverting = 0;
    223     my $row = undef;
    224     my $new = undef;
    225     my $full = undef;
    226     my $faults = undef;
    227     my $server = undef;
    228     my $state = undef;
    229    
    230     foreach $stage (@stages) {
    231 
    232         $server = $pantasks->getServerForThisStage($stage);
    233         $pantasks->getRevertStatus($stage, \$reverting);
    234         $czarDb->updateRevertStatus($stage, $reverting);
    235 
    236         print "* Checking labels for $stage stage\n";
    237 
    238         $totalNew=$totalFaults=$totalFull=0;
    239         foreach $row ( @{$rows} ) {
    240             my ($label) = @{$row};
    241 
    242             chomp($label);
    243 
    244             if ($stage eq "burntool") {
    245 
    246                 if ($labelServer eq "stdscience") {
    247 
    248                     $burntool->getPendingAndProcessed($label, \$new, \$full);
    249                     $faults = 0;
    250                 }
    251                 else { $new = $full = $faults = 0;}
    252             }
    253             else {
    254 
    255                 $new = $gpc1Db->countExposures($label, $stage, $newState);
    256                 $full = $gpc1Db->countExposures($label, $stage, "full");
    257                 $faults = $gpc1Db->countFaults($label, $stage, $newState);
    258             }
    259             #printf("%s  %s, %s, %d, %d\n", $labelServer, $label, $stage, $new, $faults);
    260             $totalNew += $new;
    261             $totalFull += $full;
    262             $totalFaults += $faults;
    263 
    264             $czarDb->insertNewTimeData($stage, $label, $new, $full, $faults);
    265         }
    266 
    267         $czarDb->insertNewTimeData($stage, "all_".$labelServer."_labels", $totalNew, $totalFull, $totalFaults);
    268     }
    269 }
    270 
    271 ###########################################################################
    272 #
    273 # Performs some routine checks on processing status and sends alerts if it needs to
    274 #
    275 ###########################################################################
    276 sub routineChecks {
     68sub checkLabel {
    27769    my ($label, $interval) = @_;
    27870
     
    30597        }
    30698    }
     99    #sendEmail("roydhenderson\@gmail.com", "roboczar\@ipp.com", "Roboczar update", "Some content");
    307100}
    308101
Note: See TracChangeset for help on using the changeset viewer.