IPP Software Navigation Tools IPP Links Communication Pan-STARRS Links

Changeset 27103


Ignore:
Timestamp:
Feb 26, 2010, 11:01:34 AM (16 years ago)
Author:
watersc1
Message:

Increase the allowed disk consumption fraction from 0.95 to 0.98.

Rework the error/warning reporting of nebdiskd so that it isn't so
annoying about saying the same things over and over again. This also
disables the "Mailer" log interface.

Location:
trunk/Nebulous-Server
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/Nebulous-Server/bin/nebdiskd

    r26380 r27103  
    6969# stuff to controll failures.
    7070my %host_failure_counts = ();
     71my %host_removed = ();
    7172my $failure_limit = 5;
    7273
     
    9697# start up logging
    9798my $conf = '
    98     log4perl.category.nebdiskd = WARN, Screen, SERVERLOGFILE, Mailer
     99    log4perl.category.nebdiskd = WARN, Screen, SERVERLOGFILE
    99100
    100101    log4perl.appender.Screen        = Log::Log4perl::Appender::Screen
     
    113114    log4perl.filter.MatchWarn.AcceptOnMatch = off
    114115
    115     log4perl.appender.Mailer         = Log::Dispatch::Email::MailSend
    116     log4perl.appender.Mailer.to      = ps-ipp-ops@ifa.hawaii.edu
    117     log4perl.appender.Mailer.subject = nebdiskd alert
    118     log4perl.appender.Mailer.buffered = 0
    119     log4perl.appender.Mailer.Filter= MatchWarn
    120     log4perl.appender.Mailer.layout = Log::Log4perl::Layout::PatternLayout
    121     log4perl.appender.Mailer.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} | %H | %p | %M - %m%n
    122 
    123116    log4perl.appender.Limiter              = Log::Log4perl::Appender::Limit
    124117    log4perl.appender.Limiter.appender     = Mailer
    125118    log4perl.appender.Limiter.block_period = 300
    126119';
     120# I've removed this bit.
     121#     log4perl.appender.Mailer         = Log::Dispatch::Email::MailSend
     122#     log4perl.appender.Mailer.to      = ps-ipp-ops@ifa.hawaii.edu
     123#     log4perl.appender.Mailer.subject = nebdiskd alert
     124#     log4perl.appender.Mailer.buffered = 0
     125#     log4perl.appender.Mailer.Filter= MatchWarn
     126#     log4perl.appender.Mailer.layout = Log::Log4perl::Layout::PatternLayout
     127#     log4perl.appender.Mailer.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} | %H | %p | %M - %m%n
     128
     129
    127130Log::Log4perl::init(\$conf);
    128131my $log = Log::Log4perl::get_logger("nebdiskd");
     
    215218                $tries++;
    216219                unless (is_mountpoint($mountpoint)) {
    217                     $log->warn("$mountpoint is not a valid mountpoint ($tries)");
     220                    unless(defined($host_removed{$mountpoint})) {
     221                        $log->warn("$mountpoint is not a valid mountpoint ($tries $host_failure_counts{$mountpoint})");
     222                    }
    218223                    $valid_mountpoint = 0;
    219224                }
     
    226231                }
    227232                $host_failure_counts{$mountpoint}++;
    228 
    229                 if ($host_failure_counts{$mountpoint} > $failure_limit) {
    230                     $log->warn("Removing $mountpoint from the mountedvol table ($host_failure_counts{$mountpoint} > $failure_limit)");
    231                     $d_query->execute($mountpoint);
     233               
     234                if (!(defined($host_removed{$mountpoint})) || !($host_removed{$mountpoint})) {
     235                    if (($host_failure_counts{$mountpoint} > $failure_limit)) {
     236                        $host_removed{$mountpoint} = 1;
     237                        $log->warn("Removing $mountpoint from the mountedvol table ($host_failure_counts{$mountpoint} > $failure_limit) No further warnings unless state changes.");
     238                        $d_query->execute($mountpoint);
     239                    }
     240                    else {
     241                        $log->warn("Mountpoint $mountpoint has had $host_failure_counts{$mountpoint} failures. Will remove after $failure_limit");
     242                    }
    232243                }
    233                 else {
    234                     $log->warn("Mountpoint $mountpoint has had $host_failure_counts{$mountpoint} failures. Will remove after $failure_limit");
    235                 }
    236                 next;
    237             }
     244                next;
     245               
     246            }
    238247            if ($host_failure_counts{$mountpoint} != 0) {
     248                $host_removed{$mountpoint} = 0;
    239249                $log->warn("Mountpoint $mountpoint failures cleared ($host_failure_counts{$mountpoint})");
    240250                $host_failure_counts{$mountpoint} = 0;
  • trunk/Nebulous-Server/lib/Nebulous/Server.pm

    r26294 r27103  
    4141# This determines how many entries from the list of volumes sorted by free space are randomized.
    4242my $topfew_count = 15;
    43 
     43my $max_used_space = 0.98
    4444# transaction restart/retry regex
    4545my $trans_regex = qr/Deadlock Found|Lock wait timeout exceeded|try restarting transaction|Can't connect to MySQL server/i;
     
    16061606    $log->debug("entered - @_");
    16071607
    1608     my ($key, $vol_name) = validate_pos(@_,
     1608    my ($key, $vol_name, $find_invalid) = validate_pos(@_,
    16091609        {
    16101610            type        => SCALAR,
     
    16241624            optional    => 1,
    16251625        },
     1626        {
     1627            # find_invalid
     1628            type        => SCALAR|UNDEF,
     1629            optional    => 1,
     1630        },
    16261631    );
    16271632
     
    16671672        } else {
    16681673            $query = $db->prepare_cached( $sql->get_object_instances );
     1674            my $rows;
    16691675            # ext_id, available
    1670             my $rows = $query->execute($key->path, 1);
     1676            if (defined($find_invalid)) {
     1677                $rows = $query->execute($key->path, 0);
     1678            }
     1679            else {
     1680                $rows = $query->execute($key->path, 1);
     1681            }
    16711682            unless ($rows > 0) {
    16721683                $query->finish;
     
    21082119            $query = $db->prepare_cached( $sql->get_storage_volume_by_name );
    21092120            # %free, name, avaiable, allocate
    2110             $rows = $query->execute(0.95, $name, 1, 1);
     2121            $rows = $query->execute($max_used_space, $name, 1, 1);
    21112122            # XXX destinguish between non-existant and unavailable
    21122123            unless ($rows > 0) {
     
    21292140            $query = $db->prepare_cached( $sql->get_storage_volume );
    21302141            # %free, avaiable, allocate
    2131             $rows = $query->execute(0.95, 1, 1, $topfew_count);
     2142            $rows = $query->execute($max_used_space, 1, 1, $topfew_count);
    21322143#           $log->warn("Storage_volume: $rows $topfew_count");
    21332144            # there has to be atleast one storage volume
     
    22012212        $query = $db->prepare_cached( $sql->get_replication_volume_for_ext_id );
    22022213        # ext_id, %free, avaiable, allocate
    2203         $rows = $query->execute($key->path, 0.95, 1, 1, $forbidden_cabinet, $topfew_count);
     2214        $rows = $query->execute($key->path, $max_used_space, 1, 1, $forbidden_cabinet, $topfew_count);
    22042215        # XXX destinguish between non-existant and unaviable
    22052216        unless ($rows > 0) {
Note: See TracChangeset for help on using the changeset viewer.