IPP Software Navigation Tools IPP Links Communication Pan-STARRS Links

Ignore:
Timestamp:
Jan 12, 2015, 12:53:36 PM (11 years ago)
Author:
eugene
Message:

merging changes from ipp-pv3-20140717 (via branches/eam_branches/ipp-pv3-20140717-merge)

Location:
trunk/ippScripts/scripts
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/ippScripts/scripts

  • trunk/ippScripts/scripts/sc_transfer_tool.pl

    r37125 r37833  
    1010use Sys::Hostname;
    1111
     12my $scp_cmd = "/usr/projects/cosmo/amd6100/bin/scp";
     13my $ssh_cmd = "/usr/projects/cosmo/amd6100/bin/ssh";
     14
    1215my $remote_root = '/lustre/scratch1/turquoise/watersc1/ps1/';
    13 $remote_root = '/scratch3/watersc1/';
    14 my $local_raw = "${remote_root}/raw/";
     16   $remote_root = '/scratch3/watersc1/';
     17
     18#my $local_raw = "${remote_root}/tmp/";
    1519my $local_tmp = "${remote_root}/tmp/";
    16 my $threads = 10;
    17 my @hosts = ('ippc20.ipp.ifa.hawaii.edu','ippc24.ipp.ifa.hawaii.edu','ippc28.ipp.ifa.hawaii.edu',
    18              'ippc21.ipp.ifa.hawaii.edu','ippc25.ipp.ifa.hawaii.edu','ippc29.ipp.ifa.hawaii.edu',
    19              'ippc22.ipp.ifa.hawaii.edu','ippc26.ipp.ifa.hawaii.edu',
    20              'ippc23.ipp.ifa.hawaii.edu','ippc27.ipp.ifa.hawaii.edu'    );
     20my $threads = 11;
     21my @hosts = (
     22    'ippc20.ipp.ifa.hawaii.edu',
     23    'ippc21.ipp.ifa.hawaii.edu',
     24    'ippc22.ipp.ifa.hawaii.edu',
     25    'ippc23.ipp.ifa.hawaii.edu',
     26    'ippc24.ipp.ifa.hawaii.edu',
     27    'ippc25.ipp.ifa.hawaii.edu',
     28    'ippc26.ipp.ifa.hawaii.edu',
     29    'ippc27.ipp.ifa.hawaii.edu',
     30    'ippc28.ipp.ifa.hawaii.edu',       
     31#'ippc29.ipp.ifa.hawaii.edu', (mark's hi-mem test machine)
     32#'ippc30.ipp.ifa.hawaii.edu', (postage stamp server)   
     33    'ippc31.ipp.ifa.hawaii.edu',       
     34    'ippc32.ipp.ifa.hawaii.edu'   
     35    );
    2136@hosts = (@hosts, @hosts, @hosts, @hosts, @hosts);
    22 my $input_file;
     37my %server_options = (
     38    'ippc20.ipp.ifa.hawaii.edu' => '',
     39    'ippc21.ipp.ifa.hawaii.edu' => '',
     40    'ippc22.ipp.ifa.hawaii.edu' => '',
     41    'ippc23.ipp.ifa.hawaii.edu' => '',
     42    'ippc24.ipp.ifa.hawaii.edu' => '',
     43    'ippc25.ipp.ifa.hawaii.edu' => '',
     44    'ippc26.ipp.ifa.hawaii.edu' => '', # -o NoneSwitch=yes -o NoneEnabled=yes',
     45    'ippc27.ipp.ifa.hawaii.edu' => '', # -o NoneSwitch=yes -o NoneEnabled=yes',
     46    'ippc28.ipp.ifa.hawaii.edu' => '', # -o NoneSwitch=yes -o NoneEnabled=yes',
     47    'ippc29.ipp.ifa.hawaii.edu' => '', # -o NoneSwitch=yes -o NoneEnabled=yes',
     48    'ippc30.ipp.ifa.hawaii.edu' => '', # -o NoneSwitch=yes -o NoneEnabled=yes',
     49    'ippc31.ipp.ifa.hawaii.edu' => '', # -o NoneSwitch=yes -o NoneEnabled=yes',
     50    'ippc32.ipp.ifa.hawaii.edu' => '', # -o NoneSwitch=yes -o NoneEnabled=yes'
     51    );
     52
     53my $input_path;
     54my $local_file;
     55my $remote_file;
     56
    2357my $verbose = 0;
    2458my $fetch = 0;
    2559my $offset = 0;
     60my $retry = 0;
     61my $quickfetch = 0;
    2662
    2763GetOptions(
    2864    'threads=s'   => \$threads,
    29     'input=s'     => \$input_file,
    30     'fetch=s'     => \$fetch,
     65    'input=s'     => \$input_path,
    3166    'offset=s'    => \$offset,
     67    'fetch'       => \$fetch,
    3268    'verbose'     => \$verbose,
     69    'retry'       => \$retry,
     70    'quickfetch'  => \$quickfetch,
    3371    ) or pod2usage( 2 );
    3472pod2usage( -msg => "Unknown option: @ARGV", -exitval => 2) if @ARGV;
    3573pod2usage( -msg => "Required options: --input", -exitval => 3) unless
    36     defined($input_file);
     74    defined($input_path);
    3775
    3876my $hostname = hostname;
     
    4179# split in input file list
    4280unless(-d $local_tmp) { system("mkdir -p $local_tmp"); }
    43 unless(-d $local_raw) { system("mkdir -p $local_raw"); }
    44 
    45 open(I,$input_file) || die "Couldn't find input file specified\n";
     81#unless(-d $local_raw) { system("mkdir -p $local_raw"); }
     82
     83# we have two modes: fetch (-> LANL) and return (-> IPP)
     84
    4685if ($fetch) {
    47     open(F,$fetch) || die "Couldn't find fetch file specified\n";
    48 }
    49 my $input_base = basename($input_file);
    50 my @filehandles;
     86    $local_file = "$input_path.check";
     87    $remote_file = "$input_path.transfer";
     88} else {
     89    $local_file = "$input_path.generate";
     90    $remote_file = "$input_path.return";
     91}
     92my $input_base = basename($local_file);
     93
     94unless ($retry) {
     95  # open the input file list
     96  open (LOCAL,$local_file) || die "Couldn't find input file specified $local_file\n";
     97  open (REMOTE,$remote_file) || die "Couldn't find input file specified $remote_file\n";
     98 
     99  # generate N output files (to be fed to the N rsync / tar threads)
     100  my @filehandles;
     101  my $i;
     102  my $line = 0;
     103  for ($i = 0; $i < $threads; $i++) {
     104      open($filehandles[$i], ">${local_tmp}/${input_base}.${i}");
     105  }
     106 
     107  unless ($fetch) {
     108      my $stat_file = "$input_path.stat";
     109      print "STATFILE: $stat_file\n";
     110      open (STATFILE, ">$stat_file");
     111  }
     112 
     113  $i = 0;
     114  while (my $Lname = <LOCAL>) {
     115      my $Rname = <REMOTE>;
     116 
     117      chomp $Lname;
     118      chomp $Rname;
     119 
     120      $line++;
     121      if ($line < $offset) { next; } # I think this is off-by-one in a safe direction
     122 
     123      if ($line % 250 == 0)  { print STDERR "line $line : $Lname\n"; }
     124 
     125      $i = int(rand($#filehandles + 1));
     126      if ($i >= $threads) {
     127        print STDERR "HUH: impossible file handle?\n";
     128        die;
     129      }
     130 
     131      # if we are fetching, and 'quickfetch' is requested, skip file if it exists
     132      # NOTE: this does not check that the transfer was complete or the file is current
     133 
     134      if ($fetch) {
     135        # We are fetching, and do not already have this file.
     136        if ($quickfetch && (-e $Lname)) { next; }
     137        print { $filehandles[$i] } "$Rname\n";
     138      } else {
     139        # We are pushing, but only send back files we actually have
     140        # we cannot have the remote file unless the local file exists
     141        # (it is a link to the local file and is generated after the local file)
     142        my $haveRemote = -e "$local_tmp/$Rname";
     143        if ($haveRemote) {
     144            print { $filehandles[$i] } "$Rname\n";
     145            print STATFILE "$Rname PASS\n";
     146        } else {
     147            my $haveLocal = -e $Lname;
     148            if ($haveLocal) {
     149                print STATFILE "$Rname PART\n";
     150            } else {
     151                print STATFILE "$Rname FAIL\n";
     152            }
     153        }
     154      }
     155  }
     156  close(LOCAL);
     157  close(REMOTE);
     158  unless ($fetch) { close(STATFILE); }
     159 
     160  for ($i = 0; $i < $threads; $i++) {
     161      close($filehandles[$i]);
     162  }
     163}
     164
    51165my $i;
    52 my $line = 0;
    53 for ($i = 0; $i < $threads; $i++) {
    54     open($filehandles[$i], ">${local_tmp}/${input_base}.${i}");
    55 }
    56 $i = 0;
    57 while(<I>) {
    58     chomp;
    59     $line++;
    60     if ($line < $offset) { next; } # I think this is off-by-one in a safe direction
    61 
    62     $i = int(rand($#filehandles + 1));
    63 
    64     my $fline;
    65     if ($fetch) {
    66         $fline = <F>;
    67     }
    68     if (($fetch)&&(!(-e $_))) {  # We are fetching, and do not already have this file.
    69         print { $filehandles[$i] } $fline;
    70     }
    71     elsif (!($fetch)) { # We are pushing
    72         # The rsync call expects to find files of a given name in the directory specified.
    73 #       print { $filehandles[$i] } "${local_tmp}/$_" . "\n";
    74         print { $filehandles[$i] } "$_" . "\n";
    75     }
    76 }
    77 close(I);
    78 
    79 for ($i = 0; $i < $threads; $i++) {
    80     close($filehandles[$i]);
    81 }
    82 
    83 # fork the tars
     166# fork the rsync
    84167my @pids = ();
    85168for ($i = 0; $i < $threads; $i++) {
     
    89172        my $code = 0;
    90173        if ($fetch) {
    91             $code = fetch_task($host,"${input_base}.${i}", 0);
    92         }
    93         else {
     174            $code = fetch_task($host,"${local_tmp}/${input_base}.${i}", 0);
     175        } else {
    94176            $code = transfer_task($host,"${local_tmp}/${input_base}.${i}", 0);
    95177        }
     178        print STDERR "$host $input_base $i $code\n";
    96179        exit($code);
    97180    }
    98181}
     182
     183my $global_status = 0;
    99184for ($i = 0; $i < $threads; $i++) {
    100185    waitpid($pids[$i],0);
     186    my $this_status = $?;
     187    print "exit status $hosts[$i] : $this_status\n";
     188    if ($this_status) { $global_status = ($this_status >> 8); }
    101189}
    102190
     
    105193}
    106194
     195# my $scp_command = "$scp_cmd $option ${local_tmp}/${input_base}.stat $hosts[0]:/tmp/";
     196# print "$scp_command\n";
     197# system("$scp_command");
     198# XXX check return status
     199
     200print "global status: $global_status\n";
     201exit ($global_status);
    107202
    108203# distribute bundles to nodes
     
    115210    my $error = shift;
    116211
    117     # the transform bit is there because it looks like the ' gets dropped, so the * is interpreted, and why is our tar so out of date?
    118 #    my $command = "tar cf - --ignore-failed-read --dereference --files-from=${transfer_filelist} | /usr/projects/cosmo/amd6100/bin/ssh -o NoneSwitch=yes -o NoneEnabled=yes $destination_host tar xf - -C /data/ --transform '" . 's,^.\*/data/,,' . "' --dereference";
    119     my $command = "rsync -Lpt -e '/usr/projects/cosmo/amd6100/bin/ssh -o NoneSwitch=yes -o NoneEnabled=yes' --files-from=${transfer_filelist} ${local_tmp} ${destination_host}:/";
     212    my $option = $server_options{$destination_host};
     213
     214    my $command = "rsync -L --size-only --omit-dir-times -e '$ssh_cmd ${option}' --files-from=${transfer_filelist} ${local_tmp} ${destination_host}:/";
    120215    print STDERR "$command\n";
    121 
     216   
    122217    my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) =
    123218        run(command => $command, verbose => $verbose);
     219
     220    my $return_value = 0;
    124221    unless ($success) {
    125         print STDERR "ERROR:  $error_code\n";
    126 #       my $std = join "\n", @{ $stdout_buf };
    127 #       print "STDOUT: $std\n";
    128 #       $std = join "\n", @{ $stderr_buf };
    129 #       print "STDERR: $std\n";
    130 
    131 #       foreach my $line (@{ $stderr_buf }) {  # This is a hack.  A messy ugly hack.
    132 #           if ($line =~ /No such file or directory/) {
    133 #               my $file = (split /\s+/, $line)[1];
    134 #               $file =~ s/:$//;
    135 #               if ($file !~ /nebulous/) { next; }
    136 #               print STDERR "Touching $file so tar can continue.\n";
    137 #               system("touch $file");
    138 #           }
    139 #       }
    140 
    141         warn("Transfer of $transfer_filelist to $destination_host failed with error $error_code.");
    142         $error++;
    143         if ($error < 4) {
    144             $error_code = transfer_task($destination_host,$transfer_filelist,$error);
    145         }
    146         else {
    147             die("Failed too many times $error $destination_host $transfer_filelist");
     222        if ($error_code =~ /value 23/) {
     223            print STDERR "Some files failed to transfer.  Partial run?\n";
     224            $error = 23;
     225            $return_value = 23;
     226        } else {
     227            print STDERR "ERROR:  $error_code\n";
     228            warn("Transfer of $transfer_filelist to $destination_host failed with error $error_code.");
     229            sleep(5);
     230            $error++;
     231            if ($error < 4) {
     232                $error_code = transfer_task($destination_host,$transfer_filelist,$error);
     233            }
     234            else {
     235                die("Failed too many times $error $destination_host $transfer_filelist");
     236            }
     237            $return_value = 1;
    148238        }
    149239    }
    150     return($error_code);
     240    return($return_value);
    151241}
    152242   
     
    156246    my $error = shift;
    157247
    158     system("/usr/projects/cosmo/amd6100/bin/scp -o NoneSwitch=yes -o NoneEnabled=yes ${local_tmp}/${transfer_filelist} ${destination_host}:/tmp/");
    159     my $command = "/usr/projects/cosmo/amd6100/bin/ssh -o NoneSwitch=yes -o NoneEnabled=yes $destination_host tar cf - --ignore-failed-read  --dereference --files-from=/tmp/${transfer_filelist} | tar xf - -C ${local_raw} --skip-old-files --warning=existing-file --dereference ";
     248    my $option = $server_options{$destination_host};
     249
     250    my $command = "rsync --size-only -e '$ssh_cmd ${option}' --files-from=${transfer_filelist} ${destination_host}:/ ${local_tmp}";
    160251    print STDERR "$command\n";
    161252
    162     my ( $success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) =
     253    my $error_code = 0;
     254    my ( $success, $error_msg, $full_buf, $stdout_buf, $stderr_buf ) =
    163255        run(command => $command, verbose => $verbose);
    164256    unless ($success) {
    165         $error_code = (($error_code >> 8) or 4);
    166         warn("Transfer of $transfer_filelist to $destination_host failed with error $error_code.");
    167         $error++;
    168         if ($error < 4) {
    169             $error_code = fetch_task($destination_host,$transfer_filelist,$error);
    170         }
    171         else {
    172             die("Failed too many times $error $destination_host $transfer_filelist");
     257        print "raw error_msg: $error_msg...\n";
     258        ($error_code) = $error_msg =~ m/exited with value (\d+)/;
     259        warn("Transfer of files in $transfer_filelist from $destination_host failed with error ($error_code) $error_msg.");
     260        print "*** stdout: *** \n";
     261        foreach my $line (@$stdout_buf) {
     262            print STDERR "stdout: $line\n";
     263        }
     264        print "*** stderr: *** \n";
     265        foreach my $line (@$stderr_buf) {
     266            print STDERR "stderr: $line\n";
     267        }
     268        if ($error_code == 23) {
     269            print STDERR "Some files failed to transfer\n";
     270        } else {
     271            $error++;
     272            if ($error < 4) {
     273                $error_code = fetch_task($destination_host,$transfer_filelist,$error);
     274            } else {
     275                print STDERR "Failed too many times $error $destination_host $transfer_filelist\n";
     276            }
    173277        }
    174278    }
Note: See TracChangeset for help on using the changeset viewer.