Changeset 39924
- Timestamp:
- Jan 6, 2017, 11:18:12 AM (9 years ago)
- Location:
- branches/czw_branch/20160809
- Files:
-
- 9 edited
-
. (modified) (1 prop)
-
ippScripts/scripts/permcheck.pl (modified) (9 diffs)
-
ippScripts/scripts/rawcheck.pl (modified) (12 diffs)
-
ippTools/share/pztool_pendingimfile.sql (modified) (2 diffs)
-
ippconfig/recipes/nightly_science.config (modified) (2 diffs)
-
ippconfig/recipes/psphot.config (modified) (2 diffs)
-
psLib/share/tai_utc.dat (modified) (1 diff)
-
psLib/share/tai_utc.raw (modified) (1 diff)
-
psphot/src/psphotFitSourcesLinear.c (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
-
branches/czw_branch/20160809
-
branches/czw_branch/20160809/ippScripts/scripts/permcheck.pl
r37833 r39924 20 20 my $missing_tools = 0; 21 21 #my $regtool = can_run('regtool') or (warn "Can't find regtool" and $missing_tools = 1); 22 my $chiptool = can_run('chiptool') or (warn "Can't find chiptool" and $missing_tools = 1); 22 23 my $camtool = can_run('camtool') or (warn "Can't find camtool" and $missing_tools = 1); 23 24 my $warptool = can_run('warptool') or (warn "Can't find warptool" and $missing_tools = 1); 24 25 my $stacktool = can_run('stacktool') or (warn "Can't find stacktool" and $missing_tools = 1); 25 26 my $staticskytool = can_run('staticskytool') or (warn "Can't find staticskytool" and $missing_tools = 1); 26 27 my ($server,$dbname,$stage,$stage_id); 27 my $fftool = can_run('fftool') or (warn "Can't find fftool" and $missing_tools = 1); 28 29 my ($server,$dbname,$stage,$stage_id,$do_cull,$save_log); 28 30 29 31 $server = $ENV{'NEB_SERVER'} unless $server; … … 34 36 'stage=s' => \$stage, 35 37 'stage_id|x=s' => \$stage_id, 38 'save_log' => \$save_log, 39 'cull' => \$do_cull, 36 40 ) || pod2usage( 2 ); 41 42 unless(defined($do_cull)) { 43 $do_cull = 0; 44 } 37 45 38 46 # Option parsing … … 47 55 unless defined $stage_id; 48 56 57 if ($save_log) { 58 my $time = time(); 59 my $logDest = "neb://any/perm_check/${stage}/${stage}_${stage_id}.${time}"; 60 my $ipprc = PS::IPP::Config->new( "GPC1" ) or die "Could not create config object.\n"; 61 $ipprc->redirect_output($logDest) or die "Could not redirect output to logfile ${logDest}\n"; 62 } 63 64 49 65 # Global options: 50 66 ## Define the configuration. Ideally, this would be retrieved from the nebulous 67 ## database, but there are some issues with that (such as grouping the b nodes 68 ## into a less restrictive "offsite" location). 51 69 my $do_ops = 1; 52 my %backup_hosts = ('ippb00' => 1, 'ippb01' => 1, 53 'ippb02' => 1, 'ippb03' => 1, 54 'ippb04' => 1, 'ippb05' => 1, 55 'ippb06' => 1 56 ); 57 my %backup_destinations = (#'ippb04' => 1, 58 #'ippb05' => 1, 59 'ippb06' => 1 60 ); 61 62 if ($stage eq 'warp') { # These things are destined to have their only copy on the stsciXX nodes. 63 %backup_hosts = ('stsci00' => 1, 'stsci01' => 1, 'stsci02' => 1, 64 #'stsci03' => 1, 65 'stsci04' => 1, 'stsci05' => 1, 'stsci06' => 1, 66 'stsci07' => 1, 'stsci08' => 1, 'stsci09' => 1, 67 'stsci10' => 1, 'stsci11' => 1, 'stsci12' => 1, 68 'stsci13' => 1, 'stsci14' => 1, 'stsci15' => 1, 'stsci16' => 1, 69 'stsci17' => 1, 'stsci18' => 1, 'stsci19' => 1); 70 %backup_destinations = %backup_hosts; 71 } 72 73 my $backup_Nvols = 3; 74 75 my $ipprc; 76 if ($dbname eq 'gpc1') { 77 $ipprc = PS::IPP::Config->new( "GPC1" ); 78 } 79 else { 80 die "Unknown camera to use."; 81 } 70 my $i; 82 71 83 72 # Set up nebulous db interface … … 88 77 unless defined $neb; 89 78 79 ## This new implementation is somewhat messy, but is more general and adaptable. 80 ## First, we set up a requirement mapping, explaining where we want copies, and 81 ## how many copies we want at each site. 82 my %requirement_map = (); 83 $requirement_map{ITC} = 1; 84 $requirement_map{OFFSITE} = 1; 85 $requirement_map{MRTCB} = 0; 86 87 ## Second, construct a list of volumes, mapped to their site location, using the 88 ## same site locations as in the requirement map. 89 my %volume_map = (); 90 for ($i = 4; $i <= 21; $i++) { 91 my $vol = sprintf("ipp%03d.0",$i); 92 $volume_map{$vol} = 'MRTCB'; 93 } 94 for ($i = 23; $i <= 32; $i++) { 95 my $vol = sprintf("ipp%03d.0",$i); 96 $volume_map{$vol} = 'MRTCB'; 97 } 98 for ($i = 54; $i <= 97; $i++) { 99 my $vol = sprintf("ipp%03d.0",$i); 100 $volume_map{$vol} = 'MRTCB'; 101 } 102 for ($i = 100; $i <= 104; $i++) { 103 my $vol = sprintf("ipp%03d.0",$i); 104 $volume_map{$vol} = 'MRTCB'; 105 $vol = sprintf("ipp%03d.1",$i); 106 $volume_map{$vol} = 'MRTCB'; 107 } 108 for ($i = 105; $i <= 117; $i++) { 109 # if ($i == 115) { next; } 110 my $vol = sprintf("ipp%03d.0",$i); 111 $volume_map{$vol} = 'ITC'; 112 $vol = sprintf("ipp%03d.1",$i); 113 $volume_map{$vol} = 'ITC'; 114 } 115 for ($i = 0; $i <= 15; $i++) { 116 if ($i == 6) { next; } # This isn't "offsite", it's with the rest of the maui cluster. 117 if ($i == 9) { next; } # Not online 118 119 my $vol = sprintf("ippb%02d.0",$i); 120 $volume_map{$vol} = 'OFFSITE'; 121 $vol = sprintf("ippb%02d.1",$i); 122 $volume_map{$vol} = 'OFFSITE'; 123 $vol = sprintf("ippb%02d.2",$i); 124 $volume_map{$vol} = 'OFFSITE'; 125 } 126 127 ## Next, get disk space values, and check which hosts are listed as available. 128 my %acceptable_volume = (); 129 my $mounts = $neb->mounts(); 130 foreach my $vol_row (@$mounts) { 131 my ($mount_point, $total, $used, $vol_id, $name, $host, $path, $allocate, $available, $xattr) = @{ $vol_row }; 132 if (($allocate == 1)&&($available == 1)&&( $used / $total < 0.98)) { 133 $acceptable_volume{$name} = 1; 134 } 135 else { 136 print "## $name $allocate $available $used $total\n"; 137 $acceptable_volume{$name} = 0; 138 } 139 if ($name =~ /ippb05/) { 140 $acceptable_volume{$name} = 0; 141 } 142 # if (($name eq 'ipp106.0')||($name eq 'ipp106.1')) { 143 # $acceptable_volume{$name} = 0; 144 # } 145 } 146 147 ## Finally, generate lists containing which volumes are located at which site. 148 ## This allows us to randomly select a volume from the list for the site that 149 ## we plan on replicating to. 150 my %volume_lists = (); 151 foreach my $vol_key (keys %requirement_map) { 152 @{ $volume_lists{$vol_key} } = grep { $acceptable_volume{$_} == 1 } ( 153 grep { $volume_map{$_} eq $vol_key } (keys %volume_map) 154 ); 155 print "$vol_key " . join(' ', @{ $volume_lists{$vol_key} }) . "\n"; 156 157 if ($#{ $volume_lists{$vol_key} } == -1) { 158 die "No acceptable volume found for site $vol_key!\n"; 159 } 160 } 161 90 162 # Pull data from the gpc1 database 91 163 my $verbose = 0; 92 164 my $mdcParser = PS::IPP::Metadata::Config->new; 93 165 94 my $files; 166 # Not technically imfiles, but the nebulous check block already uses files as a variable. 167 my $imfiles; 95 168 if ($stage eq 'camera') { 96 169 my $cmd = "$camtool -processedexp -cam_id $stage_id -dbname $dbname"; … … 101 174 &my_die("Unable to perform stagetool: $error_code", $stage_id); 102 175 } 103 $ files = $mdcParser->parse_list(join "", @$stdout_buf) or176 $imfiles = $mdcParser->parse_list(join "", @$stdout_buf) or 104 177 &my_die("Unable to parse metadata from stagetool", $stage_id); 105 178 } … … 112 185 &my_die("Unable to perform stagetool: $error_code", $stage_id); 113 186 } 114 $ files = $mdcParser->parse_list(join "", @$stdout_buf) or187 $imfiles = $mdcParser->parse_list(join "", @$stdout_buf) or 115 188 &my_die("Unable to parse metadata from stagetool", $stage_id); 116 189 } … … 123 196 &my_die("Unable to perform stagetool: $error_code", $stage_id); 124 197 } 125 $ files = $mdcParser->parse_list(join "", @$stdout_buf) or198 $imfiles = $mdcParser->parse_list(join "", @$stdout_buf) or 126 199 &my_die("Unable to parse metadata from stagetool", $stage_id); 127 200 } … … 134 207 &my_die("Unable to perform stagetool: $error_code", $stage_id); 135 208 } 136 $ files = $mdcParser->parse_list(join "", @$stdout_buf) or209 $imfiles = $mdcParser->parse_list(join "", @$stdout_buf) or 137 210 &my_die("Unable to parse metadata from stagetool", $stage_id); 138 211 } 139 140 my %components = ('camera' => ['PSASTRO.OUTPUT','PSASTRO.OUTPUT.MASK'], 212 elsif ($stage eq 'ff') { 213 my $cmd = "$fftool -result --ff_id $stage_id -dbname $dbname"; 214 my ($success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) = 215 run(command => $cmd, verbose => 0); 216 unless ($success) { 217 $error_code = (($error_code >> 8) or $PS_EXIT_PROG_ERROR); 218 &my_die("Unable to perform stagetool: $error_code", $stage_id); 219 } 220 $imfiles = $mdcParser->parse_list(join "", @$stdout_buf) or 221 &my_die("Unable to parse metadata from stagetool", $stage_id); 222 } 223 224 225 my $timer_start = time(); 226 my $timer = time(); 227 my %components = ('camera' => ['PSASTRO.OUTPUT','PSASTRO.OUTPUT.MASK','PSPHOT.BACKMDL','PSPHOT.PSF.RAW.SAVE'], 141 228 'warp' => ['PSWARP.OUTPUT','PSWARP.OUTPUT.MASK','PSWARP.OUTPUT.VARIANCE'], 142 229 'stack' => ['PPSTACK.UNCONV.COMP','PPSTACK.UNCONV.MASK.COMP','PPSTACK.UNCONV.VARIANCE.COMP', 143 230 'PPSTACK.UNCONV.EXP','PPSTACK.UNCONV.EXPNUM','PPSTACK.UNCONV.EXPWT.COMP'], 144 'skycal' => ['PSASTRO.OUTPUT.CMF']); 145 146 147 foreach my $entry (@$files) { 231 'skycal' => ['PSASTRO.OUTPUT.CMF','PSPHOT.OUTPUT.CFF'], 232 'ff' => ['PSPHOT.OUT.CMF.MEF','PSPHOT.OUTPUT.CFF','PSPHOT.FULLFORCE.OUTPUT']); 233 234 my $timer_start = time(); 235 my $timer = time(); 236 237 print "## permcheck.pl: $stage $stage_id $dbname $do_cull $do_ops $timer_start\n"; 238 foreach my $entry (@$imfiles) { 148 239 my $path_base = $entry->{path_base}; 149 240 my $data_state = $entry->{state}; 150 241 my $hostname = $entry->{hostname}; 151 242 my $quality = $entry->{quality}; 243 244 $timer = $time() - $timer_start; 245 print "# $path_base $data_state $hostname $quality T: $timer\n"; 152 246 if ($quality != 0) { next; } 153 print "# $path_base $data_state $hostname $quality\n"; 247 248 my @keys = (); 154 249 foreach my $product (@{ $components{$stage} }) { 155 my @keys = (); 156 if (($stage eq 'camera')&&($product eq 'PSASTRO.OUTPUT.MASK')) { 157 my @otas = ('XY01','XY02','XY03','XY04','XY05','XY06', 158 'XY10','XY11','XY12','XY13','XY14','XY15','XY16','XY17', 159 'XY20','XY21','XY22','XY23','XY24','XY25','XY26','XY27', 160 'XY30','XY31','XY32','XY33','XY34','XY35','XY36','XY37', 161 'XY40','XY41','XY42','XY43','XY44','XY45','XY46','XY47', 162 'XY50','XY51','XY52','XY53','XY54','XY55','XY56','XY57', 163 'XY60','XY61','XY62','XY63','XY64','XY65','XY66','XY67', 164 'XY71','XY72','XY73','XY74','XY75','XY76'); 165 foreach my $ota (@otas) { 166 push @keys, $ipprc->filename($product,$path_base,$ota); 167 } 168 } 250 if ($stage eq 'camera') { 251 if ($product eq 'PSASTRO.OUTPUT.MASK') { 252 my @otas = ('XY01','XY02','XY03','XY04','XY05','XY06', 253 'XY10','XY11','XY12','XY13','XY14','XY15','XY16','XY17', 254 'XY20','XY21','XY22','XY23','XY24','XY25','XY26','XY27', 255 'XY30','XY31','XY32','XY33','XY34','XY35','XY36','XY37', 256 'XY40','XY41','XY42','XY43','XY44','XY45','XY46','XY47', 257 'XY50','XY51','XY52','XY53','XY54','XY55','XY56','XY57', 258 'XY60','XY61','XY62','XY63','XY64','XY65','XY66','XY67', 259 'XY71','XY72','XY73','XY74','XY75','XY76'); 260 foreach my $ota (@otas) { 261 push @keys, $ipprc->filename($product,$path_base,$ota); 262 } 263 } 264 elsif (($product eq 'PSPHOT.BACKMDL')||($product eq 'PSPHOT.PSF.RAW.SAVE')) { 265 my $chip_id = $entry->{chip_id}; 266 my $chip_cmd = "$chiptool -processedimfile -chip_id $chip_id -dbname $dbname"; 267 my ($success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) = 268 run(command => $chip_cmd, verbose => 0); 269 unless ($success) { 270 $error_code = (($error_code >> 8) or $PS_EXIT_PROG_ERROR); 271 &my_die("Unable to perform stagetool: $error_code", $stage_id); 272 } 273 my $chip_$files = $mdcParser->parse_list(join "", @$stdout_buf) or 274 &my_die("Unable to parse metadata from stagetool", $stage_id); 275 276 foreach my $chip_entry (@$chip_files) { 277 my $chip_path_base = $chip_entry->{path_base}; 278 my $class_id = $chip_entry->{class_id}; 279 push @keys, $ipprc->filename($product,$chip_path_base,$class_id); 280 } 281 } 282 } 283 elsif (($stage eq 'ff')&&($product eq 'PSPHOT.FULLFORCE.OUTPUT')) { 284 my $ffsum_cmd = "$fftool -summary -ff_id $stage_id -dbname $dbname"; 285 my ($success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) = 286 run(command => $ffsum_cmd, verbose => 0); 287 unless ($success) { 288 $error_code = (($error_code >> 8) or $PS_EXIT_PROG_ERROR); 289 &my_die("Unable to perform stagetool: $error_code", $stage_id); 290 } 291 my $chip_$files = $mdcParser->parse_list(join "", @$stdout_buf) or 292 &my_die("Unable to parse metadata from stagetool", $stage_id); 293 294 foreach my $chip_entry (@$chip_files) { 295 my $chip_path_base = $chip_entry->{path_base}; 296 my $class_id = $chip_entry->{class_id}; 297 push @keys, $ipprc->filename($product,$chip_path_base,); 298 } 299 } 169 300 else { 170 301 push @keys, $ipprc->filename($product,$path_base); 171 302 } 172 303 # Do validation 304 } 305 306 $timer = time() - $timer_start; 307 308 printf("# Identified %d : %s\n",$#keys + 1, $timer); 309 foreach my $key (@keys) { 310 # neb-stat level handling 311 my $stat = $neb->stat($key); 312 die "nebulous key: $key not found" unless $stat; 313 my $instances; 314 my $md5sum = ''; 315 316 # This needs to be in an eval, because although we expect things to exist, they 317 # may not. This is a fatal error in rawcheck, but need not be here. 318 eval { 319 $instances = $neb->find_instances($key, 'any'); 320 }; 321 unless (defined($instances)) { print "## skipping due to zero instances\n"; next; } 322 die "no instances found" unless $instances; 173 323 174 foreach my $key (@keys) { 175 # neb-stat level handling 176 my $stat = $neb->stat($key); 177 die "nebulous key: $key not found" unless $stat; 178 my $instances; 179 eval { 180 $instances = $neb->find_instances($key, 'any'); 181 }; 182 unless (defined($instances)) { print "## skipping due to zero instances\n"; next; } 183 die "no instances found" unless $instances; 324 my $Ngood; 325 my %good_instances = (); 326 my %bad_instances = (); 327 328 my @files = map {URI->new($_)->file if $_} @$instances; 329 my @validation = (); 330 331 for (my $i = 0; $i <= $#files; $i++) { 332 my ($instance_exists,$instance_md5sum,$instance_host,$instance_volume,$instance_site, $is_good); 333 ($instance_host,$instance_volume) = parse_volume($files[$i]); 334 $instance_site = $volume_map{$instance_volume}; 335 $is_good = 0; 336 337 if (-e $files[$i]) { 338 $instance_exists = 1; 339 $existing_copies++; 340 $instance_md5sum = local_md5sum($files[$i]); 341 342 # This is bad, but I don't know what the right solution is. We don't have 343 # the md5sum a priori. I think this is also the only major change needed 344 # from rawcheck.pl 345 if (($md5sum eq '')&&($instance_md5sum ne 'd41d8cd98f00b204e9800998ecf8427e')) { 346 $md5sum = $instance_md5sum; 347 } 348 349 if ($instance_md5sum eq $md5sum) { 350 push @{ $good_instance{$instance_site} }, $i; 351 $is_good = 1; 352 $Ngood++; 353 } 354 else { 355 push @{ $bad_instances{$instance_site} }, $i; 356 } 357 } 358 else { 359 $instance_exists = 0; 360 $instance_md5sum = 'NON-EXISTANT'; 361 push @{ $bad_instances{$instance_site} }, $i; 362 } 184 363 185 my $user_copies; 186 eval { 187 $user_copies = $neb->getxattr($key, "user.copies"); 188 }; 189 unless(defined($user_copies)) { 190 $user_copies = 1; 191 } 192 193 my $md5sum; 194 my @validation; 195 my %md5sum_uniq; 196 my $existing_copies = 0; 197 198 my @existance; 199 my @md5sums; 200 my @diskfiles = map {URI->new($_)->file if $_} @$instances; 201 my @diskvols; 202 my @diskhosts; 203 my @quality; 204 my $Ngood = 0; 205 my $quality_mask = 0; 206 207 for (my $i = 0; $i <= $#diskfiles; $i++) { 208 if (-e $diskfiles[$i]) { 209 $existance[$i] = 1; 210 $existing_copies++; 211 $md5sums[$i] = local_md5sum($diskfiles[$i]); 212 $md5sum_uniq{$md5sums[$i]} = 1; 213 } 214 else { 215 $existance[$i] = 0; 216 $md5sums[$i] = 'NON-EXISTANT'; 217 $md5sum_uniq{$md5sums[$i]} = 1; 218 } 219 ($diskhosts[$i],$diskvols[$i]) = parse_volume($diskfiles[$i]); 220 $validation[$i] = sprintf("% 3d %32s %s %s %d", 221 $existance[$i], 222 $md5sums[$i], 223 $diskfiles[$i], 224 $diskhosts[$i],$diskvols[$i] 225 ); 226 227 # Pre-parse decisions 228 if ($existance[$i] == 0) { 229 $quality[$i] = 0; 230 } 231 elsif (is_backup_volume($diskhosts[$i])) { 232 $quality[$i] = 1; 233 $quality_mask = $quality_mask | 1; 234 $Ngood++; 235 } 236 else { 237 $quality[$i] = 2; 238 $Ngood++; 239 } 240 } 241 if (scalar(keys(%md5sum_uniq)) != 1) { #unlike the raw data, we don't know the truth. 242 die "There are multiple md5sum values for $key"; 243 } 244 $md5sum = (keys(%md5sum_uniq))[0]; 245 246 print "\n$key $data_state $md5sum $hostname\n"; 247 if (1) { 248 249 print 250 "object id: ", @$stat[0], "\n", 251 "key: ", @$stat[1], "\n"; 252 print 253 "epoch: ", @$stat[4], "\n"; 254 print 255 "md5sum count: ", scalar(keys %md5sum_uniq), "\n"; 256 print 257 "requested instances: ", $user_copies, "\n", 258 "available instances: ", @$stat[6], "\n", 259 "existing instances: ", $existing_copies, "\n", 260 "total instances: ", @$stat[7], "\n", 261 "instance location:\n", " " x 4; 262 print 263 join("\n" . " " x 4, @validation), "\n"; 264 } 265 364 $validation[$i] = sprintf(" % 3d %d %32s %s %s %s\n", 365 $instance_exists,$is_good,$instance_md5sum, 366 $files[$i],$instance_host,$instance_volume); 367 } 368 369 $time = time() - $timer_start; 370 # object_id ext_id epoch available existing total timer 371 printf("%s %s %s %d %d %d %d\n",@$stat[0],@$stat[1],@$stat[4],@$stat[6],$existing_copies,@$stat[7],$timer); 372 # instance_exists is_good instance_md5sum file instance_host instance_volume 373 my $val_string = join('',@validation); 374 print "$val_string"; 375 266 376 # Decide what to do 267 if ($Ngood == 0) { 268 # DO something to attempt to fix this. 269 my $deneb_key = $key; $deneb_key =~ s/.*?gpc/gpc/; 270 271 open(DD,"/home/panstarrs/ipp/local/bin/deneb-locate.py $deneb_key 2> /dev/null |"); 272 my $good_file = ''; 273 while (<DD>) { 274 $_ =~ s/^\s+//; 275 my ($z,undef,$ff) = split /\s+/; 276 if (($ff)&&(-e $ff)) { 277 my $md_response = `md5sum $ff`; 278 if ($md_response =~ /$md5sum/) { 279 $good_file = (split /\s+/,$md_response)[1]; 280 } 377 378 ## This block attempts to find an out-of-nebulous instance with a good md5sum, 379 ## and substitutes it for the 0-th listed instance. This gives us one good 380 ## copy to work with. 381 if ($Ngood == 0) { 382 # DO something to attempt to fix this. 383 my $deneb_key = $key; $deneb_key =~ s/.*?gpc/gpc/; 384 385 open(DD,"/home/panstarrs/ipp/local/bin/deneb-locate.py $deneb_key 2> /dev/null |"); 386 my $good_file = ''; 387 while (<DD>) { 388 $_ =~ s/^\s+//; 389 my ($z,undef,$ff) = split /\s+/; 390 if (($ff)&&(-e $ff)) { 391 my $md_response = `md5sum $ff`; 392 if ($md_response =~ /$md5sum/) { 393 $good_file = (split /\s+/,$md_response)[1]; 281 394 } 282 395 } 283 close(DD); 284 if ($good_file eq '') { 285 die "No valid instance of key: $key"; 286 } 287 else { 288 $quality[0] = 1; 289 print "cp $good_file $diskfiles[0]\n"; 290 vsystem("cp $good_file $diskfiles[0]"); 291 } 292 # Begin my best validation thought 293 { 294 my $tmpmd5 = local_md5sum($diskfiles[0]); 396 } 397 close(DD); 398 if ($good_file eq '') { 399 die "No valid instance of key: $key"; 400 } 401 else { 402 print "cp $good_file $files[0]\n"; 403 if ($do_ops) { 404 system("cp $good_file $files[0]"); 405 } 406 } 407 # Begin my best validation thought 408 { 409 my $tmpmd5 = local_md5sum($files[0]); 410 if ($tmpmd5 ne $md5sum) { 411 die "Post-replication md5sum does not match! $tmpmd5 != $md5sum"; 412 } 413 } 414 # End my best validation thought. 415 416 $Ngood = 1; # We now hand off this single valid instance object to be handled by the Ngood=1 case. 417 ## We've done work here, so we can't do a cull this iteration. 418 if ($do_cull == 1) { $do_cull = -1; } 419 } 420 ## We have more than zero bad copies. We may cull some of these in the future, but we should try to 421 ## leave everything in the best state possible. 422 printf(">> %d %d\n",$Ngood, $#files + 1); 423 if ($Ngood != $#files + 1) { 424 my $good_copy; 425 my $good_copy_index; 426 foreach my $site_key (keys %good_instances) { 427 if ($#{ $good_instances{$site_key} } != -1) { 428 $good_copy_index = $good_instances{$site_key}[0]; 429 $good_copy = $files[$good_copy_index]; 430 last; 431 } 432 } 433 printf(">> GOOD: $good_copy\n"); 434 foreach my $site_key (keys %bad_instances) { 435 foreach my $bad_copy_index (@{ $bad_instances{$site_key} }) { 436 print "cp $good_copy $files[$bad_copy_index]\n"; 437 if ($do_ops) { 438 system("cp $good_copy $files[$bad_copy_index]"); 439 } 440 my $tmpmd5 = local_md5sum($files[$bad_copy_index]); 441 if ($tmpmd5 ne $md5sum) { 442 ## This isn't super critical, so we don't need to die here. 443 warn "Post-repair md5sum does not match! $tmpmd5 != $md5sum: $files[$bad_copy_index]"; 444 } 445 else { 446 ## success 447 push @{ $good_instances{$site_key} }, $bad_copy_index; 448 } 449 } 450 } 451 ## We've done work here, so we can't do a cull this iteration. 452 if ($do_cull == 1) { $do_cull = -1; } 453 } 454 455 ## We can now attempt to make replicated copies to the sites that require additional copies. 456 foreach my $site_key (keys %requirement_map) { 457 my $have_instances = $#{ $good_instances{$site_key} } + 1; 458 print "## $site_key $have_instances $requirement_map{$site_key}\n"; 459 if ($#{ $good_instances{$site_key} } + 1 < $requirement_map{$site_key}) { 460 my $rep_vol = get_random_site_volume($site_key); 461 print "neb-replicate --volume $rep_vol $key\n"; 462 if ($do_ops) { 463 $neb->replicate($key,$rep_vol) or die "failed to replicate the single valid copy to the backup node"; 464 if ($@) { die $@; } 465 466 # Begin my best validation thought 467 system("sync") == 0 or die "Couldn't sync?"; 468 my $uris = $neb->find_instances($key,$rep_vol); 469 @$uris = map {URI->new($_)->file if $_} @$uris; 470 my $tmpmd5 = local_md5sum(${ $uris }[0]); 471 472 my $validation_str = sprintf("% 3d %d %32s %s %s %s", 473 -1,-1,$tmpmd5, 474 ${ $uris }[0],"repl",$rep_vol); 475 print 476 join("\n" . " " x 4, $validation_str), "\n"; 477 295 478 if ($tmpmd5 ne $md5sum) { 296 479 die "Post-replication md5sum does not match! $tmpmd5 != $md5sum"; 297 480 } 298 } 299 # End my best validation thought. 300 301 $Ngood = 1; # We now hand off this single valid instance object to be handled by the Ngood=1 case. 302 } 303 304 if ($quality[0] == 0) { # The first instance is bad. 305 # But since we're here, and not up there, there must be at least one good copy. Find it. 306 for (my $i = 0; $i <= $#md5sums; $i++) { 307 if ($md5sums[$i] eq $md5sum) { # Found it. 308 print "cp $diskfiles[$i] $diskfiles[0]\n"; 309 vsystem("cp $diskfiles[$i] $diskfiles[0]"); 310 # Begin my best validation thought 311 { 312 my $tmpmd5 = local_md5sum($diskfiles[0]); 313 if ($tmpmd5 ne $md5sum) { 314 die "Post-replication md5sum does not match! $tmpmd5 != $md5sum"; 315 } 316 } 317 # End my best validation thought. 318 last; # We're done here now. 319 } 320 } 321 } 322 323 if ($Ngood == 1) { # We have only one version 324 if ($quality_mask & 1) { # ANd it's on a backup volume 325 if ($user_copies > 1) { # And we want more than one copy. 326 print "neb-replicate $key\n"; 327 if ($do_ops) { 328 $neb->replicate($key) or die "failed to replicate the single valid copy"; 329 if ($@) { die $@; } 330 } 331 } 332 } 333 else { # And it's not, so put one there 334 my $rep_vol = get_random_backup_volume(); 335 print "neb-replicate --volume $rep_vol $key\n"; 336 if ($do_ops) { 337 $neb->replicate($key,$rep_vol) or die "failed to replicate the single valid copy to the backup node"; 338 if ($@) { die $@; } 339 340 # Begin my best validation thought 341 vsystem("sync") == 0 or die "Couldn't sync?"; 342 my $uris = $neb->find_instances($key,$rep_vol); 343 @$uris = map {URI->new($_)->file if $_} @$uris; 344 my $tmpmd5 = local_md5sum(${ $uris }[0]); 345 if ($tmpmd5 ne $md5sum) { 346 die "Post-replication md5sum does not match! $tmpmd5 != $md5sum"; 347 } 348 # End my best validation thought. 349 } 350 } 351 } 352 else { # N >= 2 353 if (!($quality_mask & 1)) { # no backup copy 354 my $rep_vol = get_random_backup_volume(); 355 print "neb-replicate --volume $rep_vol $key\n"; 356 if ($do_ops) { 357 $neb->replicate($key,$rep_vol) or die "failed to replicate a copy to the backup node"; 358 if ($@) { die $@; } 359 360 # Begin my best validation thought 361 vsystem("sync") == 0 or die "Couldn't sync?"; 362 my $uris = $neb->find_instances($key,$rep_vol); 363 @$uris = map {URI->new($_)->file if $_} @$uris; 364 my $tmpmd5 = local_md5sum(${ $uris }[0]); 365 if ($tmpmd5 ne $md5sum) { 366 die "Post-replication md5sum does not match! $tmpmd5 != $md5sum"; 367 } 368 # End my best validation thought. 369 } 370 } 371 if (!($quality_mask & 2)) { # no copy on the requested host, so select the first as valid. 372 for (my $i = 0; $i <= $#diskfiles; $i++) { 373 if ($quality[$i] == 2) { 374 $quality[$i] = 1; 375 last; 376 } 377 } 378 } 379 } 380 #XXX NEW DEBUG THIS: This should iterate over diskfiles that are not marked with a quality = 2, and cull them. 381 # For the N>=2, we set that for the primary, and 382 # for both cases, we do not have a diskfile entry for the newly replicated copy. 383 # Therefore, this should cull down to the correct number. 384 for (my $i = 0; $i <= $#diskfiles; $i++) { 385 # print "$existance[$i] $quality[$i] $md5sums[$i] $md5sum $diskhosts[$i] $diskvols[$i]\n"; 386 if ($quality[$i] != 1) { 387 if ($existance[$i] == 0) { # This disk file doesn't exist. 388 vsystem("touch $diskfiles[$i]"); 389 } 390 my $cull_vol = $diskhosts[$i] . "." . $diskvols[$i]; 391 print "neb-cull --volume $cull_vol $key\n"; 392 if ($do_ops) { 393 # The tilde here is to force hard volumes. Don't touch it. 394 $neb->cull($key,"~${cull_vol}",2) or die "failed to cull a superfluous instance"; 395 if ($@) { die "$@"; } 396 } 397 } 398 } 399 # } 400 } # end keys for this product 401 } # end product for this entry 402 } # end entry for this id 403 404 sub vsystem { 405 my $cmd = shift; 406 print "$cmd\n"; 407 if ($do_ops) { 408 system($cmd); 409 } 410 } 481 # End my best validation thought. 482 } 483 ## We've done work here, so we can't do a cull this iteration. 484 if ($do_cull == 1) { $do_cull = -1; } 485 } 486 } 487 488 ## Do culls if that's what we were going to do. 489 if ($do_cull == -1) { 490 die "Cull option passed, but files were modified in the scan/repair/replicate phase. Not running cull!\n"; 491 } 492 elsif ($do_cull == 1) { 493 ## At this point, we should have no files in the bad_instances lists, because we've repaired them. 494 foreach my $site_key (keys %good_instances) { 495 if ($#{ $good_instances{$site_key} } + 1 > $requirement_map{$site_key}) { 496 for ($i = $requirement_map{$site_key}; $i <= $#{ $good_instances{$site_key} }; $i++) { 497 my $cull_index = ${ $good_instances{$site_key} }[$i]; 498 my ($instance_host,$instance_volume) = parse_volume($files[$cull_index]); 499 print "neb-cull --volume $instance_volume $key\n"; 500 if ($do_ops) { 501 # The tilde here is to force hard volumes. Don't touch it. 502 # Also: the 2 is a "minimum number of copies" restriction. Let's not be crazy here. 503 $neb->cull($key,"~${instance_volume}",2) or die "failed to cull a superfluous instance"; 504 if ($@) { die "$@"; } 505 } 506 } # End loop over extra instances 507 } # End check for sites with extra instances 508 } # End loop over sites. 509 } # End cull 510 511 } ## End loop over imfiles. 512 411 513 412 514 sub local_md5sum { … … 425 527 } 426 528 427 428 429 529 sub parse_volume { 430 530 my $filename = shift(@_); 431 531 my $full_volume = (split /\//, $filename)[2]; 432 my ($hostname,$vol_index) = split /\./, $full_volume; # /; 433 return($hostname,$vol_index); 434 } 435 436 sub is_backup_volume { 437 my $hostname = shift(@_); 438 if (exists($backup_hosts{$hostname})) { 439 return(1); 440 } 441 return(0); 442 } 443 444 sub get_random_backup_volume { 445 my $NN = scalar keys %backup_destinations; 446 my $backup_host = (keys %backup_destinations)[int(rand($NN))]; 447 my $backup_vol = int(rand($backup_Nvols)); 448 449 return("${backup_host}.${backup_vol}"); 532 my ($hostname,undef) = split /\./, $full_volume; # /; 533 return($hostname,$full_volume); 534 } 535 536 sub get_random_site_volume { 537 my $site_key = shift(@_); 538 my $NN = scalar @{ $volume_lists{$site_key} }; 539 my $backup_volume = ${ $volume_lists{$site_key} }[int(rand($NN))]; 540 return($backup_volume); 450 541 } 451 542 -
branches/czw_branch/20160809/ippScripts/scripts/rawcheck.pl
r37423 r39924 22 22 23 23 24 my ($server,$dbname,$exp_id );24 my ($server,$dbname,$exp_id,$do_cull,$save_log); 25 25 26 26 $server = $ENV{'NEB_SERVER'} unless $server; … … 30 30 'dbname=s' => \$dbname, 31 31 'exp_id|x=s' => \$exp_id, 32 'save_log' => \$save_log, 33 'cull' => \$do_cull, 32 34 ) || pod2usage( 2 ); 35 36 unless(defined($do_cull)) { 37 $do_cull = 0; 38 } 33 39 34 40 # Option parsing … … 41 47 unless defined $exp_id; 42 48 49 if ($save_log) { 50 my $time = time(); 51 my $logDest = "neb://any/raw_check/exp_${exp_id}.${time}"; 52 my $ipprc = PS::IPP::Config->new( "GPC1" ) or die "Could not create config object.\n"; 53 $ipprc->redirect_output($logDest) or die "Could not redirect output to logfile ${logDest}\n"; 54 } 55 43 56 # Global options: 57 ## Define the configuration. Ideally, this would be retrieved from the nebulous 58 ## database, but there are some issues with that (such as grouping the b nodes 59 ## into a less restrictive "offsite" location). 44 60 45 61 my $do_ops = 1; 46 my %host_mapping = ( 47 'XY01' => 'ipp010','XY02' => 'ipp011','XY03' => 'ipp006','XY04' => 'ipp054', 48 'XY05' => 'ipp007','XY06' => 'ipp055','XY10' => 'ipp008','XY11' => 'ipp056', 49 'XY12' => 'ipp009','XY13' => 'ipp057','XY14' => 'ipp058','XY15' => 'ipp059', 50 'XY16' => 'ipp012','XY17' => 'ipp060','XY20' => 'ipp013','XY21' => 'ipp014', 51 'XY22' => 'ipp061','XY23' => 'ipp015','XY24' => 'ipp016','XY25' => 'ipp062', 52 'XY26' => 'ipp064','XY27' => 'ipp065','XY30' => 'ipp066','XY31' => 'ipp020', 53 'XY32' => 'ipp063','XY33' => 'ipp021','XY34' => 'ipp023','XY35' => 'ipp013', 54 'XY36' => 'ipp024','XY37' => 'ipp019','XY40' => 'ipp025','XY41' => 'ipp018', 55 'XY42' => 'ipp017','XY43' => 'ipp015','XY44' => 'ipp027','XY45' => 'ipp028', 56 'XY46' => 'ipp029','XY47' => 'ipp030','XY50' => 'ipp031','XY51' => 'ipp032', 57 'XY52' => 'ipp033','XY53' => 'ipp034','XY54' => 'ipp035','XY55' => 'ipp036', 58 'XY56' => 'ipp037','XY57' => 'ipp038','XY60' => 'ipp039','XY61' => 'ipp040', 59 'XY62' => 'ipp041','XY63' => 'ipp042','XY64' => 'ipp043','XY65' => 'ipp044', 60 'XY66' => 'ipp046','XY67' => 'ipp047','XY71' => 'ipp048','XY72' => 'ipp049', 61 'XY73' => 'ipp050','XY74' => 'ipp051','XY75' => 'ipp052','XY76' => 'ipp053'); 62 my %backup_hosts = ('ippb00' => 1, 'ippb01' => 1, 63 'ippb02' => 1, 'ippb03' => 1, 64 'ippb04' => 1, 'ippb05' => 1, 65 'ippb06' => 1 66 ); 67 my %backup_destinations = (# 'ippb04' => 1, # full 68 # 'ippb05' => 1, # full 69 'ippb06' => 1 70 ); 71 my $backup_Nvols = 3; 72 73 # Set up nebulous db interface 62 my $i; 63 64 ## Set up nebulous db interface, and pull processing information to consider. 74 65 my $neb = Nebulous::Client->new( 75 66 proxy => "$server", … … 78 69 unless defined $neb; 79 70 80 # Pull data from the gpc1 database 71 ## This new implementation is somewhat messy, but is more general and adaptable. 72 ## First, we set up a requirement mapping, explaining where we want copies, and 73 ## how many copies we want at each site. 74 my %requirement_map = (); 75 $requirement_map{ITC} = 1; 76 $requirement_map{OFFSITE} = 1; 77 $requirement_map{MRTCB} = 0; 78 79 ## Second, construct a list of volumes, mapped to their site location, using the 80 ## same site locations as in the requirement map. 81 my %volume_map = (); 82 for ($i = 4; $i <= 21; $i++) { 83 my $vol = sprintf("ipp%03d.0",$i); 84 $volume_map{$vol} = 'MRTCB'; 85 } 86 for ($i = 23; $i <= 32; $i++) { 87 my $vol = sprintf("ipp%03d.0",$i); 88 $volume_map{$vol} = 'MRTCB'; 89 } 90 for ($i = 54; $i <= 97; $i++) { 91 my $vol = sprintf("ipp%03d.0",$i); 92 $volume_map{$vol} = 'MRTCB'; 93 } 94 for ($i = 100; $i <= 104; $i++) { 95 my $vol = sprintf("ipp%03d.0",$i); 96 $volume_map{$vol} = 'MRTCB'; 97 $vol = sprintf("ipp%03d.1",$i); 98 $volume_map{$vol} = 'MRTCB'; 99 } 100 for ($i = 105; $i <= 117; $i++) { 101 # if ($i == 115) { next; } 102 my $vol = sprintf("ipp%03d.0",$i); 103 $volume_map{$vol} = 'ITC'; 104 $vol = sprintf("ipp%03d.1",$i); 105 $volume_map{$vol} = 'ITC'; 106 } 107 for ($i = 0; $i <= 15; $i++) { 108 if ($i == 6) { next; } # This isn't "offsite", it's with the rest of the maui cluster. 109 if ($i == 9) { next; } # Not online 110 111 my $vol = sprintf("ippb%02d.0",$i); 112 $volume_map{$vol} = 'OFFSITE'; 113 $vol = sprintf("ippb%02d.1",$i); 114 $volume_map{$vol} = 'OFFSITE'; 115 if ($i <= 6) { 116 $vol = sprintf("ippb%02d.2",$i); 117 $volume_map{$vol} = 'OFFSITE'; 118 } 119 } 120 121 ## Next, get disk space values, and check which hosts are listed as available. 122 my %acceptable_volume = (); 123 my $mounts = $neb->mounts(); 124 foreach my $vol_row (@$mounts) { 125 my ($mount_point, $total, $used, $vol_id, $name, $host, $path, $allocate, $available, $xattr) = @{ $vol_row }; 126 if (($allocate == 1)&&($available == 1)&&( $used / $total < 0.98)) { 127 $acceptable_volume{$name} = 1; 128 } 129 else { 130 print "## $name $allocate $available $used $total\n"; 131 $acceptable_volume{$name} = 0; 132 } 133 if ($name =~ /ippb05/) { 134 $acceptable_volume{$name} = 0; 135 } 136 # if (($name eq 'ipp106.0')||($name eq 'ipp106.1')) { 137 # $acceptable_volume{$name} = 0; 138 # } 139 } 140 141 ## Finally, generate lists containing which volumes are located at which site. 142 ## This allows us to randomly select a volume from the list for the site that 143 ## we plan on replicating to. 144 my %volume_lists = (); 145 foreach my $vol_key (keys %requirement_map) { 146 @{ $volume_lists{$vol_key} } = grep { $acceptable_volume{$_} == 1 } ( 147 grep { $volume_map{$_} eq $vol_key } (keys %volume_map) 148 ); 149 print "$vol_key " . join(' ', @{ $volume_lists{$vol_key} }) . "\n"; 150 151 if ($#{ $volume_lists{$vol_key} } == -1) { 152 die "No acceptable volume found for site $vol_key!\n"; 153 } 154 } 155 156 # die; 157 # Pull data from the gpc1 database about the exposure to consider 81 158 my $verbose = 0; 82 159 my $mdcParser = PS::IPP::Metadata::Config->new; 83 160 161 print("$regtool -processedimfile -exp_id $exp_id -dbname $dbname"); 84 162 my $regtool_cmd = "$regtool -processedimfile -exp_id $exp_id -dbname $dbname"; 85 163 my ($success, $error_code, $full_buf, $stdout_buf, $stderr_buf ) = … … 92 170 &my_die("Unable to parse metadata from regtool -processedimfile", $exp_id); 93 171 172 my $timer_start = time(); 173 my $timer = time(); 174 print "## rawcheck.pl: $exp_id $dbname $do_cull $do_ops $timer_start\n"; 175 # Loop over the imfiles of this exposure. 94 176 foreach my $imfile (@$imfiles) { 95 177 my $key = $imfile->{uri}; … … 100 182 101 183 if (!(defined($hostname))) { $hostname = 'ipp004'; } 102 103 # if (($data_state ne 'full')||($data_state ne 'compressed')) { next; } ## skip things that aren't full. 104 105 # Do validation 106 107 # neb-stat level handling 184 $timer = time() - $timer_start; 185 print ("\n# $key $data_state $md5sum $hostname $class_id T: $timer\n"); 186 187 ## Get instances, and do validation that they have the correct md5sums. 108 188 my $stat = $neb->stat($key); 109 189 die "nebulous key: $key not found" unless $stat; … … 111 191 die "no instances found" unless $instances; 112 192 113 my $user_copies;114 eval {115 $user_copies = $neb->getxattr($key, "user.copies");116 };117 unless(defined($user_copies)) {118 $user_copies = 1;119 }120 121 my @validation;122 my %md5sum_uniq;123 193 my $existing_copies = 0; 124 125 my @existance; 126 my @md5sums; 194 my $Ngood = 0; 195 my %good_instances = (); 196 my %bad_instances = (); 197 127 198 my @files = map {URI->new($_)->file if $_} @$instances; 128 my @diskvols; 129 my @diskhosts; 130 my @quality; 131 my $Ngood = 0; 132 my $quality_mask = 0; 199 my @validation = (); 133 200 134 201 for (my $i = 0; $i <= $#files; $i++) { 202 my ($instance_exists,$instance_md5sum,$instance_host,$instance_volume,$instance_site, $is_good); 203 ($instance_host,$instance_volume) = parse_volume($files[$i]); 204 $instance_site = $volume_map{$instance_volume}; 205 $is_good = 0; 206 135 207 if (-e $files[$i]) { 136 $ existance[$i]= 1;208 $instance_exists = 1; 137 209 $existing_copies++; 138 $md5sums[$i] = local_md5sum($files[$i]); 139 $md5sum_uniq{$md5sums[$i]} = 1; 140 210 $instance_md5sum = local_md5sum($files[$i]); 211 if ($instance_md5sum eq $md5sum) { 212 push @{ $good_instances{$instance_site} }, $i; 213 $is_good = 1; 214 $Ngood++; 215 } 216 else { 217 push @{ $bad_instances{$instance_site} }, $i; 218 } 141 219 } 142 220 else { 143 $existance[$i] = 0; 144 $md5sums[$i] = 'NON-EXISTANT'; 145 $md5sum_uniq{$md5sums[$i]} = 1; 146 } 147 ($diskhosts[$i],$diskvols[$i]) = parse_volume($files[$i]); 148 $validation[$i] = sprintf("% 3d %32s %s %d %s %d", 149 $existance[$i], 150 $md5sums[$i], 151 $files[$i], 152 $md5sums[$i] eq $md5sum, 153 $diskhosts[$i],$diskvols[$i] 154 ); 155 156 # Pre-parse decisions 157 if ($md5sums[$i] ne $md5sum) { 158 $quality[$i] = 0; 159 } 160 elsif ($existance[$i] == 0) { 161 $quality[$i] = 0; 162 } 163 elsif (is_backup_volume($diskhosts[$i])) { 164 $quality[$i] = 1; 165 $quality_mask = $quality_mask | 1; 166 $Ngood++; 167 } 168 elsif ($diskhosts[$i] eq $host_mapping{$class_id}) { 169 $quality[$i] = 1; 170 $quality_mask = $quality_mask | 2; 171 $Ngood++; 172 } 173 else { 174 $quality[$i] = 2; 175 $Ngood++; 176 } 177 } 178 179 print "\n$key $data_state $md5sum $hostname $class_id\n"; 180 if (1) { 181 182 print 183 "object id: ", @$stat[0], "\n", 184 "key: ", @$stat[1], "\n"; 185 print 186 "epoch: ", @$stat[4], "\n"; 187 print 188 "md5sum count: ", scalar(keys %md5sum_uniq), "\n"; 189 print 190 "requested instances: ", $user_copies, "\n", 191 "available instances: ", @$stat[6], "\n", 192 "existing instances: ", $existing_copies, "\n", 193 "total instances: ", @$stat[7], "\n", 194 "instance location:\n", " " x 4; 195 print 196 join("\n" . " " x 4, @validation), "\n"; 197 } 221 $instance_exists = 0; 222 $instance_md5sum = 'NON-EXISTANT'; 223 push @{ $bad_instances{$instance_site} }, $i; 224 } 225 226 $validation[$i] = sprintf(" % 3d %d %32s %s %s %s\n", 227 $instance_exists,$is_good,$instance_md5sum, 228 $files[$i],$instance_host,$instance_volume); 229 } 230 231 $timer = time() - $timer_start; 232 # object_id ext_id epoch available existing total timer 233 printf("%s %s %s %d %d %d %d\n",@$stat[0],@$stat[1],@$stat[4],@$stat[6],$existing_copies,@$stat[7],$timer); 234 # instance_exists is_good instance_md5sum file instance_host instance_volume 235 my $val_string = join('',@validation); 236 print "$val_string"; 198 237 199 238 200 239 # Decide what to do 240 241 ## This block attempts to find an out-of-nebulous instance with a good md5sum, 242 ## and substitutes it for the 0-th listed instance. This gives us one good 243 ## copy to work with. 201 244 if ($Ngood == 0) { 202 245 # DO something to attempt to fix this. … … 220 263 } 221 264 else { 222 $quality[0] = 1;223 265 print "cp $good_file $files[0]\n"; 224 system("cp $good_file $files[0]"); 266 if ($do_ops) { 267 system("cp $good_file $files[0]"); 268 } 225 269 } 226 270 # Begin my best validation thought … … 234 278 235 279 $Ngood = 1; # We now hand off this single valid instance object to be handled by the Ngood=1 case. 236 } 237 238 if ($quality[0] == 0) { # The first instance is bad. 239 # But since we're here, and not up there, there must be at least one good copy. Find it. 240 for (my $i = 0; $i <= $#md5sums; $i++) { 241 if ($md5sums[$i] eq $md5sum) { # Found it. 242 print "cp $files[$i] $files[0]\n"; 243 system("cp $files[$i] $files[0]"); 244 # Begin my best validation thought 245 { 246 my $tmpmd5 = local_md5sum($files[0]); 247 if ($tmpmd5 ne $md5sum) { 248 die "Post-replication md5sum does not match! $tmpmd5 != $md5sum"; 249 } 280 ## We've done work here, so we can't do a cull this iteration. 281 if ($do_cull == 1) { $do_cull = -1; } 282 } 283 284 ## We have more than zero bad copies. We may cull some of these in the future, but we should try to 285 ## leave everything in the best state possible. 286 printf(">> %d %d\n",$Ngood, $#files + 1); 287 if ($Ngood != $#files + 1) { 288 my $good_copy; 289 my $good_copy_index; 290 foreach my $site_key (keys %good_instances) { 291 if ($#{ $good_instances{$site_key} } != -1) { 292 $good_copy_index = $good_instances{$site_key}[0]; 293 $good_copy = $files[$good_copy_index]; 294 last; 295 } 296 } 297 printf(">> GOOD: $good_copy\n"); 298 foreach my $site_key (keys %bad_instances) { 299 foreach my $bad_copy_index (@{ $bad_instances{$site_key} }) { 300 print "cp $good_copy $files[$bad_copy_index]\n"; 301 if ($do_ops) { 302 system("cp $good_copy $files[$bad_copy_index]"); 250 303 } 251 # End my best validation thought. 252 last; # We're done here now. 253 } 254 } 255 } 256 257 if ($Ngood == 1) { # We have only one version 258 if ($quality_mask & 1) { # ANd it's on a backup volume 259 print "neb-replicate $key\n"; 260 if ($do_ops) { 261 $neb->replicate($key) or die "failed to replicate the single valid copy"; 262 if ($@) { die $@; } 263 264 } 265 266 } 267 else { # And it's not, so put one there 268 my $rep_vol = get_random_backup_volume(); 304 my $tmpmd5 = local_md5sum($files[$bad_copy_index]); 305 if ($tmpmd5 ne $md5sum) { 306 ## This isn't super critical, so we don't need to die here. 307 warn "Post-repair md5sum does not match! $tmpmd5 != $md5sum: $files[$bad_copy_index]"; 308 } 309 else { 310 ## success 311 push @{ $good_instances{$site_key} }, $bad_copy_index; 312 } 313 } 314 } 315 ## We've done work here, so we can't do a cull this iteration. 316 if ($do_cull == 1) { $do_cull = -1; } 317 } 318 319 ## We can now attempt to make replicated copies to the sites that require additional copies. 320 foreach my $site_key (keys %requirement_map) { 321 my $have_instances = $#{ $good_instances{$site_key} } + 1; 322 print "## $site_key $have_instances $requirement_map{$site_key}\n"; 323 if ($#{ $good_instances{$site_key} } + 1 < $requirement_map{$site_key}) { 324 my $rep_vol = get_random_site_volume($site_key); 269 325 print "neb-replicate --volume $rep_vol $key\n"; 270 326 if ($do_ops) { … … 277 333 @$uris = map {URI->new($_)->file if $_} @$uris; 278 334 my $tmpmd5 = local_md5sum(${ $uris }[0]); 335 336 my $validation_str = sprintf("% 3d %d %32s %s %s %s", 337 -1,-1,$tmpmd5, 338 ${ $uris }[0],"repl",$rep_vol); 339 print 340 join("\n" . " " x 4, $validation_str), "\n"; 341 279 342 if ($tmpmd5 ne $md5sum) { 280 343 die "Post-replication md5sum does not match! $tmpmd5 != $md5sum"; … … 282 345 # End my best validation thought. 283 346 } 284 } 285 } 286 else { # N >= 2 287 if (!($quality_mask & 1)) { # no backup copy 288 my $rep_vol = get_random_backup_volume(); 289 print "neb-replicate --volume $rep_vol $key\n"; 290 if ($do_ops) { 291 $neb->replicate($key,$rep_vol) or die "failed to replicate a copy to the backup node"; 292 if ($@) { die $@; } 293 294 # Begin my best validation thought 295 system("sync") == 0 or die "Couldn't sync?"; 296 my $uris = $neb->find_instances($key,$rep_vol); 297 @$uris = map {URI->new($_)->file if $_} @$uris; 298 my $tmpmd5 = local_md5sum(${ $uris }[0]); 299 if ($tmpmd5 ne $md5sum) { 300 die "Post-replication md5sum does not match! $tmpmd5 != $md5sum"; 301 } 302 # End my best validation thought. 303 304 } 305 } 306 if (!($quality_mask & 2)) { # no copy on the requested host, so select the first as valid. 307 for (my $i = 0; $i <= $#files; $i++) { 308 if ($quality[$i] == 2) { 309 $quality[$i] = 1; 310 last; 311 } 312 } 313 } 314 for (my $i = 0; $i <= $#files; $i++) { 315 # print "$existance[$i] $quality[$i] $md5sums[$i] $md5sum $diskhosts[$i] $diskvols[$i]\n"; 316 if ($quality[$i] != 1) { 317 if ($existance[$i] == 0) { # This disk file doesn't exist. 318 system("touch $files[$i]"); 319 } 320 my $cull_vol = $diskhosts[$i] . "." . $diskvols[$i]; 321 print "neb-cull --volume $cull_vol $key\n"; 322 if ($do_ops) { 323 # The tilde here is to force hard volumes. Don't touch it. 324 $neb->cull($key,"~${cull_vol}",2) or die "failed to cull a superfluous instance"; 325 if ($@) { die "$@"; } 326 } 327 } 328 } 329 } 330 # exit(0); 331 } 347 ## We've done work here, so we can't do a cull this iteration. 348 if ($do_cull == 1) { $do_cull = -1; } 349 } 350 } 351 352 ## Do culls if that's what we were going to do. 353 if ($do_cull == -1) { 354 die "Cull option passed, but files were modified in the scan/repair/replicate phase. Not running cull!\n"; 355 } 356 elsif ($do_cull == 1) { 357 ## At this point, we should have no files in the bad_instances lists, because we've repaired them. 358 foreach my $site_key (keys %good_instances) { 359 if ($#{ $good_instances{$site_key} } + 1 > $requirement_map{$site_key}) { 360 for ($i = $requirement_map{$site_key}; $i <= $#{ $good_instances{$site_key} }; $i++) { 361 my $cull_index = ${ $good_instances{$site_key} }[$i]; 362 my ($instance_host,$instance_volume) = parse_volume($files[$cull_index]); 363 print "neb-cull --volume $instance_volume $key\n"; 364 if ($do_ops) { 365 # The tilde here is to force hard volumes. Don't touch it. 366 # Also: the 2 is a "minimum number of copies" restriction. Let's not be crazy here. 367 $neb->cull($key,"~${instance_volume}",2) or die "failed to cull a superfluous instance"; 368 if ($@) { die "$@"; } 369 } 370 } # End loop over extra instances 371 } # End check for sites with extra instances 372 } # End loop over sites. 373 } # End cull 374 375 } ## End loop over imfiles. 332 376 333 377 sub local_md5sum { … … 350 394 my $filename = shift(@_); 351 395 my $full_volume = (split /\//, $filename)[2]; 352 my ($hostname,$vol_index) = split /\./, $full_volume; # /; 353 return($hostname,$vol_index); 354 } 355 356 sub is_backup_volume { 357 my $hostname = shift(@_); 358 if (exists($backup_hosts{$hostname})) { 359 return(1); 360 } 361 return(0); 362 } 363 364 sub get_random_backup_volume { 365 my $NN = scalar keys %backup_destinations; 366 my $backup_host = (keys %backup_destinations)[int(rand($NN))]; 367 my $backup_vol = int(rand($backup_Nvols)); 368 369 return("${backup_host}.${backup_vol}"); 396 my ($hostname,undef) = split /\./, $full_volume; # /; 397 return($hostname,$full_volume); 398 } 399 400 sub get_random_site_volume { 401 my $site_key = shift(@_); 402 my $NN = scalar @{ $volume_lists{$site_key} }; 403 my $backup_volume = ${ $volume_lists{$site_key} }[int(rand($NN))]; 404 return($backup_volume); 370 405 } 371 406 -
branches/czw_branch/20160809/ippTools/share/pztool_pendingimfile.sql
r33533 r39924 20 20 AND pzDownloadImfile.class IS NULL 21 21 AND pzDownloadImfile.class_id IS NULL 22 AND summitImfile.exp_name NOT LIKE ' %a'22 AND summitImfile.exp_name NOT LIKE 'c%' 23 23 ORDER BY dateobs) AS partA 24 24 UNION … … 42 42 AND pzDownloadImfile.class IS NULL 43 43 AND pzDownloadImfile.class_id IS NULL 44 AND summitImfile.exp_name LIKE ' %a'44 AND summitImfile.exp_name LIKE 'c%' 45 45 ORDER BY dateobs) AS partB 46 46 ) as Foo -
branches/czw_branch/20160809/ippconfig/recipes/nightly_science.config
r39827 r39924 134 134 DIFFABLE BOOL FALSE 135 135 OFFNIGHT_DIFFS BOOL FALSE 136 REDUCTION STR QUB_DEFAULT136 REDUCTION STR MD_DEFAULT 137 137 DIST S16 15 138 138 CHIP S16 5 … … 365 365 TARGETS METADATA 366 366 NAME STR NCU 367 DISTRIBUTION STR SweetSpot367 DISTRIBUTION STR NCU 368 368 TESS STR RINGS.V3 369 369 OBSMODE STR NCU -
branches/czw_branch/20160809/ippconfig/recipes/psphot.config
r39915 r39924 115 115 PSF_FIT_MIN_VALID_FLUX F32 -100000000.0 # minimum allow flux for fitted source 116 116 PSF_FIT_MAX_VALID_FLUX F32 +100000000.0 # maximum allow flux for fitted source 117 118 PSF_FIT_MODEL_SUM_FRAC_CUT F32 0.85 119 PSF_FIT_MASKED_SUM_FRAC_CUT F32 0.85 117 120 118 121 # the following is used to require a minimum quality of fit before … … 720 723 OUTPUT.FORMAT STR PS1_DV3 # Format of output file 721 724 725 PSF_FIT_MODEL_SUM_FRAC_CUT F32 0.5 726 PSF_FIT_MASKED_SUM_FRAC_CUT F32 0.5 727 722 728 # Extended source fit parameters 723 729 # these models are used for high-quality shape analysis after all sources have been removed -
branches/czw_branch/20160809/psLib/share/tai_utc.dat
r37817 r39924 53 53 2456109.5 35.0000000 41317.0 0.0000000 54 54 2457204.5 36.0000000 41317.0 0.0000000 55 2457754.5 37.0000000 41317.0 0.0000000 -
branches/czw_branch/20160809/psLib/share/tai_utc.raw
r37817 r39924 39 39 2012 JUL 1 =JD 2456109.5 TAI-UTC= 35.0 S + (MJD - 41317.) X 0.0 S 40 40 2015 JUL 1 =JD 2457204.5 TAI-UTC= 36.0 S + (MJD - 41317.) X 0.0 S 41 2017 JAN 1 =JD 2457754.5 TAI-UTC= 37.0 S + (MJD - 41317.) X 0.0 S -
branches/czw_branch/20160809/psphot/src/psphotFitSourcesLinear.c
r38514 r39924 193 193 MAX_VALID_FLUX = 1e+8; 194 194 } 195 196 float cutModelSum = psMetadataLookupF32(&status, recipe, "PSF_FIT_MODEL_SUM_FRAC_CUT"); 197 float cutMaskedSum = psMetadataLookupF32(&status, recipe, "PSF_FIT_MASKED_SUM_FRAC_CUT"); 195 198 196 199 // XXX test: choose a larger-than expected radius: … … 278 281 279 282 // printf("%5d %4.3f %4.3f %4.3f\n", source->seq, normFlux, modelSum, maskedSum); 280 float cut = .85; 281 if (modelSum < cut * normFlux) continue; 282 if (maskedSum < cut * normFlux) continue; 283 //float cut = .85; 284 //if (modelSum < cut * normFlux) continue; 285 //if (maskedSum < cut * normFlux) continue; 286 if (modelSum < cutModelSum * normFlux) continue; 287 if (maskedSum < cutMaskedSum * normFlux) continue; 283 288 284 289 // clear the 'mark' pixels and remask on the fit aperture
Note:
See TracChangeset
for help on using the changeset viewer.
