Changeset 21379
- Timestamp: Feb 6, 2009, 10:37:19 AM (17 years ago)
- Location: trunk/Ohana/src/opihi
- Files: 1 added, 6 edited
- include/pcontrol.h (modified) (3 diffs)
- pcontrol/CheckIdleHost.c (modified) (5 diffs)
- pcontrol/CheckSystem.c (modified) (4 diffs)
- pcontrol/JobOps.c (modified) (1 diff)
- pcontrol/check.c (modified) (4 diffs)
- pcontrol/status.c (modified) (2 diffs)
- pcontrol/test/hosttargets.sh (added)
Legend:
- Unmodified
- Added
- Removed
-
trunk/Ohana/src/opihi/include/pcontrol.h
r21153 r21379 25 25 PCONTROL_JOB_NEEDHOST, 26 26 } JobMode; 27 28 /** job mode check stages **/ 29 typedef enum { 30 PCONTROL_JOB_STAGE_ANYHOST, 31 PCONTROL_JOB_STAGE_WANTHOST, 32 PCONTROL_JOB_STAGE_NEEDHOST, 33 PCONTROL_JOB_STAGE_OLDWANT, 34 } JobCheckStage; 27 35 28 36 /** job thread options values **/ … … 180 188 int CheckDoneHosts (float delay); 181 189 int CheckDownHosts (float delay); 182 int CheckIdleHosts (float delay );190 int CheckIdleHosts (float delay, int Stage); 183 191 int CheckLiveHosts (float delay); 184 192 int SetRunSystem (int state); … … 202 210 203 211 int StartHost (Host *host); 204 int CheckIdleHost (Host *host );212 int CheckIdleHost (Host *host, int Stage); 205 213 int CheckDoneJob (Job *job, Host *host); 206 214 int GetJobOutput (char *command, Host *host, JobOutput *output); -
trunk/Ohana/src/opihi/pcontrol/CheckIdleHost.c
r20324 r21379 4 4 5 5 /* the supplied host is not on a stack: it cannot be taken by the other thread */ 6 int CheckIdleHost (Host *host ) {6 int CheckIdleHost (Host *host, int Stage) { 7 7 8 8 int i; 9 9 Stack *stack; 10 10 Job *job; 11 struct timeval now; 12 float dtime; 11 13 12 14 if (logfile == NULL) { … … 28 30 29 31 /* look for first NEEDHOST matching this host */ 30 for (i = 0; i < stack[0].Nobject; i++) {32 for (i = 0; (Stage == PCONTROL_JOB_STAGE_NEEDHOST) && (i < stack[0].Nobject); i++) { 31 33 job = (Job *) stack[0].object[i]; 32 34 if (job[0].mode != PCONTROL_JOB_NEEDHOST) continue; … … 48 50 49 51 /* no NEEDHOST entry, look for first WANTHOST matching this host */ 50 for (i = 0; i < stack[0].Nobject; i++) {52 for (i = 0; (Stage == PCONTROL_JOB_STAGE_WANTHOST) && (i < stack[0].Nobject); i++) { 51 53 job = (Job *) stack[0].object[i]; 52 54 if (job[0].mode != PCONTROL_JOB_WANTHOST) continue; … … 68 70 69 71 /* no WANTHOST entry, look for first ANYHOST matching this host */ 70 for (i = 0; i < stack[0].Nobject; i++) {72 for (i = 0; (Stage == PCONTROL_JOB_STAGE_ANYHOST) && (i < stack[0].Nobject); i++) { 71 73 job = (Job *) stack[0].object[i]; 72 74 if (job[0].mode != PCONTROL_JOB_ANYHOST) continue; … … 86 88 87 89 /* no ANYHOST entry, look for first WANTHOST with old time */ 88 /* XXX perhaps I should add this to the conditions for ANYHOST instead of 89 running a separate loop? 
ie, WANTHOST && time > X == ANYHOST */ 90 for (i = 0; i < stack[0].Nobject; i++) { 90 for (i = 0; (Stage == PCONTROL_JOB_STAGE_OLDWANT) && (i < stack[0].Nobject); i++) { 91 91 job = (Job *) stack[0].object[i]; 92 92 if (job[0].mode != PCONTROL_JOB_WANTHOST) continue; 93 // XXX test the job age and skip if too young 93 94 // allow WANT jobs to wait up to 10.0 sec for the host to be free before giving up 95 gettimeofday (&now, (void *) NULL); 96 dtime = DTIME (now, job[0].start); 97 if (dtime > 10.0) continue; 94 98 95 99 if (logfile) fprintf (logfile, "start wanthost(2) %s (job host %s) : %s\n", host[0].hostname, job[0].hostname, job[0].argv[0]); -
trunk/Ohana/src/opihi/pcontrol/CheckSystem.c
r18098 r21379 22 22 23 23 /* we want to give each block a maximum allowed time */ 24 CheckIdleHosts(0.020); /* submit a new job */ 24 CheckIdleHosts(0.015, PCONTROL_JOB_STAGE_NEEDHOST); /* submit a new job */ 25 CheckIdleHosts(0.015, PCONTROL_JOB_STAGE_WANTHOST); /* submit a new job */ 26 CheckIdleHosts(0.015, PCONTROL_JOB_STAGE_ANYHOST); /* submit a new job */ 27 CheckIdleHosts(0.015, PCONTROL_JOB_STAGE_OLDWANT); /* submit a new job */ 25 28 26 29 CheckBusyJobs(0.020); /* get job status */ … … 109 112 if (RunLevel == PCONTROL_RUN_ALL) { 110 113 // we want to give each block a maximum allowed time 111 Nhostchecks += CheckIdleHosts(0.020); /* submit a new job (PCLIENT) */ 114 Nhostchecks += CheckIdleHosts(0.015, PCONTROL_JOB_STAGE_NEEDHOST); /* submit a new job (PCLIENT) */ 115 Nhostchecks += CheckIdleHosts(0.015, PCONTROL_JOB_STAGE_WANTHOST); /* submit a new job (PCLIENT) */ 116 Nhostchecks += CheckIdleHosts(0.015, PCONTROL_JOB_STAGE_ANYHOST); /* submit a new job (PCLIENT) */ 117 Nhostchecks += CheckIdleHosts(0.015, PCONTROL_JOB_STAGE_OLDWANT); /* submit a new job (PCLIENT) */ 112 118 TestCheckPoint (); 113 119 } … … 367 373 } 368 374 369 int CheckIdleHosts (float MaxDelay) { 375 // if we have any IDLE hosts, check if there are jobs to be launched 376 // for each pass, we only check one type of job: stage = NEED, WANT, ANY, OLDWANT 377 int CheckIdleHosts (float MaxDelay, int Stage) { 370 378 371 379 struct timeval start, stop; … … 389 397 host = PullStackByLocation (stack, STACK_TOP); 390 398 if (host == NULL) break; 391 CheckIdleHost (host); 392 gettimeofday (&stop, (void *) NULL); 393 dtime = DTIME (stop, start); 394 } 399 CheckIdleHost (host, Stage); 400 gettimeofday (&stop, (void *) NULL); 401 dtime = DTIME (stop, start); 402 } 403 395 404 if (DEBUG) gprint (GP_ERR, "checked %d hosts\n", i); 396 405 return (i); -
trunk/Ohana/src/opihi/pcontrol/JobOps.c
r20047 r21379 231 231 PutJob (job, PCONTROL_JOB_PENDING, STACK_BOTTOM); 232 232 233 // until the job is launched, we use 'start' to time how long the job is waiting on the queue 234 gettimeofday (&job[0].start, (void *) NULL); 235 233 236 if (VerboseMode()) gprint (GP_ERR, "added new job\n"); 234 237 return (JobID); -
trunk/Ohana/src/opihi/pcontrol/check.c
r20047 r21379 3 3 int check (int argc, char **argv) { 4 4 5 int N, Save; 5 6 int JobID, HostID; 6 7 … … 8 9 Job *job = NULL; 9 10 Host *host = NULL; 11 12 Save = FALSE; 13 if ((N = get_argument (argc, argv, "-save"))) { 14 remove_argument (N, &argc, argv); 15 Save = TRUE; 16 } 10 17 11 18 if (argc != 3) { … … 39 46 gprint (GP_LOG, "HOSTNAME NONE\n"); 40 47 } 48 49 if (Save) { 50 set_str_variable ("JOB_STATUS", GetJobStackName(job[0].stack)); 51 set_int_variable ("JOB_EXITST", job[0].exit_status); 52 set_int_variable ("JOB_STDOUT_SIZE", job[0].stdout.size); 53 set_int_variable ("JOB_STDERR_SIZE", job[0].stderr.size); 54 set_variable ("JOB_DTIME", job[0].dtime); 55 set_str_variable ("JOB_HOSTNAME", job[0].hostname); 56 if (job[0].realhost) { 57 set_str_variable ("JOB_REALHOST", job[0].realhost); 58 } else { 59 set_str_variable ("JOB_REALHOST", "NONE"); 60 } 61 } 62 41 63 PushStack (stack, STACK_BOTTOM, job, job[0].JobID, job[0].argv[0]); 42 64 return (TRUE); … … 53 75 } 54 76 gprint (GP_LOG, "host %s\n", GetHostStackName(host[0].stack)); 77 78 if (Save) { 79 set_str_variable ("HOST_STATE", GetHostStackName(host[0].stack)); 80 } 81 55 82 PushStack (stack, STACK_BOTTOM, host, host[0].HostID, host[0].hostname); 56 83 return (TRUE); -
trunk/Ohana/src/opihi/pcontrol/status.c
r18098 r21379 17 17 Stack *stack; 18 18 Job *job; 19 struct timeval now; 20 float dtime; 19 21 20 22 stack = GetJobStack (Nstack); … … 34 36 } 35 37 gprint (GP_LOG, "%7s ", GetJobStackName (job[0].state)); 38 39 switch (job[0].state) { 40 // for active jobs or pending jobs, print time since start (or create in the case of pending) 41 case PCONTROL_JOB_PENDING: 42 case PCONTROL_JOB_BUSY: 43 case PCONTROL_JOB_RESP: 44 case PCONTROL_JOB_HUNG: 45 gettimeofday (&now, (void *) NULL); 46 dtime = DTIME (now, job[0].start); 47 gprint (GP_LOG, "%8.2f ", dtime); 48 break; 49 50 // for active jobs or pending jobs, print time since start (or create in the case of pending) 51 case PCONTROL_JOB_DONE: 52 case PCONTROL_JOB_KILL: 53 case PCONTROL_JOB_EXIT: 54 case PCONTROL_JOB_CRASH: 55 default: 56 dtime = DTIME (job[0].stop, job[0].start); 57 gprint (GP_LOG, "%8.2f ", dtime); 58 break; 59 } 60 36 61 for (j = 0; j < job[0].argc; j++) { 37 62 gprint (GP_LOG, "%s ", job[0].argv[j]);
Note: See TracChangeset for help on using the changeset viewer.
