Changeset 11898
- Timestamp:
- Feb 19, 2007, 3:18:27 PM (19 years ago)
- Location:
- trunk/Ohana/src/opihi
- Files:
-
- 1 added
- 14 edited
-
doc/pantasks.txt (modified) (1 diff)
-
include/pantasks.h (modified) (1 diff)
-
pantasks/CheckJobs.c (modified) (3 diffs)
-
pantasks/CheckTasks.c (modified) (6 diffs)
-
pantasks/ControllerOps.c (modified) (2 diffs)
-
pantasks/LocalJob.c (modified) (1 diff)
-
pantasks/TaskOps.c (modified) (2 diffs)
-
pantasks/controller_threads.c (modified) (1 diff)
-
pantasks/input_threads.c (modified) (1 diff)
-
pantasks/job_threads.c (modified) (1 diff)
-
pantasks/task_threads.c (modified) (1 diff)
-
pantasks/test/sleep.sh (modified) (1 diff)
-
pantasks/test/sleep2.sh (added)
-
pantasks/verbose.c (modified) (2 diffs)
-
pcontrol/CheckSystem.c (modified) (3 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/Ohana/src/opihi/doc/pantasks.txt
r8190 r11898 1 2 - task spawning speed 3 4 I have been examining things which affect the speed of the pantasks 5 processing. I have learned some interesting things: 6 7 * pcontrol was being slammed with requests for status by pantasks. 8 this may account for Paul's controller hang-ups. I have added a 9 long (500ms) sleep to the controller thread to limit the rate at 10 which controller checks are run 11 12 * adding even a small usleep to the task_thread or job_thread puts 13 them to sleep for a long time (>> 10ms). it seems longer than the 14 linux time slicer. I have removed sleeps from the task and job 15 threads. 16 17 * the job submit rate is apparently limited by two things: 18 19 * when the job is submitted (SubmitJob) the interaction with 20 the controller seems to take ~30ms or more. 21 22 * some thread (controller thread? main readline thread?) seems to 23 introduce timeouts which are very long (up to 100ms). These 24 introduce big delays when they happen during the task_thread 25 loop. 1 26 2 27 - updates for queues: -
trunk/Ohana/src/opihi/include/pantasks.h
r11388 r11898 174 174 Task *GetActiveTask (); 175 175 void SetTaskTimer (struct timeval *timer); 176 double GetTaskTimer (struct timeval start );176 double GetTaskTimer (struct timeval start, int verbose); 177 177 void InitTaskTimers (); 178 178 int TaskHash (char *input); -
trunk/Ohana/src/opihi/pantasks/CheckJobs.c
r11542 r11898 11 11 Queue *queue; 12 12 13 // int Ncheck; 14 // Ncheck = 0; 15 13 16 /** test all jobs: ready to test? finished? **/ 14 17 while ((job = NextJob ()) != NULL) { 18 // Ncheck ++; 15 19 16 20 task = job[0].task; 17 21 18 22 /* check poll period (ready to ask for status?) */ 19 if (GetTaskTimer(job[0].last ) < task[0].poll_period) continue;23 if (GetTaskTimer(job[0].last, FALSE) < task[0].poll_period) continue; 20 24 21 25 /* check current status */ … … 135 139 */ 136 140 if (job[0].mode == JOB_LOCAL) { 137 if (GetTaskTimer(job[0].start ) < task[0].timeout_period) continue;141 if (GetTaskTimer(job[0].start, FALSE) < task[0].timeout_period) continue; 138 142 if (VerboseMode()) gprint (GP_LOG, "timeout on %s\n", task[0].name); 139 143 … … 176 180 /* reset polling clock */ 177 181 SetTaskTimer (&job[0].last); 178 if (TestElapsedCheck()) return (TRUE); 182 if (TestElapsedCheck()) { 183 // fprintf (stderr, "check %d jobs\n", Ncheck); 184 return (TRUE); 185 } 179 186 } 187 // fprintf (stderr, "check %d jobs\n", Ncheck); 180 188 return (TRUE); 181 189 } -
trunk/Ohana/src/opihi/pantasks/CheckTasks.c
r11324 r11898 6 6 Task *task; 7 7 int status; 8 struct timeval now; 8 9 9 10 /** test all tasks: ready to test? ready to run? **/ … … 13 14 14 15 /* ready to test? : check exec period */ 15 if (GetTaskTimer(task[0].last ) < task[0].exec_period) continue;16 if (GetTaskTimer(task[0].last, FALSE) < task[0].exec_period) continue; 16 17 17 18 /* need to check if the current time is within valid/invalid periods */ … … 25 26 } 26 27 if (task[0].NpendingMax && (task[0].Npending >= task[0].NpendingMax)) { 27 fprintf (stderr, "npending: %d, max npending: %d\n", task[0].Npending, task[0].NpendingMax);28 // fprintf (stderr, "npending: %d, max npending: %d\n", task[0].Npending, task[0].NpendingMax); 28 29 gettimeofday (&task[0].last, (void *) NULL); 29 30 continue; 30 31 } 32 33 // gettimeofday (&now, (void *) NULL); 34 // fprintf (stderr, "t0: %d %6d - \n", now.tv_sec, now.tv_usec); 31 35 32 36 /* ready to run? : run task.exec macro */ … … 39 43 } 40 44 45 // gettimeofday (&now, (void *) NULL); 46 // fprintf (stderr, "t1: %d %6d - \n", now.tv_sec, now.tv_usec); 47 41 48 /* check if there are errors with this task */ 42 49 if (!ValidateTask (task, TRUE)) { … … 44 51 continue; 45 52 } 53 54 // gettimeofday (&now, (void *) NULL); 55 // fprintf (stderr, "t2: %d %6d - \n", now.tv_sec, now.tv_usec); 46 56 47 57 /* construct job from task */ 48 58 job = CreateJob (task); 49 59 60 // gettimeofday (&now, (void *) NULL); 61 // fprintf (stderr, "t3: %d %6d - \n", now.tv_sec, now.tv_usec); 62 50 63 /* execute job - XXX add status test */ 51 64 SubmitJob (job); 65 66 // fprintf (stderr, "nl: %d %6d - ", 67 // task[0].last.tv_sec, task[0].last.tv_usec); 52 68 53 69 /* reset timer on task (don't do this if Create/Submit fails) (why not??) */ … … 55 71 task[0].Njobs ++; 56 72 task[0].Npending ++; 73 74 // fprintf (stderr, "%d %6d\n", 75 // task[0].last.tv_sec, task[0].last.tv_usec); 57 76 58 77 /* increment Nrun for inclusive ranges with Nmax */ -
trunk/Ohana/src/opihi/pantasks/ControllerOps.c
r11446 r11898 352 352 } 353 353 354 /* for commands which don't return a prompt, don't look for one */ 355 if (response == NULL) { 356 return (TRUE); 357 } 358 354 359 /* watch for response - wait up to 1 second */ 355 360 line = NULL; … … 456 461 sprintf (cmd, "quit"); 457 462 InitIOBuffer (&buffer, 0x100); 458 status = ControllerCommand (cmd, "", &buffer);459 FreeIOBuffer (&buffer); 460 461 /* the quit command does not return a prompt, so we always462 get an error on the controller here*/463 status = ControllerCommand (cmd, NULL, &buffer); 464 FreeIOBuffer (&buffer); 465 466 /* the quit command does not return a prompt, 467 check that the controller exited */ 463 468 StopController (); 464 469 return (TRUE); -
trunk/Ohana/src/opihi/pantasks/LocalJob.c
r11055 r11898 97 97 exit (1); 98 98 } 99 job[0].dtime = GetTaskTimer (job[0].start );99 job[0].dtime = GetTaskTimer (job[0].start, FALSE); 100 100 break; 101 101 } -
trunk/Ohana/src/opihi/pantasks/TaskOps.c
r11324 r11898 547 547 /*** task timer functions ***/ 548 548 549 double GetTaskTimer (struct timeval start ) {549 double GetTaskTimer (struct timeval start, int verbose) { 550 550 551 551 double dtime; … … 555 555 dtime = DTIME (now, start); 556 556 557 if (verbose) { 558 fprintf (stderr, "tt: %d %6d - %d %6d : %f\n", 559 now.tv_sec, now.tv_usec, 560 start.tv_sec, start.tv_usec, dtime); 561 } 562 557 563 return (dtime); 558 564 } -
trunk/Ohana/src/opihi/pantasks/controller_threads.c
r11084 r11898 28 28 CheckControllerOutput (); 29 29 SerialThreadUnlock (); 30 usleep (10000); // allow other threads a chance to run 30 if (VerboseMode() == 2) fprintf (stderr, "C"); 31 // fprintf (stderr, "**** C ****"); 32 usleep (500000); // allow other threads a chance to run 31 33 } 32 34 } -
trunk/Ohana/src/opihi/pantasks/input_threads.c
r11084 r11898 27 27 CheckInputs (); 28 28 SerialThreadUnlock (); 29 fprintf (stderr, "I");30 usleep (10000); // allow other threads a chance to run29 if (VerboseMode() == 2) fprintf (stderr, "I"); 30 // usleep (10000); // allow other threads a chance to run 31 31 } 32 32 } -
trunk/Ohana/src/opihi/pantasks/job_threads.c
r11084 r11898 27 27 CheckJobs (); 28 28 SerialThreadUnlock (); 29 usleep (10000); // allow other threads a chance to run 29 if (VerboseMode() == 2) fprintf (stderr, "J"); 30 // fprintf (stderr, "J"); 31 // usleep (10000); // allow other threads a chance to run 30 32 } 31 33 } -
trunk/Ohana/src/opihi/pantasks/task_threads.c
r11084 r11898 27 27 CheckTasks (); 28 28 SerialThreadUnlock (); 29 usleep (10000); // allow other threads a chance to run 29 if (VerboseMode() == 2) fprintf (stderr, "T"); 30 // fprintf (stderr, "T"); 31 // usleep (1000); // allow other threads a chance to run 30 32 } 31 33 } -
trunk/Ohana/src/opihi/pantasks/test/sleep.sh
r11318 r11898 6 6 7 7 periods -poll 0.1 8 periods -exec 1.08 periods -exec 0.2 9 9 periods -timeout 20 10 npending 210 npending 5 11 11 12 12 stdout tmp.txt 13 13 stderr tmp.txt 14 15 task.exec 16 echo "create command" 17 end 14 18 15 19 # success -
trunk/Ohana/src/opihi/pantasks/verbose.c
r7917 r11898 23 23 return (TRUE); 24 24 } 25 if (!strcasecmp (argv[1], "THREADS")) { 26 VERBOSE = 2; 27 return (TRUE); 28 } 25 29 if (!strcasecmp (argv[1], "TOGGLE")) { 26 30 VERBOSE = ~VERBOSE; … … 29 33 } 30 34 31 gprint (GP_ERR, "USAGE: verbose (on/off/t oggle)\n");35 gprint (GP_ERR, "USAGE: verbose (on/off/threads/toggle)\n"); 32 36 return (FALSE); 33 37 } -
trunk/Ohana/src/opihi/pcontrol/CheckSystem.c
r10693 r11898 88 88 89 89 if ((RunLevel == PCONTROL_RUN_ALL) || (RunLevel == PCONTROL_RUN_REAP)) { 90 Njobchecks += CheckBusyJobs(0.020); /* get job status */ 91 Njobchecks += CheckDoneJobs(0.020); /* harvest job stdout/stderr */ 92 Njobchecks += CheckKillJobs(0.020); /* harvest job stdout/stderr */ 90 Njobchecks += CheckBusyJobs(0.020); /* get job status (PCLIENT) */ 91 TestCheckPoint (); 92 Njobchecks += CheckDoneJobs(0.020); /* harvest job stdout/stderr (!PCLIENT) */ 93 TestCheckPoint (); 94 Njobchecks += CheckKillJobs(0.020); /* harvest job stdout/stderr (PCLIENT) */ 93 95 TestCheckPoint (); 94 96 } … … 96 98 if (RunLevel != PCONTROL_RUN_NONE) { 97 99 Nhostchecks += CheckDoneHosts(0.020); /* reset the host */ 100 TestCheckPoint (); 98 101 Nhostchecks += CheckDownHosts(0.100); /* launch the host */ 99 102 TestCheckPoint (); … … 102 105 if (RunLevel == PCONTROL_RUN_ALL) { 103 106 // we want to give each block a maximum allowed time 104 Nhostchecks += CheckIdleHosts(0.020); /* submit a new job */107 Nhostchecks += CheckIdleHosts(0.020); /* submit a new job (PCLIENT) */ 105 108 TestCheckPoint (); 106 109 }
Note:
See TracChangeset
for help on using the changeset viewer.
