Changeset 4763
- Timestamp: Aug 14, 2005, 10:33:53 AM (21 years ago)
- Location: trunk/Ohana/src/opihi
- Files: 1 added, 26 edited
-
include/pcontrol.h (modified) (1 diff)
-
include/psched.h (modified) (2 diffs)
-
include/shell.h (modified) (1 diff)
-
lib.shell/ListOps.c (modified) (2 diffs)
-
lib.shell/opihi.c (modified) (1 diff)
-
pantasks/CheckController.c (modified) (4 diffs)
-
pantasks/CheckJobs.c (modified) (1 diff)
-
pantasks/CheckSystem.c (modified) (1 diff)
-
pantasks/ControllerOps.c (modified) (9 diffs)
-
pantasks/Makefile (modified) (1 diff)
-
pantasks/controller.c (modified) (2 diffs)
-
pantasks/controller_host.c (modified) (1 diff)
-
pantasks/controller_pulse.c (added)
-
pantasks/controller_status.c (modified) (1 diff)
-
pantasks/psched.c (modified) (2 diffs)
-
pclient/ChildOps.c (modified) (2 diffs)
-
pclient/pclient.c (modified) (1 diff)
-
pcontrol/CheckBusyJob.c (modified) (1 diff)
-
pcontrol/CheckDoneHost.c (modified) (1 diff)
-
pcontrol/CheckHost.c (modified) (1 diff)
-
pcontrol/CheckSystem.c (modified) (2 diffs)
-
pcontrol/KillJob.c (modified) (1 diff)
-
pcontrol/ResetJob.c (modified) (1 diff)
-
pcontrol/StartJob.c (modified) (1 diff)
-
pcontrol/StopHosts.c (modified) (4 diffs)
-
pcontrol/pcontrol.c (modified) (3 diffs)
-
scripts/psched.pro (modified) (3 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/Ohana/src/opihi/include/pcontrol.h
r4762 r4763 168 168 int VerboseMode (); 169 169 int StartJob (Job *job); 170 void gotsignal (int signum); 171 int HarvestHost (int pid); -
trunk/Ohana/src/opihi/include/psched.h
r4762 r4763 167 167 CommandF *FindControllerCommand (char *cmd); 168 168 int QuitController (); 169 int StopController (); 169 170 int VerboseMode (); 170 171 int KillLocalJob (Job *job); … … 175 176 int CheckControllerStatus (); 176 177 int TestElapsedCheck (); 178 void gotsignal (int signum); 177 179 -
trunk/Ohana/src/opihi/include/shell.h
r4751 r4763 58 58 int is_list PROTO((char *line)); 59 59 int is_loop PROTO((char *line)); 60 int is_task PROTO((char *line)); 61 int is_task_exit PROTO((char *line)); 62 int is_task_exec PROTO((char *line)); 60 63 int is_macro_create PROTO((char *line)); 61 64 void InitLists PROTO(()); -
trunk/Ohana/src/opihi/lib.shell/ListOps.c
r4714 r4763 126 126 } 127 127 128 int is_task (char *line) { 129 130 int status; 131 char *comm; 132 133 comm = thisword (line); 134 if (comm == (char *) NULL) return (FALSE); 135 136 status = !strcmp (comm, "task"); 137 free (comm); 138 return (status); 139 } 140 141 int is_task_exit (char *line) { 142 143 int status; 144 char *comm; 145 146 comm = thisword (line); 147 if (comm == (char *) NULL) return (FALSE); 148 149 status = !strcmp (comm, "task.exit"); 150 free (comm); 151 return (status); 152 } 153 154 int is_task_exec (char *line) { 155 156 int status; 157 char *comm; 158 159 comm = thisword (line); 160 if (comm == (char *) NULL) return (FALSE); 161 162 status = !strcmp (comm, "task.exec"); 163 free (comm); 164 return (status); 165 } 128 166 129 167 int is_list (char *line) { … … 135 173 status |= is_for_loop (line); 136 174 status |= is_loop (line); 175 status |= is_task (line); 176 status |= is_task_exit (line); 177 status |= is_task_exec (line); 137 178 138 179 return (status); -
trunk/Ohana/src/opihi/lib.shell/opihi.c
r4689 r4763 19 19 20 20 line = readline (prompt); 21 21 22 if (line == NULL) { 22 23 -
trunk/Ohana/src/opihi/pantasks/CheckController.c
r4762 r4763 38 38 gettimeofday (&stop, (void *) NULL); 39 39 dtime = DTIME (stop, start); 40 if (VerboseMode()) fprintf (stderr, "check stack %f\n", dtime); 40 if (VerboseMode()) fprintf (stderr, "check exit stack %f\n", dtime); 41 /* if (Njobs) fprintf (stderr, "check exit stack %f\n", dtime); */ 41 42 gettimeofday (&start, (void *) NULL); 42 43 … … 61 62 gettimeofday (&stop, (void *) NULL); 62 63 dtime = DTIME (stop, start); 63 if (VerboseMode()) fprintf (stderr, "check %d jobs %f\n", i, dtime); 64 /* if (VerboseMode()) fprintf (stderr, "clear %d exit jobs %f\n", i, dtime); */ 65 gettimeofday (&start, (void *) NULL); 64 66 65 67 if (TestElapsedCheck()) return (TRUE); … … 85 87 } 86 88 89 gettimeofday (&stop, (void *) NULL); 90 dtime = DTIME (stop, start); 91 /* if (VerboseMode()) fprintf (stderr, "check crash stack %f\n", dtime); */ 92 gettimeofday (&start, (void *) NULL); 93 87 94 p = buffer.buffer; 88 95 for (i = 0; (i < Njobs) && !TestElapsedCheck(); i++) { … … 104 111 } 105 112 FreeIOBuffer (&buffer); 113 114 gettimeofday (&stop, (void *) NULL); 115 dtime = DTIME (stop, start); 116 /* if (VerboseMode()) fprintf (stderr, "clear %d crash jobs %f\n", i, dtime); */ 106 117 return (TRUE); 107 118 } -
trunk/Ohana/src/opihi/pantasks/CheckJobs.c
r4762 r4763 17 17 switch (status) { 18 18 case JOB_PENDING: 19 if (VerboseMode()) fprintf (stderr, "job %s (%d) pending\n", job[0].task[0].name, job[0].JobID);19 /* if (VerboseMode()) fprintf (stderr, "job %s (%d) pending\n", job[0].task[0].name, job[0].JobID); */ 20 20 break; 21 21 22 22 case JOB_BUSY: 23 if (VerboseMode()) fprintf (stderr, "job %s (%d) busy\n", job[0].task[0].name, job[0].JobID);23 /* if (VerboseMode()) fprintf (stderr, "job %s (%d) busy\n", job[0].task[0].name, job[0].JobID); */ 24 24 break; 25 25 -
trunk/Ohana/src/opihi/pantasks/CheckSystem.c
r4762 r4763 9 9 gettimeofday (&start, (void *) NULL); 10 10 11 if (Ncheck < 5) {11 if (Ncheck < 20) { 12 12 CheckTasks (); 13 13 CheckJobs (); -
trunk/Ohana/src/opihi/pantasks/ControllerOps.c
r4762 r4763 25 25 gettimeofday (&stop, (void *) NULL); 26 26 dtime = DTIME (stop, start); 27 if (VerboseMode()) fprintf (stderr, "check job status %f\n", dtime);27 /* if (VerboseMode()) fprintf (stderr, "check job status %f\n", dtime); */ 28 28 29 29 if ((job[0].state == JOB_EXIT) || (job[0].state == JOB_CRASH)) { … … 32 32 gettimeofday (&stop, (void *) NULL); 33 33 dtime = DTIME (stop, start); 34 if (VerboseMode()) fprintf (stderr, "get stdout %f\n", dtime); 35 34 /* if (VerboseMode()) fprintf (stderr, "get stdout %f\n", dtime); */ 35 36 gettimeofday (&start, (void *) NULL); 36 37 GetJobOutput ("stderr", job[0].pid, &job[0].stderr, job[0].stderr_size); 38 gettimeofday (&stop, (void *) NULL); 39 dtime = DTIME (stop, start); 40 /* if (VerboseMode()) fprintf (stderr, "get stderr %f\n", dtime); */ 41 42 gettimeofday (&start, (void *) NULL); 37 43 DeleteControllerJob (job); 44 gettimeofday (&stop, (void *) NULL); 45 dtime = DTIME (stop, start); 46 /* if (VerboseMode()) fprintf (stderr, "delete job %f\n", dtime); */ 38 47 } 39 48 return (TRUE); … … 138 147 if (status == -1) return (CONTROLLER_HUNG); 139 148 140 if (VerboseMode()) fprintf (stderr, "message received (GetJobOutput : %s)\n", cmd);149 /* if (VerboseMode()) fprintf (stderr, "message received (GetJobOutput : %s)\n", cmd); */ 141 150 /* drop extra bytes from pcontrol (not pclient:job) */ 142 151 buffer[0].Nbuffer = Nstart + Nbytes; … … 328 337 FlushIOBuffer (buffer); 329 338 330 if (VerboseMode()) fprintf (stderr, "send: %s\n", cmd);331 332 339 /* send command, is pipe still open? 
*/ 333 340 status = write_fmt (stdin_cntl, "%s\n", cmd); 334 341 if ((status == -1) && (errno == EPIPE)) { 335 ControllerStatus = FALSE;342 StopController (); 336 343 if (VerboseMode()) fprintf (stderr, "controller is down\n"); 337 344 return (FALSE); … … 347 354 } 348 355 if (status == 0) { 349 ControllerStatus = FALSE;356 StopController (); 350 357 if (VerboseMode()) fprintf (stderr, "controller is down\n"); 351 358 return (FALSE); 352 359 } 353 360 if (status == -1) { 361 StopController (); 354 362 if (VerboseMode()) fprintf (stderr, "controller is not responding\n"); 355 363 return (FALSE); … … 362 370 bzero (buffer[0].buffer + buffer[0].Nbuffer, buffer[0].Nalloc - buffer[0].Nbuffer); 363 371 } 364 if (VerboseMode()) fprintf (stderr, "message received, %d cycles\n", i);372 /* if (VerboseMode()) fprintf (stderr, "message received, %d cycles\n", i); */ 365 373 return (TRUE); 366 374 } … … 428 436 int QuitController () { 429 437 430 int i, status, waitstatus, result;438 int status; 431 439 char cmd[128]; 432 440 IOBuffer buffer; … … 436 444 sprintf (cmd, "quit"); 437 445 InitIOBuffer (&buffer, 0x100); 438 status = ControllerCommand (cmd, CONTROLLER_PROMPT, &buffer); 439 FreeIOBuffer (&buffer); 446 status = ControllerCommand (cmd, "", &buffer); 447 FreeIOBuffer (&buffer); 448 449 /* the quit command does not return a prompt, so we always 450 get an error on the controller here */ 451 StopController (); 452 return (TRUE); 453 } 454 455 int StopController () { 456 457 int i, waitstatus, result; 458 459 if (!ControllerStatus) return (TRUE); 440 460 441 461 ControllerStatus = FALSE; … … 451 471 FreeIOBuffer (&stdout_buffer); 452 472 FreeIOBuffer (&stderr_buffer); 453 454 return (TRUE); 455 } 473 return (TRUE); 474 } -
trunk/Ohana/src/opihi/pantasks/Makefile
r4748 r4763 51 51 $(SDIR)/controller_status.$(ARCH).o \ 52 52 $(SDIR)/controller_output.$(ARCH).o \ 53 $(SDIR)/controller_pulse.$(ARCH).o \ 53 54 $(SDIR)/task.$(ARCH).o \ 54 55 $(SDIR)/task_host.$(ARCH).o \ -
trunk/Ohana/src/opihi/pantasks/controller.c
r4693 r4763 6 6 int controller_check PROTO((int, char **)); 7 7 int controller_output PROTO((int, char **)); 8 int controller_pulse PROTO((int, char **)); 8 9 9 10 static Command controller_cmds[] = { … … 13 14 {"status", controller_status, "check controller status"}, 14 15 {"output", controller_output, "print controller output"}, 16 {"pulse", controller_pulse, "set controller pulse"}, 15 17 }; 16 18 -
trunk/Ohana/src/opihi/pantasks/controller_host.c
r4714 r4763 22 22 return (TRUE); 23 23 } 24 25 /* should I keep an internal host table so I can reload the 26 hosts if the controller exits? 27 28 alternatively, that could be a user-level choice 29 */ -
trunk/Ohana/src/opihi/pantasks/controller_status.c
r4706 r4763 22 22 InitIOBuffer (&buffer, 0x100); 23 23 status = ControllerCommand (command, CONTROLLER_PROMPT, &buffer); 24 if (status) fwrite (buffer.buffer, 1, buffer.Nbuffer, stderr); 24 if (status) { 25 fwrite (buffer.buffer, 1, buffer.Nbuffer, stderr); 26 } else { 27 fprintf (stderr, "controller is down\n"); 28 } 25 29 FreeIOBuffer (&buffer); 26 30 return (TRUE); 27 28 31 } -
trunk/Ohana/src/opihi/pantasks/psched.c
r4714 r4763 41 41 42 42 signal (SIGINT, SIG_IGN); 43 signal (SIGPIPE, gotsignal); 44 signal (SIGTSTP, gotsignal); 45 signal (SIGTTIN, gotsignal); 43 46 return; 44 47 } … … 57 60 return; 58 61 } 62 63 void gotsignal (int signum) { 64 fprintf (stderr, "got signal : %d\n", signum); 65 return; 66 } -
trunk/Ohana/src/opihi/pclient/ChildOps.c
r4762 r4763 1 1 # include "pclient.h" 2 #include <sys/ioctl.h> 3 #include <sys/types.h> 4 #include <unistd.h> 5 #include <stropts.h> 2 6 3 7 static int Nbad = 0; … … 29 33 struct timeval now; 30 34 31 /* this is really lame : check if we are calling too quickly 32 this is unneeded: pclient.c rl_keyboard_input_timeout limits 33 the rate 34 gettimeofday (&now, NULL); 35 dtime = DTIME (now, last); 35 /* runaway test - if pcontrol is killed, pclient starts running away. this test is a bit 36 dangerous: the choice of dtime probably depends on the processor and the value provided to 37 pclient.c:rl_set_keyboard_input_timeout (1000); note that we cannot use getppid == 1 as a test 38 because the parent of pclient is the ssh process on the pclient host, not pcontrol. in any 39 case, the opihi shell catches if the ssh dies using getppid 40 */ 41 gettimeofday (&now, (void *) NULL); 42 dtime = 1e6*DTIME (now, last); 43 if (dtime < 100) { 44 Nbad ++; 45 if (Nbad > 10) { 46 fprintf (stderr, "runaway!\n"); 47 exit (2); 48 } 49 } 50 if (dtime > 950) Nbad = 0; 36 51 last = now; 37 if (dtime < 0.0001) Nbad ++;38 if (dtime > 0.01) Nbad = 0;39 if (Nbad > 10) exit (2);40 */41 42 /* this is a bit lame : we must exit if calling process exits */43 ppid = getppid();44 if (ppid == 1) exit (2);45 52 46 53 CheckChildStatus (); -
trunk/Ohana/src/opihi/pclient/pclient.c
r4762 r4763 21 21 rl_event_hook = CheckChild; 22 22 rl_set_keyboard_input_timeout (1000); 23 /* 1 ms seems to be the minimum valid number */ 23 24 24 25 set_str_variable ("HISTORY", opihi_history); -
trunk/Ohana/src/opihi/pcontrol/CheckBusyJob.c
r4689 r4763 20 20 switch (status) { 21 21 case PCLIENT_DOWN: 22 HarvestHost (host[0].pid); 22 23 UnlinkJobAndHost (job); 23 24 PutHost (host, PCONTROL_HOST_DOWN, STACK_BOTTOM); -
trunk/Ohana/src/opihi/pcontrol/CheckDoneHost.c
r4575 r4763 14 14 switch (status) { 15 15 case PCLIENT_DOWN: 16 /** do we need to close the connection? **/ 16 if (VerboseMode()) fprintf (stderr, "host %s is down\n", host[0].hostname); 17 /* DONE host does not have an incomplete job */ 18 HarvestHost (host[0].pid); 17 19 PutHost (host, PCONTROL_HOST_DOWN, STACK_BOTTOM); 18 if (VerboseMode()) fprintf (stderr, "host %s is down\n", host[0].hostname);19 20 FreeIOBuffer (&buffer); 20 21 return (FALSE); 22 /** do we need to close the connection? **/ 21 23 22 24 case PCLIENT_HUNG: -
trunk/Ohana/src/opihi/pcontrol/CheckHost.c
r4762 r4763 13 13 case 0: 14 14 if (VerboseMode()) fprintf (stderr, "host %s is down\n", host[0].hostname); 15 16 /* if host has a job, job is dead, push to Pending */ 17 if (host[0].stack == PCONTROL_HOST_BUSY) { 18 job = (Job *) host[0].job; 19 if (job != NULL) { 20 N = FindJob (job[0].JobID, PCONTROL_JOB_BUSY); 21 if (N < 0) { 22 fprintf (stderr, "programming error: job is not found in BUSY list\n"); 23 exit (2); 24 } 25 job[0].host = NULL; /* unlink host & job */ 26 PutJob (job, PCONTROL_JOB_PENDING, STACK_BOTTOM); 27 } 15 /* if host has a job, job is dead, return to Pending */ 16 job = (Job *) host[0].job; 17 if (job != NULL) { 18 UnlinkJobAndHost (job); 19 PutJob (job, PCONTROL_JOB_PENDING, STACK_BOTTOM); 28 20 } 29 host[0].job = NULL;21 HarvestHost (host[0].pid); 30 22 PutHost (host, PCONTROL_HOST_DOWN, STACK_BOTTOM); 31 23 FreeIOBuffer (&buffer); -
trunk/Ohana/src/opihi/pcontrol/CheckSystem.c
r4762 r4763 1 1 # include "pcontrol.h" 2 2 3 static Npass = 0;3 static struct timeval lastlive = {0, 0}; 4 4 5 5 int CheckSystem () { 6 7 struct timeval now; 8 float dtime; 6 9 7 10 /* we want to give each block a maximum allowed time */ … … 14 17 CheckDownHosts(0.100); /* launch the host */ 15 18 19 /* always allow at least one test */ 16 20 /* most tests require about 2ms per host. 17 21 CheckDoneJobs must depend on the size of the output buffer */ 18 22 19 /* this is a waste of cycles: no need to do this every loop */ 20 if (Npass > 20) { 23 gettimeofday (&now, (void *) NULL); 24 dtime = DTIME (now, lastlive); 25 if (dtime > 1.0) { 21 26 CheckLiveHosts(0.040); 22 Npass = 0; 23 } else { 24 Npass ++; 25 } 27 lastlive = now; 28 } 26 29 27 30 if (0) { -
trunk/Ohana/src/opihi/pcontrol/KillJob.c
r4450 r4763 18 18 switch (status) { 19 19 case PCLIENT_DOWN: 20 HarvestHost (host[0].pid); 20 21 UnlinkJobAndHost (job); 22 PutJob (job, PCONTROL_JOB_PENDING, STACK_BOTTOM); 21 23 PutHost (host, PCONTROL_HOST_DOWN, STACK_BOTTOM); 22 PutJob (job, PCONTROL_JOB_PENDING, STACK_BOTTOM);23 24 FreeIOBuffer (&buffer); 24 25 return (FALSE); -
trunk/Ohana/src/opihi/pcontrol/ResetJob.c
r4450 r4763 20 20 switch (status) { 21 21 case PCLIENT_DOWN: 22 /*** different behavior for ANYHOST, WANTHOST, NEEDHOST ***/22 /*** different behavior for ANYHOST, WANTHOST, NEEDHOST? ***/ 23 23 fprintf (stderr, "host %s is down\n", host[0].hostname); 24 HarvestHost (host[0].pid); 25 PutHost (host, PCONTROL_HOST_DOWN, STACK_BOTTOM); 24 26 FreeIOBuffer (&buffer); 25 27 return (FALSE); -
trunk/Ohana/src/opihi/pcontrol/StartJob.c
r4575 r4763 88 88 job[0].host = NULL; 89 89 host[0].job = NULL; 90 HarvestHost (host[0].pid); 90 91 PutHost (host, PCONTROL_HOST_DOWN, STACK_BOTTOM); 91 92 PutJob (job, PCONTROL_JOB_PENDING, STACK_BOTTOM); -
trunk/Ohana/src/opihi/pcontrol/StopHosts.c
r4762 r4763 29 29 int StopHost (Host *host) { 30 30 31 int result;32 int waitstatus;33 31 int status; 34 32 IOBuffer buffer; … … 53 51 exit (1); 54 52 } 53 HarvestHost (host[0].pid); 54 return (TRUE); 55 } 55 56 56 /* check current child status */ 57 void DownHost (Host *host) { 58 CLOSE (host[0].stdin); 59 CLOSE (host[0].stdout); 60 CLOSE (host[0].stderr); 61 host[0].job = NULL; 62 PutHost (host, PCONTROL_HOST_DOWN, STACK_BOTTOM); 63 } 64 65 void OffHost (Host *host) { 66 CLOSE (host[0].stdin); 67 CLOSE (host[0].stdout); 68 CLOSE (host[0].stderr); 69 host[0].job = NULL; 70 PutHost (host, PCONTROL_HOST_OFF, STACK_BOTTOM); 71 } 72 73 int HarvestHost (int pid) { 74 75 int result; 76 int waitstatus; 77 57 78 /* I probably should loop a few time with max timeout larger than 10ms... */ 58 79 usleep (10000); 59 result = waitpid ( host[0].pid, &waitstatus, WNOHANG);80 result = waitpid (pid, &waitstatus, WNOHANG); 60 81 switch (result) { 61 82 case -1: /* error with waitpid */ … … 83 104 84 105 default: 85 if (result != host[0].pid) {86 fprintf (stderr, "waitpid error: mis-matched PID (%d vs %d). programming error\n", result, host[0].pid);106 if (result != pid) { 107 fprintf (stderr, "waitpid error: mis-matched PID (%d vs %d). programming error\n", result, pid); 87 108 exit (1); 88 109 } … … 101 122 return (TRUE); 102 123 } 103 104 void DownHost (Host *host) {105 CLOSE (host[0].stdin);106 CLOSE (host[0].stdout);107 CLOSE (host[0].stderr);108 host[0].job = NULL;109 PutHost (host, PCONTROL_HOST_DOWN, STACK_BOTTOM);110 }111 112 void OffHost (Host *host) {113 CLOSE (host[0].stdin);114 CLOSE (host[0].stdout);115 CLOSE (host[0].stderr);116 host[0].job = NULL;117 PutHost (host, PCONTROL_HOST_OFF, STACK_BOTTOM);118 } -
trunk/Ohana/src/opihi/pcontrol/pcontrol.c
r4762 r4763 19 19 rl_attempted_completion_function = command_completer; 20 20 rl_event_hook = CheckSystem; 21 rl_set_keyboard_input_timeout (1000 00);21 rl_set_keyboard_input_timeout (1000); 22 22 23 23 set_str_variable ("HISTORY", opihi_history); … … 30 30 /* ignore the history file. to change this, see, eg, mana.c */ 31 31 signal (SIGINT, SIG_IGN); 32 signal (SIGPIPE, gotsignal); 33 signal (SIGTSTP, gotsignal); 34 signal (SIGTTIN, gotsignal); 32 35 return; 33 36 } … … 44 47 return; 45 48 } 49 50 void gotsignal (int signum) { 51 fprintf (stderr, "got signal : %d\n", signum); 52 return; 53 } -
trunk/Ohana/src/opihi/scripts/psched.pro
r4714 r4763 1 1 2 2 controller exit true 3 controller host add kiawe 4 controller host add alala 3 # controller host add kiawe 4 $Ntest = 0 5 # controller host add alala 5 6 # verbose on 7 pulse 1000 8 controller pulse 1000 9 10 macro load.machines 11 if ($0 != 2) 12 echo "load.machines (nmach)" 13 break 14 end 15 16 for i 0 $1 17 $n = $i + 1 18 sprintf host "po%02d" $n 19 controller host add $host 20 end 21 end 6 22 7 23 task test 8 24 command partest 9 periods -poll 0.1 10 periods -exec 0.1 25 # polling period is no longer valid: we check for completed controller tasks 26 # correction: still valid for local tasks 27 periods -poll 0.20 28 periods -exec 0.001 11 29 periods -timeout 10.0 12 nmax 530 nmax 1024 13 31 host anyhost 14 32 … … 18 36 queuedelete stdout 19 37 queuedelete stderr 20 memory leaks 38 date date 39 queuepush done "$date" 40 $Ntest ++ 41 # memory leaks 21 42 # queuesize stdout -var Nstdout 22 43 # for i 0 $Nstdout … … 42 63 end 43 64 end 65 66 # pulse == 100ms 67 # poll/exit = 0.2 : 29 sec / 100 jobs 68 # poll/exit = 0.1 : 20 sec / 100 jobs 69 # poll/exit = 0.05 : 17 sec / 100 jobs 70 # poll/exit = 0.01 : 18 sec / 100 jobs 71 72 # pulse == 10ms 73 # poll/exit = 0.2 : 20 sec / 100 jobs 74 # poll/exit = 0.10 : 12 sec / 100 jobs 75 # poll/exit = 0.05 : 12 sec / 100 jobs 76 # poll/exit = 0.01 : 9 sec / 100 jobs 77 78 # we are limited here by how quickly we can send data to the 79 # controller. this is limited by the occasional 'CheckSystem' 80 # loops, with ~40ms minimum. 81 82 # seems to be faster on po01 from kiawe (less interference?) 83 84 # pulse == 1ms, controller pulse == 1ms 85 # poll/exit = 0.01 : 3 sec / 100 jobs 86 # 2 mach, 3 sec 87 # 4 mach, 3 sec 88 # 8 mach, 3 sec 89 90 # 16 machines, 500 jobs, 13 sec: 26ms / job 91 # 32 machines, 1024 jobs, 26 sec: 26ms / job 92 # job harvesting rate is still the limitation. Each job harvest requires: 93 # - jobstack exit 94 # - stdout 95 # - stderr 96 # - delete 97 # - jobstack crash 98
Note: See TracChangeset for help on using the changeset viewer.
