When a job is requeued during the prologue step, make sure the other moms involved in a multinode allocation are told about it: exec_bail() now calls send_sisters() with IM_ABORT_JOB before marking the job as exiting. Otherwise they will report errors when the job is later rerun on them. --- pbs-2.3.12-pw/src/include/mom_func.h | 1 + pbs-2.3.12-pw/src/resmom/mom_comm.c | 4 +--- pbs-2.3.12-pw/src/resmom/start_exec.c | 1 + 3 files changed, 3 insertions(+), 3 deletions(-) diff -puN src/include/mom_func.h~prologue-bounce src/include/mom_func.h --- pbs-2.3.12/src/include/mom_func.h~prologue-bounce 2004-04-17 11:02:33.000000000 -0400 +++ pbs-2.3.12-pw/src/include/mom_func.h 2004-04-17 11:02:33.000000000 -0400 @@ -157,6 +157,7 @@ extern int run_pelog A_((int which, ch extern int is_joined A_((job *)); extern void check_busy A_((double)); extern void state_to_server A_((int)); +extern int send_sisters(const job *pjob, int com); /* Defines for pe_io_type, see run_pelog() */ diff -puN src/resmom/mom_comm.c~prologue-bounce src/resmom/mom_comm.c --- pbs-2.3.12/src/resmom/mom_comm.c~prologue-bounce 2004-04-17 11:02:33.000000000 -0400 +++ pbs-2.3.12-pw/src/resmom/mom_comm.c 2004-04-17 11:02:33.000000000 -0400 @@ -467,9 +467,7 @@ im_compose(stream, jobid, cookie, comman ** the job -> pjob. */ int -send_sisters(pjob, com) - job *pjob; - int com; +send_sisters(const job *pjob, int com) { int i, num, ret; eventent *ep; diff -puN src/resmom/start_exec.c~prologue-bounce src/resmom/start_exec.c --- pbs-2.3.12/src/resmom/start_exec.c~prologue-bounce 2004-04-17 11:02:33.000000000 -0400 +++ pbs-2.3.12-pw/src/resmom/start_exec.c 2004-04-17 11:03:39.000000000 -0400 @@ -314,6 +314,7 @@ int mom_restart_job(pjob, path) void exec_bail(job *pjob, int code) { + (void) send_sisters(pjob, IM_ABORT_JOB); pjob->ji_qs.ji_substate = JOB_SUBSTATE_EXITING; pjob->ji_qs.ji_un.ji_momt.ji_exitstat = code; exiting_tasks = 1; _