/*
 * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2011 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2006-2013 Los Alamos National Security, LLC.
 *                         All rights reserved.
 * Copyright (c) 2009-2012 Cisco Systems, Inc.  All rights reserved.
 * Copyright (c) 2011      Oak Ridge National Labs.  All rights reserved.
 * Copyright (c) 2013-2020 Intel, Inc.  All rights reserved.
 * Copyright (c) 2015      Mellanox Technologies, Inc.  All rights reserved.
 * Copyright (c) 2021-2022 Nanook Consulting.  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 *
 */

#include "src/include/pmix_config.h"
#include "include/pmix.h"

/* NOTE(review): the system header names were lost from this file; the list
 * below is reconstructed from the standard facilities the code uses
 * (bool, fprintf, strtol/exit, strncmp, nanosleep, usleep). */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include "src/class/pmix_object.h"
#include "src/include/pmix_globals.h"
#include "src/util/pmix_argv.h"
#include "src/util/pmix_output.h"
#include "src/util/pmix_printf.h"

/* Identity of this client, filled in by PMIx_Init. */
static pmix_proc_t myproc;

/* Set by notification_fn once an event has been delivered; polled by main.
 * Declared volatile because it is written from a callback and read in a
 * busy-wait loop (the callback presumably runs on the PMIx progress thread),
 * so the compiler must re-read it on every iteration. */
static volatile bool completed = false;

/* Reference of our event handler as reported to errhandler_reg_callbk.
 * handler_ref is only meaningful once handler_registered is true. */
static volatile size_t handler_ref = 0;
static volatile bool handler_registered = false;

/**
 * Event handler: logs the notification and every info entry that came with
 * it, acknowledges the event through cbfunc (if given), and raises the
 * `completed` flag so that main stops waiting.
 *
 * @param evhdlr_registration_id  unused
 * @param status                  event status code, printed as a string
 * @param source                  process that generated the event
 * @param info                    array of `ninfo` info entries
 * @param ninfo                   number of entries in `info`
 * @param results                 unused
 * @param nresults                unused
 * @param cbfunc                  completion callback to invoke, may be NULL
 * @param cbdata                  opaque pointer passed back to cbfunc
 */
static void notification_fn(size_t evhdlr_registration_id, pmix_status_t status,
                            const pmix_proc_t *source, pmix_info_t info[], size_t ninfo,
                            pmix_info_t results[], size_t nresults,
                            pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata)
{
    size_t n;

    PMIX_HIDE_UNUSED_PARAMS(evhdlr_registration_id, results, nresults);

    pmix_output(0, "Client %s:%d NOTIFIED with status %s source %s:%d and %d info",
                myproc.nspace, myproc.rank, PMIx_Error_string(status), source->nspace,
                source->rank, (int) ninfo);

    for (n = 0; n < ninfo; n++) {
        /* PMIX_PROCID entries carrying a proc value are reported in full;
         * for everything else only the key is printed */
        if (0 == strncmp(info[n].key, PMIX_PROCID, PMIX_MAX_KEYLEN)
            && PMIX_PROC == info[n].value.type) {
            pmix_output(0, "[%s:%d] added proc: %s:%d", myproc.nspace, myproc.rank,
                        info[n].value.data.proc->nspace, info[n].value.data.proc->rank);
        } else {
            pmix_output(0, "[%s:%d] key: %s", myproc.nspace, myproc.rank, info[n].key);
        }
    }

    if (NULL != cbfunc) {
        cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata);
    }
    completed = true;
}

/**
 * Completion callback for the event-handler deregistration: logs the status.
 *
 * @param status  result of the operation
 * @param cbdata  unused
 */
static void op_callbk(pmix_status_t status, void *cbdata)
{
    PMIX_HIDE_UNUSED_PARAMS(cbdata);
    pmix_output(0, "CLIENT: OP CALLBACK CALLED WITH STATUS %d", status);
}

/**
 * Completion callback for the event-handler registration: logs the outcome
 * and, on success, records the handler reference so that main can later
 * deregister exactly that handler.
 *
 * @param status          result of the registration
 * @param errhandler_ref  reference assigned to the handler
 * @param cbdata          unused
 */
static void errhandler_reg_callbk(pmix_status_t status, size_t errhandler_ref, void *cbdata)
{
    PMIX_HIDE_UNUSED_PARAMS(cbdata);
    pmix_output(0, "Client: ERRHANDLER REGISTRATION CALLBACK CALLED WITH STATUS %d, ref=%lu",
                status, (unsigned long) errhandler_ref);

    if (PMIX_SUCCESS == status) {
        /* publish the reference before the flag that guards it */
        handler_ref = errhandler_ref;
        handler_registered = true;
    }
}

/**
 * Test client that deliberately dies around a PMIx_Fence.
 *
 * Usage: <prog> [case]
 *   no argument or "0"  rank 0 exits shortly after the first fence while the
 *                       other ranks enter a second fence, report its return
 *                       status and wait for an event notification
 *   "1"                 rank 0 exits after a longer delay; the other ranks
 *                       go straight to finalize
 *
 * @return 0 when the client reaches the end of main; the process calls
 *         exit(1) for an unknown case and on the deliberate failure paths,
 *         and exit(0) if PMIx_Init fails.
 */
int main(int argc, char **argv)
{
    int rc;
    pmix_value_t value;
    pmix_value_t *val = &value;
    pmix_proc_t proc;
    uint32_t nprocs;
    pmix_status_t code[6] = {PMIX_ERR_PROC_ABORTING,        PMIX_ERR_PROC_ABORTED,
                             PMIX_ERR_PROC_REQUESTED_ABORT, PMIX_ERR_JOB_TERMINATED,
                             PMIX_ERR_UNREACH,              PMIX_ERR_LOST_CONNECTION};
    bool fail_early = false;
    bool fail_after_participate = false;

    if (1 == argc) {
        fail_early = true;
    } else if (2 == argc) {
        char *end = NULL;
        long opt = strtol(argv[1], &end, 10);

        /* reject empty or non-numeric input instead of treating it as 0 */
        if (end == argv[1] || '\0' != *end) {
            fprintf(stderr, "Unknown case\n");
            exit(1);
        }
        switch (opt) {
        case 0:
            fail_early = true;
            break;
        case 1:
            fail_after_participate = true;
            break;
        default:
            fprintf(stderr, "Unknown case\n");
            exit(1);
        }
    }

    /* init us */
    if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Init failed: %d", myproc.nspace, myproc.rank,
                    rc);
        exit(0);
    }
    pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank);

    /* get our job size */
    pmix_strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN);
    proc.rank = PMIX_RANK_WILDCARD;
    if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Get job size failed: %s", myproc.nspace,
                    myproc.rank, PMIx_Error_string(rc));
        goto done;
    }
    nprocs = val->data.uint32;
    PMIX_VALUE_RELEASE(val);
    pmix_output(0, "Client %s:%d job size %d", myproc.nspace, myproc.rank, nprocs);
    completed = false;

    /* register our errhandler */
    PMIx_Register_event_handler(code, 6, NULL, 0, notification_fn, errhandler_reg_callbk, NULL);

    /* call fence to sync */
    PMIX_PROC_CONSTRUCT(&proc);
    pmix_strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN);
    proc.rank = PMIX_RANK_WILDCARD;
    if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) {
        pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", myproc.nspace, myproc.rank,
                    rc);
        goto done;
    }

    if (0 == myproc.rank) {
        /* rank 0 is the one that dies; it exits without finalizing */
        if (fail_early) {
            // wait a little bit to let someone else start the collective
            usleep(1000);
            pmix_output(0, "Client ns %s rank %d: bye-bye!", myproc.nspace, myproc.rank);
            exit(1);
        } else if (fail_after_participate) {
            usleep(500000);
            pmix_output(0, "Client ns %s rank %d: bye-bye!", myproc.nspace, myproc.rank);
            exit(1);
        }
    } else {
        if (fail_early) {
            if (1 != myproc.rank) {
                // wait a little bit to let the other guy start the collective
                usleep(2000);
            }
            pmix_output(0, "Rank %u calling Fence", myproc.rank);
            rc = PMIx_Fence(&proc, 1, NULL, 0);
            // let the user know
            pmix_output(0, "Rank %u: Fence returned %d(%s)", myproc.rank, rc,
                        PMIx_Error_string(rc));
            // give the server a chance to learn of the problem
            while (!completed) {
                struct timespec ts;
                ts.tv_sec = 0;
                ts.tv_nsec = 100000;
                nanosleep(&ts, NULL);
            }
        }
    }

done:
    /* finalize us */
    pmix_output(0, "Client ns %s rank %d: Finalizing", myproc.nspace, myproc.rank);

    /* only deregister a handler whose reference was actually reported to us;
     * this also skips the call on the path where nothing was registered */
    if (handler_registered) {
        PMIx_Deregister_event_handler(handler_ref, op_callbk, NULL);
    }

    if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) {
        fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace,
                myproc.rank, rc);
    } else {
        fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n",
                myproc.nspace, myproc.rank);
    }
    fflush(stderr);
    return (0);
}