/*
 * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2011 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2006-2013 Los Alamos National Security, LLC.
 *                         All rights reserved.
 * Copyright (c) 2009-2012 Cisco Systems, Inc.  All rights reserved.
 * Copyright (c) 2011      Oak Ridge National Labs.  All rights reserved.
 * Copyright (c) 2013-2020 Intel, Inc.  All rights reserved.
 * Copyright (c) 2015      Mellanox Technologies, Inc.  All rights reserved.
 * Copyright (c) 2021-2022 Nanook Consulting  All rights reserved.
 * Copyright (c) 2024      Triad National Security, LLC. All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 *
 */

#include "src/include/pmix_config.h"
#include "include/pmix.h"

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include "src/class/pmix_object.h"
#include "src/util/pmix_argv.h"
#include "src/include/pmix_globals.h"
#include "src/util/pmix_output.h"
#include "src/util/pmix_printf.h"

/* number of processes in our job, as reported by PMIX_JOB_SIZE */
static uint32_t nprocs;
/* our own identity, filled in by PMIx_Init */
static pmix_proc_t myproc;
/* number of PMIx_Get_nb callbacks that have fired so far */
static uint32_t getcount = 0;
/* running sequence number stamped into the output messages */
static int msgnum = 0;

/* Spin (sleeping 10 usec per pass) until the flag expression becomes false */
#define PMIX_WAIT_FOR_COMPLETION(a) \
    do {                            \
        while ((a)) {               \
            usleep(10);             \
        }                           \
    } while (0)

/*
 * Completion callback for PMIx_Fence_nb.
 *
 * status - completion status (ignored)
 * cbdata - pointer to the caller's "active" flag; cleared here so the
 *          caller waiting in PMIX_WAIT_FOR_COMPLETION can proceed
 */
static void opcbfunc(pmix_status_t status, void *cbdata)
{
    bool *active = (bool *) cbdata;
    PMIX_HIDE_UNUSED_PARAMS(status);

    pmix_output(0, "Rank %d[msg=%d]: completed fence_nb", myproc.rank, msgnum);
    *active = false;
}

/*
 * Completion callback for PMIx_Get_nb.
 *
 * status - result of the get
 * val    - returned value; only examined when status is PMIX_SUCCESS
 * cbdata - the heap-allocated key string that was requested. The callback
 *          owns it and frees it before returning.
 *
 * Keys containing "local" must carry the uint64 value 1234, keys containing
 * "remote" must carry the string "1234"; any other key is reported as wrong.
 * getcount is bumped on every invocation, success or failure, so main() can
 * tell when all outstanding gets have been answered.
 */
static void valcbfunc(pmix_status_t status, pmix_value_t *val, void *cbdata)
{
    char *key = (char *) cbdata;

    if (PMIX_SUCCESS == status) {
        if (NULL != strstr(key, "local")) {
            if (PMIX_UINT64 != val->type) {
                pmix_output(0, "Rank %d[msg=%d]: PMIx_Get_nb Key %s returned wrong type: %d",
                            myproc.rank, msgnum, key, val->type);
                ++msgnum;
                goto done;
            }
            if (1234 != val->data.uint64) {
                pmix_output(0, "Rank %d[msg=%d]: PMIx_Get_nb Key %s returned wrong value: %d",
                            myproc.rank, msgnum, key, (int) val->data.uint64);
                ++msgnum;
                goto done;
            }
        } else if (NULL != strstr(key, "remote")) {
            if (PMIX_STRING != val->type) {
                pmix_output(0, "Rank %d[msg=%d]: PMIx_Get_nb Key %s returned wrong type: %d",
                            myproc.rank, msgnum, key, val->type);
                ++msgnum;
                goto done;
            }
            if (0 != strcmp(val->data.string, "1234")) {
                pmix_output(0, "Rank %d[msg=%d]: PMIx_Get_nb Key %s returned wrong value: %s",
                            myproc.rank, msgnum, key, val->data.string);
                ++msgnum;
                goto done;
            }
        } else {
            pmix_output(0, "Rank %d[msg=%d]: PMIx_Get_nb returned wrong key: %s", myproc.rank,
                        msgnum, key);
            ++msgnum;
            goto done;
        }
        pmix_output(0, "Rank %d[msg=%d]: PMIx_Get_nb Key %s returned correctly", myproc.rank,
                    msgnum, key);
    } else {
        pmix_output(0, "Rank %d[msg=%d]: PMIx_Get_nb Key %s failed: %s", myproc.rank, msgnum, key,
                    PMIx_Error_string(status));
        ++msgnum;
    }

done:
    free(key);
    getcount++;
}

/* Return true when rank appears in the first nlocal entries of locals */
static bool rank_is_local(pmix_rank_t rank, const pmix_rank_t *locals, uint32_t nlocal)
{
    uint32_t k;

    for (k = 0; k < nlocal; k++) {
        if (rank == locals[k]) {
            return true;
        }
    }
    return false;
}

/*
 * Test driver: each rank publishes an internal, a local-scope and a
 * global-scope value, then issues a non-blocking get for every rank in the
 * job ("<nspace>-<rank>-local" for node-local peers, "<nspace>-<rank>-remote"
 * otherwise) and waits for all callbacks before fencing and finalizing.
 *
 * Options:
 *   -s | --sleep <n>    value passed to sleep() by rank 0 before it commits
 *                       its data (default 2)
 *   -f | --fence        issue a PMIx_Fence_nb after the commit
 *   -t | --timeout <n>  attach a PMIX_TIMEOUT of <n> to every PMIx_Get_nb
 *
 * Always returns 0; failures are reported through the output stream.
 */
int main(int argc, char **argv)
{
    int rc, i;
    int timeout_val;
    pmix_value_t value;
    pmix_value_t *val = NULL;
    char *tmp;
    pmix_proc_t proc;
    uint32_t n, num_gets, nlocal;
    uint32_t sleeptime = 2;
    bool active = false;
    bool dofence = false;
    bool local, all_local;
    char **peers;
    pmix_rank_t *locals = NULL;
    pmix_info_t timeout, *iptr = NULL;
    size_t ninfo = 0;

    for (i = 1; i < argc; i++) {
        if (0 == strcmp(argv[i], "-s") || 0 == strcmp(argv[i], "--sleep")) {
            if (NULL == argv[i + 1]) {
                fprintf(stderr, "Error: %s requires an integer argument\n", argv[i]);
                exit(1);
            }
            sleeptime = strtoul(argv[i + 1], NULL, 10);
            ++i; /* skip the option's argument */
        } else if (0 == strcmp(argv[i], "-f") || 0 == strcmp(argv[i], "--fence")) {
            dofence = true;
        } else if (0 == strcmp(argv[i], "-t") || 0 == strcmp(argv[i], "--timeout")) {
            if (NULL == argv[i + 1]) {
                fprintf(stderr, "Error: %s requires an integer argument\n", argv[i]);
                exit(1);
            }
            timeout_val = strtoul(argv[i + 1], NULL, 10);
            PMIX_INFO_LOAD(&timeout, PMIX_TIMEOUT, &timeout_val, PMIX_INT);
            iptr = &timeout;
            ninfo = 1;
            ++i; /* skip the option's argument */
        }
    }

    /* init us */
    if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) {
        pmix_output(0, "Rank %d: PMIx_Init failed: %d", myproc.rank, rc);
        exit(1);
    }
    pmix_output(0, "Rank %d[msg=%d]: Running", myproc.rank, msgnum);
    ++msgnum;

    /* get our job size */
    pmix_strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN);
    proc.rank = PMIX_RANK_WILDCARD;
    if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) {
        pmix_output(0, "Rank %d[msg=%d]: PMIx_Get job size failed: %s", myproc.rank, msgnum,
                    PMIx_Error_string(rc));
        ++msgnum;
        goto done;
    }
    nprocs = val->data.uint32;
    PMIX_VALUE_RELEASE(val);
    pmix_output(0, "Rank %d[msg=%d]: job size %d", myproc.rank, msgnum, nprocs);
    ++msgnum;

    /* put a few values - each key string is only needed for the duration
     * of the blocking store/put call, so release it right afterwards */
    if (0 > asprintf(&tmp, "%s-%d-internal", myproc.nspace, myproc.rank)) {
        errno = ENOMEM;
        abort();
    }
    value.type = PMIX_UINT32;
    value.data.uint32 = 1234;
    rc = PMIx_Store_internal(&myproc, tmp, &value);
    free(tmp);
    if (PMIX_SUCCESS != rc) {
        pmix_output(0, "Rank %d[msg=%d]: PMIx_Store_internal failed: %d", myproc.rank, msgnum, rc);
        ++msgnum;
        goto done;
    }

    if (0 > asprintf(&tmp, "%s-%d-local", myproc.nspace, myproc.rank)) {
        errno = ENOMEM;
        abort();
    }
    value.type = PMIX_UINT64;
    value.data.uint64 = 1234;
    rc = PMIx_Put(PMIX_LOCAL, tmp, &value);
    free(tmp);
    if (PMIX_SUCCESS != rc) {
        pmix_output(0, "Rank %d[msg=%d]: PMIx_Put internal failed: %d", myproc.rank, msgnum, rc);
        ++msgnum;
        goto done;
    }

    if (0 > asprintf(&tmp, "%s-%d-remote", myproc.nspace, myproc.rank)) {
        errno = ENOMEM;
        abort();
    }
    value.type = PMIX_STRING;
    value.data.string = "1234";
    rc = PMIx_Put(PMIX_GLOBAL, tmp, &value);
    free(tmp);
    if (PMIX_SUCCESS != rc) {
        pmix_output(0, "Rank %d[msg=%d]: PMIx_Put internal failed: %d", myproc.rank, msgnum, rc);
        ++msgnum;
        goto done;
    }

    /* introduce a delay by one rank so we can check what happens
     * if a "get" is received prior to data being provided */
    if (0 == myproc.rank) {
        sleep(sleeptime);
        pmix_output(0, "Rank 0[msg=%d]: WOKE UP", msgnum);
        ++msgnum;
    }

    /* commit the data to the server */
    if (PMIX_SUCCESS != (rc = PMIx_Commit())) {
        pmix_output(0, "Rank %d[msg=%d]: PMIx_Commit failed: %d", myproc.rank, msgnum, rc);
        ++msgnum;
        goto done;
    }

    if (dofence) {
        /* call fence_nb, but don't return any data */
        PMIX_PROC_CONSTRUCT(&proc);
        pmix_strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN);
        proc.rank = PMIX_RANK_WILDCARD;
        active = true;
        if (PMIX_SUCCESS != (rc = PMIx_Fence_nb(&proc, 1, NULL, 0, opcbfunc, &active))) {
            pmix_output(0, "Rank %d[msg=%d]: PMIx_Fence failed: %d", myproc.rank, msgnum, rc);
            ++msgnum;
            goto done;
        }
    }

    /* get a list of our local peers */
    if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_LOCAL_PEERS, NULL, 0, &val))) {
        pmix_output(0, "Rank %d[msg=%d]: PMIx_Get local peers failed: %s", myproc.rank, msgnum,
                    PMIx_Error_string(rc));
        ++msgnum;
        goto done;
    }
    /* split the returned string to get the rank of each local peer */
    peers = PMIx_Argv_split(val->data.string, ',');
    PMIX_VALUE_RELEASE(val);
    nlocal = PMIx_Argv_count(peers);
    all_local = (nprocs == nlocal);
    if (!all_local && 0 < nlocal) {
        locals = malloc(nlocal * sizeof(*locals));
        if (NULL == locals) {
            PMIx_Argv_free(peers);
            pmix_output(0, "Rank %d[msg=%d]: unable to allocate local peer list", myproc.rank,
                        msgnum);
            ++msgnum;
            goto done;
        }
        for (n = 0; n < nlocal; n++) {
            locals[n] = strtoul(peers[n], NULL, 10);
        }
    }
    PMIx_Argv_free(peers);

    /* get the committed data from every rank in the job */
    num_gets = 0;
    for (n = 0; n < nprocs; n++) {
        const char *scope;

        /* the request must target rank n, and locality must be judged
         * against rank n rather than whatever proc.rank last held */
        proc.rank = n;
        local = all_local || rank_is_local(n, locals, nlocal);
        scope = local ? "local" : "remote";

        if (0 > asprintf(&tmp, "%s-%u-%s", myproc.nspace, n, scope)) {
            errno = ENOMEM;
            abort();
        }
        pmix_output(0, "Rank %u[msg=%d]: retrieving %s from %s proc %u", myproc.rank, msgnum, tmp,
                    scope, n);
        ++msgnum;
        /* tmp doubles as the callback data; valcbfunc frees it */
        if (PMIX_SUCCESS != (rc = PMIx_Get_nb(&proc, tmp, iptr, ninfo, valcbfunc, tmp))) {
            pmix_output(0, "Rank %d[msg=%d]: PMIx_Get %s failed: %d", myproc.rank, msgnum, tmp,
                        rc);
            ++msgnum;
            goto done;
        }
        ++num_gets;
    }

    if (dofence) {
        /* wait for the first fence to finish */
        PMIX_WAIT_FOR_COMPLETION(active);
    }

    /* wait for all my "get" calls to complete */
    while (getcount < num_gets) {
        struct timespec ts;
        ts.tv_sec = 0;
        ts.tv_nsec = 100000;
        nanosleep(&ts, NULL);
    }

    /* call fence again so everyone waits before leaving */
    proc.rank = PMIX_RANK_WILDCARD;
    if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) {
        pmix_output(0, "Rank %d[msg=%d]: PMIx_Fence failed: %d", myproc.rank, msgnum, rc);
        ++msgnum;
        goto done;
    }

done:
    free(locals);

    /* finalize us */
    pmix_output(0, "Rank %d[msg=%d]: Finalizing", myproc.rank, msgnum);
    ++msgnum;
    if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) {
        fprintf(stderr, "Rank %d[msg=%d]:PMIx_Finalize failed: %d\n", myproc.rank, msgnum, rc);
    } else {
        fprintf(stderr, "Rank %d[msg=%d]: PMIx_Finalize successfully completed\n", myproc.rank,
                msgnum);
    }
    fflush(stderr);
    return (0);
}