/*
 * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2017 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2008      Cisco Systems, Inc.  All rights reserved.
 * Copyright (c) 2013-2022 Sandia National Laboratories. All rights reserved.
 * Copyright (c) 2015      Los Alamos National Security, LLC. All rights
 *                         reserved.
 * Copyright (c) 2015      Bull SAS.  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "ompi_config.h"

#include "ompi/datatype/ompi_datatype.h"
#include "ompi/datatype/ompi_datatype_internal.h"
#include "ompi/mca/coll/base/base.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/op/op.h"

#include "coll_portals4.h"
#include "coll_portals4_request.h"

/* Fixed portal table indices this component insists on; portals4_init_query
 * verifies that PtlPTAlloc actually returned these exact indices and
 * disqualifies the component otherwise. */
#define REQ_COLL_TABLE_ID        15
#define REQ_COLL_FINISH_TABLE_ID 16

/* Map MPI reduction operators to Portals4 atomic operations.  Operators the
 * NIC cannot accelerate are marked COLL_PORTALS4_NO_OP. */
ptl_op_t ompi_coll_portals4_atomic_op[OMPI_OP_NUM_OF_TYPES] = {
    [OMPI_OP_NULL]    = COLL_PORTALS4_NO_OP,
    [OMPI_OP_MAX]     = PTL_MAX,
    [OMPI_OP_MIN]     = PTL_MIN,
    [OMPI_OP_SUM]     = PTL_SUM,
    [OMPI_OP_PROD]    = PTL_PROD,
    [OMPI_OP_LAND]    = PTL_LAND,
    [OMPI_OP_BAND]    = PTL_BAND,
    [OMPI_OP_LOR]     = PTL_LOR,
    [OMPI_OP_BOR]     = PTL_BOR,
    [OMPI_OP_LXOR]    = PTL_LXOR,
    [OMPI_OP_BXOR]    = PTL_BXOR,
    [OMPI_OP_MAXLOC]  = COLL_PORTALS4_NO_OP,
    [OMPI_OP_MINLOC]  = COLL_PORTALS4_NO_OP,
    [OMPI_OP_REPLACE] = PTL_CSWAP,
};

/* Map OMPI predefined datatypes to Portals4 atomic datatypes.  Types without
 * a Portals4 equivalent are marked COLL_PORTALS4_NO_DTYPE.
 * NOTE(review): there is no explicit [OMPI_DATATYPE_MPI_INT8_T] entry, so it
 * gets the zero-initialized value — confirm that this matches PTL_INT8_T or
 * add the entry explicitly. */
ptl_datatype_t ompi_coll_portals4_atomic_datatype[OMPI_DATATYPE_MPI_MAX_PREDEFINED] = {
    [OMPI_DATATYPE_MPI_EMPTY]                 = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_UINT8_T]               = PTL_UINT8_T,
    [OMPI_DATATYPE_MPI_INT16_T]               = PTL_INT16_T,
    [OMPI_DATATYPE_MPI_UINT16_T]              = PTL_UINT16_T,
    [OMPI_DATATYPE_MPI_INT32_T]               = PTL_INT32_T,
    [OMPI_DATATYPE_MPI_UINT32_T]              = PTL_UINT32_T,
    [OMPI_DATATYPE_MPI_INT64_T]               = PTL_INT64_T,
    [OMPI_DATATYPE_MPI_UINT64_T]              = PTL_UINT64_T,
    [OMPI_DATATYPE_MPI_FLOAT]                 = PTL_FLOAT,
    [OMPI_DATATYPE_MPI_DOUBLE]                = PTL_DOUBLE,
    [OMPI_DATATYPE_MPI_LONG_DOUBLE]           = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_COMPLEX4]              = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_COMPLEX8]              = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_COMPLEX16]             = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_COMPLEX32]             = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_WCHAR]                 = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_PACKED]                = COLL_PORTALS4_NO_DTYPE,

    /* C++ / C99 datatypes */
    [OMPI_DATATYPE_MPI_BOOL]                  = COLL_PORTALS4_NO_DTYPE,

    /* Fortran datatypes */
    [OMPI_DATATYPE_MPI_LOGICAL]               = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_CHARACTER]             = PTL_INT8_T,
    [OMPI_DATATYPE_MPI_INTEGER]               = PTL_INT64_T,
    [OMPI_DATATYPE_MPI_REAL]                  = PTL_FLOAT,
    [OMPI_DATATYPE_MPI_DOUBLE_PRECISION]      = PTL_DOUBLE,
    [OMPI_DATATYPE_MPI_COMPLEX]               = PTL_FLOAT_COMPLEX,
    [OMPI_DATATYPE_MPI_DOUBLE_COMPLEX]        = PTL_DOUBLE_COMPLEX,
    [OMPI_DATATYPE_MPI_LONG_DOUBLE_COMPLEX]   = PTL_LONG_DOUBLE_COMPLEX,
    [OMPI_DATATYPE_MPI_2INT]                  = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_2INTEGER]              = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_2REAL]                 = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_2DBLPREC]              = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_2COMPLEX]              = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_2DOUBLE_COMPLEX]       = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_FLOAT_INT]             = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_DOUBLE_INT]            = PTL_INT64_T,
    [OMPI_DATATYPE_MPI_LONG_DOUBLE_INT]       = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_LONG_INT]              = PTL_INT32_T,
    [OMPI_DATATYPE_MPI_SHORT_INT]             = PTL_INT16_T,

    /* MPI 2.2 types */
    [OMPI_DATATYPE_MPI_AINT]                  = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_OFFSET]                = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_C_BOOL]                = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_C_COMPLEX]             = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_C_FLOAT_COMPLEX]       = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_C_DOUBLE_COMPLEX]      = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_C_LONG_DOUBLE_COMPLEX] = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_LB]                    = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_UB]                    = COLL_PORTALS4_NO_DTYPE,

    /* MPI 3.0 types */
    [OMPI_DATATYPE_MPI_COUNT]                 = COLL_PORTALS4_NO_DTYPE,

    /* Datatypes proposed to the MPI Forum in June 2017 for proposal in
     * the MPI 4.0 standard. As of February 2019, it is not accepted yet.
     * See https://github.com/mpi-forum/mpi-issues/issues/65 */
    [OMPI_DATATYPE_MPI_SHORT_FLOAT]           = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_C_SHORT_FLOAT_COMPLEX] = COLL_PORTALS4_NO_DTYPE,

    [OMPI_DATATYPE_MPI_UNAVAILABLE]           = COLL_PORTALS4_NO_DTYPE,
};

/* Save the communicator's current __api function and module so this
 * component can fall back to them, retaining a reference on the module.
 * Disqualifies (returns OMPI_ERROR from the enclosing function) when no
 * underlying implementation exists.
 * FIX: the condition previously referenced the caller's `comm` variable
 * instead of the macro parameter __comm (macro-hygiene bug that only worked
 * because every call site passes a variable literally named `comm`). */
#define PORTALS4_SAVE_PREV_COLL_API(__module, __comm, __api)                                  \
    do {                                                                                      \
        __module->previous_ ## __api            = __comm->c_coll->coll_ ## __api;             \
        __module->previous_ ## __api ## _module = __comm->c_coll->coll_ ## __api ## _module;  \
        if (!__comm->c_coll->coll_ ## __api || !__comm->c_coll->coll_ ## __api ## _module) {  \
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,                 \
                                "(%d/%s): no underlying " # __api"; disqualifying myself",    \
                                ompi_comm_get_local_cid(__comm), __comm->c_name);             \
            return OMPI_ERROR;                                                                \
        }                                                                                     \
        OBJ_RETAIN(__module->previous_ ## __api ## _module);                                  \
    } while(0)

const char *mca_coll_portals4_component_version_string =
    "Open MPI Portals 4 collective MCA component version " OMPI_VERSION;

/* Default priority; overwritten (to 100) in portals4_register and exposed
 * as the "coll_portals4_priority" MCA variable. */
int mca_coll_portals4_priority = 10;

#define MCA_COLL_PORTALS4_EQ_SIZE 4096

static int portals4_open(void);
static int portals4_close(void);
static int portals4_register(void);
static int portals4_init_query(bool enable_progress_threads,
                               bool enable_mpi_threads);
static mca_coll_base_module_t *portals4_comm_query(struct ompi_communicator_t *comm,
                                                   int *priority);
static int portals4_module_enable(mca_coll_base_module_t *module,
                                  struct ompi_communicator_t *comm);
static int portals4_progress(void);


mca_coll_portals4_component_t mca_coll_portals4_component = {
    {
        /* First, the mca_component_t struct containing meta information
         * about the component itself */
        .collm_version = {
            MCA_COLL_BASE_VERSION_2_4_0,

            /* Component name and version */
            .mca_component_name = "portals4",
            MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
                                  OMPI_RELEASE_VERSION),

            /* Component open and close functions */
            .mca_open_component = portals4_open,
            .mca_close_component = portals4_close,
            .mca_register_component_params = portals4_register
        },
        .collm_data = {
            /* The component is not checkpoint ready */
            MCA_BASE_METADATA_PARAM_NONE
        },

        /* Initialization / querying functions */
        .collm_init_query = portals4_init_query,
        .collm_comm_query = portals4_comm_query,
    },
};

/* Log an error location/code at verbosity 1 and hand back a canned error.
 * Always returns OMPI_ERR_TEMP_OUT_OF_RESOURCE regardless of `ret`, so it
 * can be used as `return opal_stderr(...)` in allocation-failure paths. */
int opal_stderr(const char *msg, const char *file,
                const int line, const int ret)
{
    opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                        "%s:%d: %s: %d\n", file, line, msg, ret);
    return (OMPI_ERR_TEMP_OUT_OF_RESOURCE);
}

/* Register this component's MCA variables: priority, gather algorithm
 * selection, and the maximum message size above which messages are split. */
static int
portals4_register(void)
{
    mca_coll_portals4_priority = 100;
    (void) mca_base_component_var_register(&mca_coll_portals4_component.super.collm_version,
                                           "priority", "Priority of the portals4 coll component",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &mca_coll_portals4_priority);

    mca_coll_portals4_component.use_binomial_gather_algorithm = 0;
    (void) mca_base_component_var_register(&mca_coll_portals4_component.super.collm_version,
                                           "use_binomial_gather_algorithm",
                                           "if 1 use a binomial tree algorithm for gather, otherwise use linear",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &mca_coll_portals4_component.use_binomial_gather_algorithm);

    mca_coll_portals4_component.portals_max_msg_size = PTL_SIZE_MAX;
    (void) mca_base_component_var_register(&mca_coll_portals4_component.super.collm_version,
                                           "max_msg_size",
                                           "Max size supported by portals4 (above that, a message is cut into messages less than that size)",
                                           MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &mca_coll_portals4_component.portals_max_msg_size);

    return OMPI_SUCCESS;
}

/* Component open: reset all Portals handles/indices to their invalid
 * sentinels and set up the free list of collective requests. */
static int
portals4_open(void)
{
    int ret;

    mca_coll_portals4_component.ni_h = PTL_INVALID_HANDLE;
    mca_coll_portals4_component.uid = PTL_UID_ANY;
    mca_coll_portals4_component.pt_idx = -1;
    mca_coll_portals4_component.finish_pt_idx = -1;
    mca_coll_portals4_component.eq_h = PTL_INVALID_HANDLE;
    mca_coll_portals4_component.unex_me_h = PTL_INVALID_HANDLE;
    mca_coll_portals4_component.finish_me_h = PTL_INVALID_HANDLE;
    mca_coll_portals4_component.zero_md_h = PTL_INVALID_HANDLE;
    mca_coll_portals4_component.data_md_h = PTL_INVALID_HANDLE;

    OBJ_CONSTRUCT(&mca_coll_portals4_component.requests, opal_free_list_t);
    ret = opal_free_list_init(&mca_coll_portals4_component.requests,
                              sizeof(ompi_coll_portals4_request_t),
                              opal_cache_line_size,
                              OBJ_CLASS(ompi_coll_portals4_request_t),
                              0, 0, 8, 0, 8, NULL, 0, NULL, NULL, NULL);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: ompi_free_list_init failed: %d\n",
                            __FILE__, __LINE__, ret);
        return ret;
    }

    return OMPI_SUCCESS;
}

/* Component close: tear down everything portals4_init_query set up, in
 * reverse order (MDs, MEs, portal tables, EQ, NI), then deregister the
 * progress callback.  Each release failure is logged but does not abort
 * the remaining cleanup. */
static int
portals4_close(void)
{
    int ret;

    OBJ_DESTRUCT(&mca_coll_portals4_component.requests);

    if (!PtlHandleIsEqual(mca_coll_portals4_component.zero_md_h, PTL_INVALID_HANDLE)) {
        ret = PtlMDRelease(mca_coll_portals4_component.zero_md_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                                "%s:%d: PtlMDRelease failed: %d\n",
                                __FILE__, __LINE__, ret);
        }
    }
    mca_coll_portals4_component.zero_md_h = PTL_INVALID_HANDLE;

    if (!PtlHandleIsEqual(mca_coll_portals4_component.data_md_h, PTL_INVALID_HANDLE)) {
        ret = PtlMDRelease(mca_coll_portals4_component.data_md_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                                "%s:%d: PtlMDRelease failed: %d\n",
                                __FILE__, __LINE__, ret);
        }
    }
    mca_coll_portals4_component.data_md_h = PTL_INVALID_HANDLE;

    if (!PtlHandleIsEqual(mca_coll_portals4_component.finish_me_h, PTL_INVALID_HANDLE)) {
        /* PtlMEUnlink may transiently report PTL_IN_USE; retry until done */
        do {
            ret = PtlMEUnlink(mca_coll_portals4_component.finish_me_h);
        } while (PTL_IN_USE == ret);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                                "%s:%d: PtlMEUnlink failed: %d\n",
                                __FILE__, __LINE__, ret);
        }
    }
    if (!PtlHandleIsEqual(mca_coll_portals4_component.unex_me_h, PTL_INVALID_HANDLE)) {
        do {
            ret = PtlMEUnlink(mca_coll_portals4_component.unex_me_h);
        } while (PTL_IN_USE == ret);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                                "%s:%d: PtlMEUnlink failed: %d\n",
                                __FILE__, __LINE__, ret);
        }
    }
    if (mca_coll_portals4_component.finish_pt_idx >= 0) {
        ret = PtlPTFree(mca_coll_portals4_component.ni_h, mca_coll_portals4_component.finish_pt_idx);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                                "%s:%d: PtlPTFree failed: %d\n",
                                __FILE__, __LINE__, ret);
        }
    }
    if (mca_coll_portals4_component.pt_idx >= 0) {
        ret = PtlPTFree(mca_coll_portals4_component.ni_h, mca_coll_portals4_component.pt_idx);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                                "%s:%d: PtlPTFree failed: %d\n",
                                __FILE__, __LINE__, ret);
        }
    }
    if (!PtlHandleIsEqual(mca_coll_portals4_component.eq_h, PTL_INVALID_HANDLE)) {
        ret = PtlEQFree(mca_coll_portals4_component.eq_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                                "%s:%d: PtlEQFree failed: %d\n",
                                __FILE__, __LINE__, ret);
        }
    }
    if (!PtlHandleIsEqual(mca_coll_portals4_component.ni_h, PTL_INVALID_HANDLE)) {
        ret = PtlNIFini(mca_coll_portals4_component.ni_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                                "%s:%d: PtlNIFini failed: %d\n",
                                __FILE__, __LINE__, ret);
        }
        PtlFini();
    }

    opal_progress_unregister(portals4_progress);

    return OMPI_SUCCESS;
}

/*
 * Initial query function that is invoked during MPI_INIT, allowing
 * this component to disqualify itself if it doesn't support the
 * required level of thread support.
 */
/* /!\ Called for each processes /!\ */
static int
portals4_init_query(bool enable_progress_threads,
                    bool enable_mpi_threads)
{
    int ret;
    ptl_md_t md;
    ptl_me_t me;

    /* Initialize Portals and create a physical, matching interface */
    ret = PtlInit();
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: PtlInit failed: %d\n",
                            __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    ret = PtlNIInit(PTL_IFACE_DEFAULT,
                    PTL_NI_PHYSICAL | PTL_NI_MATCHING,
                    PTL_PID_ANY,
                    NULL,
                    &mca_coll_portals4_component.ni_limits,
                    &mca_coll_portals4_component.ni_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: PtlNIInit failed: %d\n",
                            __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    opal_output_verbose(10, ompi_coll_base_framework.framework_output,
                        "ni_limits.max_atomic_size=%ld",
                        mca_coll_portals4_component.ni_limits.max_atomic_size);

    /* Clamp the NI's max message size to the user-configured ceiling */
    if (mca_coll_portals4_component.portals_max_msg_size < mca_coll_portals4_component.ni_limits.max_msg_size)
        mca_coll_portals4_component.ni_limits.max_msg_size = mca_coll_portals4_component.portals_max_msg_size;
    opal_output_verbose(10, ompi_coll_base_framework.framework_output,
                        "ni_limits.max_msg_size=%lu",
                        mca_coll_portals4_component.ni_limits.max_msg_size);

    ret = PtlGetId(mca_coll_portals4_component.ni_h, &mca_coll_portals4_component.id);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: PtlGetid failed: %d\n",
                            __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }
    /* FIX ME: Need to make sure our ID matches with the MTL... */

    ret = PtlGetUid(mca_coll_portals4_component.ni_h, &mca_coll_portals4_component.uid);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: PtlGetUid failed: %d\n",
                            __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    ret = PtlEQAlloc(mca_coll_portals4_component.ni_h,
                     MCA_COLL_PORTALS4_EQ_SIZE,
                     &mca_coll_portals4_component.eq_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: PtlEQAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    ret = PtlPTAlloc(mca_coll_portals4_component.ni_h,
                     0,
                     mca_coll_portals4_component.eq_h,
                     REQ_COLL_TABLE_ID,
                     &mca_coll_portals4_component.pt_idx);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: PtlPTAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    if (mca_coll_portals4_component.pt_idx != REQ_COLL_TABLE_ID) {
        /* FIX: previously reported finish_pt_idx here (copy-paste bug) */
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: PtlPTAlloc return wrong pt_idx: %d\n",
                            __FILE__, __LINE__,
                            mca_coll_portals4_component.pt_idx);
        return OMPI_ERROR;
    }

    ret = PtlPTAlloc(mca_coll_portals4_component.ni_h,
                     0,
                     mca_coll_portals4_component.eq_h,
                     REQ_COLL_FINISH_TABLE_ID,
                     &mca_coll_portals4_component.finish_pt_idx);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: PtlPTAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    if (mca_coll_portals4_component.finish_pt_idx != REQ_COLL_FINISH_TABLE_ID) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: PtlPTAlloc return wrong pt_idx: %d\n",
                            __FILE__, __LINE__,
                            mca_coll_portals4_component.finish_pt_idx);
        return OMPI_ERROR;
    }

    /* Bind MD/MDs across all memory.  We prefer (for obvious reasons) to
       have a single MD across all of memory */
    memset(&md, 0, sizeof(ptl_md_t));
    md.start = 0;
    md.length = 0;
    md.options = 0;
    md.eq_handle = PTL_EQ_NONE;
    md.ct_handle = PTL_CT_NONE;

    ret = PtlMDBind(mca_coll_portals4_component.ni_h,
                    &md,
                    &mca_coll_portals4_component.zero_md_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: PtlMDBind failed: %d\n",
                            __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    md.start = 0;
    md.length = PTL_SIZE_MAX;
    md.options = 0;
    md.eq_handle = PTL_EQ_NONE;
    md.ct_handle = PTL_CT_NONE;

    ret = PtlMDBind(mca_coll_portals4_component.ni_h,
                    &md,
                    &mca_coll_portals4_component.data_md_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: PtlMDBind failed: %d\n",
                            __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }
    OPAL_OUTPUT_VERBOSE((90, ompi_coll_base_framework.framework_output,
                         "PtlMDBind start=%p length=%lx\n",
                         md.start, md.length));

    /* setup finish ack ME */
    me.start = NULL;
    me.length = 0;
    me.ct_handle = PTL_CT_NONE;
    me.min_free = 0;
    me.uid = mca_coll_portals4_component.uid;
    me.options = PTL_ME_OP_PUT |
        PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE;
    me.match_id.phys.nid = PTL_NID_ANY;
    me.match_id.phys.pid = PTL_PID_ANY;
    me.match_bits = 0;
    me.ignore_bits = 0;

    ret = PtlMEAppend(mca_coll_portals4_component.ni_h,
                      mca_coll_portals4_component.finish_pt_idx,
                      &me,
                      PTL_PRIORITY_LIST,
                      NULL,
                      &mca_coll_portals4_component.finish_me_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: PtlMEAppend of barrier unexpected failed: %d\n",
                            __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    /* This ME is used for RTR exchange only */
    me.start = NULL;
    me.length = 0;
    me.ct_handle = PTL_CT_NONE;
    me.min_free = 0;
    me.uid = mca_coll_portals4_component.uid;
    me.options = PTL_ME_OP_PUT |
        PTL_ME_EVENT_SUCCESS_DISABLE | PTL_ME_EVENT_OVER_DISABLE |
        PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE;
    me.match_id.phys.nid = PTL_NID_ANY;
    me.match_id.phys.pid = PTL_PID_ANY;

    /* Note : the RTR bit must be set to match this ME,
     * this allows to discriminate the RTR from data flow
     * (especially for the Barrier operations)
     */
    COLL_PORTALS4_SET_BITS(me.match_bits, 0, 0, 1, 0, 0, 0);
    me.ignore_bits = ~COLL_PORTALS4_RTR_MASK;

    ret = PtlMEAppend(mca_coll_portals4_component.ni_h,
                      mca_coll_portals4_component.pt_idx,
                      &me,
                      PTL_OVERFLOW_LIST,
                      NULL,
                      &mca_coll_portals4_component.unex_me_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: PtlMEAppend of barrier unexpected failed: %d\n",
                            __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    /* activate progress callback */
    ret = opal_progress_register(portals4_progress);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: opal_progress_register failed: %d\n",
                            __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;
}

/*
 * Invoked when there's a new communicator that has been created.
 * Look at the communicator and decide which set of functions and
 * priority we want to return.
 */
static mca_coll_base_module_t *
portals4_comm_query(struct ompi_communicator_t *comm,
                    int *priority)
{
    mca_coll_portals4_module_t *portals4_module;
    ptl_process_t *proc;

    /* For now, we don't support intercommunicators and we probably
       never should handle the single proc case, since there's the
       self module... */
    if (OMPI_COMM_IS_INTER(comm) || ompi_comm_size(comm) < 2) {
        return NULL;
    }

    /* Make sure someone is populating the proc table, since we're not
       in a really good position to do so */
    proc = ompi_proc_local()->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4];
    if (NULL == proc) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: Proc table not previously populated",
                            __FILE__, __LINE__);
        return NULL;
    }

    opal_output_verbose(50, ompi_coll_base_framework.framework_output,
                        "%s:%d: My nid,pid = (%x,%x)\n",
                        __FILE__, __LINE__, proc->phys.nid, proc->phys.pid);

    /* check for logical addressing mode in the MTL */
    if (0 == proc->phys.pid) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: proc->phys.pid==0, so mtl-portals4 is using logical addressing which coll-portals4 doesn't support. Disqualifying myself.",
                            __FILE__, __LINE__);
        return NULL;
    }

    portals4_module = OBJ_NEW(mca_coll_portals4_module_t);
    if (NULL == portals4_module) return NULL;

    *priority = mca_coll_portals4_priority;
    portals4_module->coll_count = 0;
    portals4_module->super.coll_module_enable = portals4_module_enable;

    portals4_module->super.coll_barrier = ompi_coll_portals4_barrier_intra;
    portals4_module->super.coll_ibarrier = ompi_coll_portals4_ibarrier_intra;

    portals4_module->super.coll_gather = ompi_coll_portals4_gather_intra;
    portals4_module->super.coll_igather = ompi_coll_portals4_igather_intra;

    portals4_module->super.coll_scatter = ompi_coll_portals4_scatter_intra;
    portals4_module->super.coll_iscatter = ompi_coll_portals4_iscatter_intra;

    portals4_module->cached_in_order_bmtree = NULL;
    portals4_module->cached_in_order_bmtree_root = -1;

    portals4_module->super.coll_bcast = ompi_coll_portals4_bcast_intra;
    portals4_module->super.coll_ibcast = ompi_coll_portals4_ibcast_intra;

    portals4_module->super.coll_allreduce = ompi_coll_portals4_allreduce_intra;
    portals4_module->super.coll_iallreduce = ompi_coll_portals4_iallreduce_intra;

    portals4_module->super.coll_reduce = ompi_coll_portals4_reduce_intra;
    portals4_module->super.coll_ireduce = ompi_coll_portals4_ireduce_intra;

    return &(portals4_module->super);
}

/*
 * Init module on the communicator: capture the previous (fallback)
 * reduce/allreduce implementations; disqualify if any is missing.
 */
static int
portals4_module_enable(mca_coll_base_module_t *module,
                       struct ompi_communicator_t *comm)
{
    mca_coll_portals4_module_t *portals4_module = (mca_coll_portals4_module_t*) module;

    PORTALS4_SAVE_PREV_COLL_API(portals4_module, comm, allreduce);
    PORTALS4_SAVE_PREV_COLL_API(portals4_module, comm, iallreduce);
    PORTALS4_SAVE_PREV_COLL_API(portals4_module, comm, reduce);
    PORTALS4_SAVE_PREV_COLL_API(portals4_module, comm, ireduce);

    return OMPI_SUCCESS;
}

#if OPAL_ENABLE_DEBUG
/* These string maps are only used for debugging output.
 * They will be compiled-out when OPAL is configured
 * without --enable-debug.
 */
static char *failtype[] = {
    "PTL_NI_OK",
    "PTL_NI_PERM_VIOLATION",
    "PTL_NI_SEGV",
    "PTL_NI_PT_DISABLED",
    "PTL_NI_DROPPED",
    "PTL_NI_UNDELIVERABLE",
    "PTL_FAIL",
    "PTL_ARG_INVALID",
    "PTL_IN_USE",
    "PTL_ME_NO_MATCH",
    "PTL_NI_TARGET_INVALID",
    "PTL_NI_OP_VIOLATION"
};

static char *evname[] = {
    "PTL_EVENT_GET",
    "PTL_EVENT_GET_OVERFLOW",
    "PTL_EVENT_PUT",
    "PTL_EVENT_PUT_OVERFLOW",
    "PTL_EVENT_ATOMIC",
    "PTL_EVENT_ATOMIC_OVERFLOW",
    "PTL_EVENT_FETCH_ATOMIC",
    "PTL_EVENT_FETCH_ATOMIC_OVERFLOW",
    "PTL_EVENT_REPLY",
    "PTL_EVENT_SEND",
    "PTL_EVENT_ACK",
    "PTL_EVENT_PT_DISABLED",
    "PTL_EVENT_AUTO_UNLINK",
    "PTL_EVENT_AUTO_FREE",
    "PTL_EVENT_SEARCH",
    "PTL_EVENT_LINK"
};
#endif

/* Target EQ: drain the shared event queue, dispatching each PUT completion
 * to the matching non-blocking collective's finalizer (the request pointer
 * travels in the event's hdr_data).  Returns the number of events handled. */
static int
portals4_progress(void)
{
    int count = 0, ret;
    ptl_event_t ev;
    ompi_coll_portals4_request_t *ptl_request;

    while (true) {
        ret = PtlEQGet(mca_coll_portals4_component.eq_h, &ev);
        if (PTL_OK == ret) {
            OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
                                 "event type=%s\n", evname[ev.type]));
            count++;

            switch (ev.type) {
            case PTL_EVENT_PUT:
                /* Non-Blocking / request */
                if (PTL_OK == ev.ni_fail_type) {
                    OPAL_OUTPUT_VERBOSE((50, ompi_coll_base_framework.framework_output,
                                         "hdr_data %p, matchbits 0x%lx",
                                         (void*) ev.hdr_data, ev.match_bits));
                    assert(0 != ev.hdr_data);
                    ptl_request = (ompi_coll_portals4_request_t*) ev.hdr_data;
                    assert(NULL != ptl_request);

                    switch (ptl_request->type) {
                    case OMPI_COLL_PORTALS4_TYPE_BARRIER:
                        ompi_coll_portals4_ibarrier_intra_fini(ptl_request);
                        break;
                    case OMPI_COLL_PORTALS4_TYPE_BCAST:
                        ompi_coll_portals4_ibcast_intra_fini(ptl_request);
                        break;
                    case OMPI_COLL_PORTALS4_TYPE_REDUCE:
                        ompi_coll_portals4_ireduce_intra_fini(ptl_request);
                        break;
                    case OMPI_COLL_PORTALS4_TYPE_ALLREDUCE:
                        ompi_coll_portals4_iallreduce_intra_fini(ptl_request);
                        break;
                    case OMPI_COLL_PORTALS4_TYPE_SCATTER:
                        ompi_coll_portals4_iscatter_intra_fini(ptl_request);
                        break;
                    case OMPI_COLL_PORTALS4_TYPE_GATHER:
                        ompi_coll_portals4_igather_intra_fini(ptl_request);
                        break;
                    }
                }

                if (PTL_OK != ev.ni_fail_type) {
                    OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
                                         "ni_fail_type=%s\n", failtype[ev.ni_fail_type]));
                }
                break;
            default:
                opal_output(ompi_coll_base_framework.framework_output,
                            "Unexpected event of type %d", ev.type);
                break;
            }
        }
        else if (PTL_EQ_EMPTY == ret) {
            break;
        }
        else if (PTL_EQ_DROPPED == ret) {
            /* Events were lost: no recovery path exists, abort the job */
            opal_output(ompi_coll_base_framework.framework_output,
                        "Flow control situation without recovery (EQ_DROPPED)\n");
            ompi_rte_abort(ret, "coll-portals4: Flow control situation without recovery (EQ_DROPPED)");
        }
        else {
            opal_output(ompi_coll_base_framework.framework_output,
                        "Error returned from PtlEQGet: %d", ret);
            break;
        }
    }
    return count;
}

OBJ_CLASS_INSTANCE(mca_coll_portals4_module_t,
                   mca_coll_base_module_t,
                   NULL, NULL);