/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** * @file * * P2P Management Layer (PML) * * An MCA component type that provides the P2P interface functionality * required by the MPI layer. The PML is a relatively thin layer that * primarily provides for the fragmentation and scheduling of messages * over multiple transports (instances of the Byte Transfer Layer * (BTL) MCA component type) as depicted below: * * ------------------------------------ * | MPI | * ------------------------------------ * | PML | * ------------------------------------ * | BTL (TCP) | BTL (SM) | BTL (...) | * ------------------------------------ * * A single PML component is selected by the MCA framework during * library initialization. Initially, all available PMLs are loaded * (potentially as shared libraries) and their component open and init * functions called. The MCA framework selects the component * returning the highest priority and closes/unloads any other PML * components that may have been opened. * * After all of the MCA components are initialized, the MPI/RTE will * make downcalls into the PML to provide the initial list of * processes (ompi_proc_t instances), and notification of changes * (add/delete). * * The PML module must select the set of BTL components that are to be * used to reach a given destination. These should be cached on a PML * specific data structure that is hung off the ompi_proc_t. * * The PML should then apply a scheduling algorithm (round-robin, * weighted distribution, etc), to schedule the delivery of messages * over the available BTLs. * */ #ifndef MCA_PML_H #define MCA_PML_H #include "ompi_config.h" #include "ompi/mca/mca.h" #include "mpi.h" /* needed for MPI_ANY_TAG */ #include "ompi/mca/pml/pml_constants.h" #include "ompi/request/request.h" BEGIN_C_DECLS /* * PML component types */ typedef uint64_t mca_pml_sequence_t; struct ompi_proc_t; /** * MCA->PML Called by MCA framework to initialize the component. * * @param priority (OUT) Relative priority or ranking used by MCA to * selected a component. * * @param enable_progress_threads (IN) Whether this component is * allowed to run a hidden/progress thread or not. * * @param enable_mpi_threads (IN) Whether support for multiple MPI * threads is enabled or not (i.e., MPI_THREAD_MULTIPLE), which * indicates whether multiple threads may invoke this component * simultaneously or not. */ typedef struct mca_pml_base_module_2_1_0_t * (*mca_pml_base_component_init_fn_t)( int *priority, bool enable_progress_threads, bool enable_mpi_threads); /** * @brief A transport name and device name pair used by an endpoint. * @ref mca_pml.pml_get_transports */ typedef struct mca_pml_transport_entry_t { // The name of a transport layer used by this endpoint. const char *transport_name; // The name of the device used with this transport by this endpoint. const char *device_name; } mca_pml_transport_entry_t; /** * @brief Structure containing an array of transport layers and device names * used by a PML. * @ref mca_pml.pml_get_transports */ typedef struct mca_pml_transports_t { // Number of transport/device name pairs. unsigned int count; // Pointer to array of transport/device name pairs used by this endpoint. mca_pml_transport_entry_t *entries; } mca_pml_transports_t; typedef int (*mca_pml_base_component_finalize_fn_t)(void); /** * PML component version and interface functions. */ struct mca_pml_base_component_2_1_0_t { mca_base_component_t pmlm_version; mca_base_component_data_t pmlm_data; mca_pml_base_component_init_fn_t pmlm_init; mca_pml_base_component_finalize_fn_t pmlm_finalize; }; typedef struct mca_pml_base_component_2_1_0_t mca_pml_base_component_2_1_0_t; typedef mca_pml_base_component_2_1_0_t mca_pml_base_component_t; /** * MCA management functions. */ /** * Downcall from MPI/RTE layer when new processes are created. * * @param procs Array of new processes * @param nprocs Size of process array * @return OMPI_SUCCESS or failure status. * * Provides a notification to the PML that new processes have been * created, and provides the PML the opportunity to cache data * (e.g. list of BTLs to use) on the ompi_proc_t data structure. */ typedef int (*mca_pml_base_module_add_procs_fn_t)(struct ompi_proc_t **procs, size_t nprocs); /** * Downcall from MPI/RTE layer when processes are terminated. * * @param procs Array of processes * @param nprocs Size of process array * @return OMPI_SUCCESS or failure status. * * Provides a notification to the PML that processes have * gone away, and provides the PML the opportunity to cleanup * any data cached on the ompi_proc_t data structure. */ typedef int (*mca_pml_base_module_del_procs_fn_t)(struct ompi_proc_t **procs, size_t nprocs); /** * Downcall from MCA layer to enable the PML/BTLs. * * @param enable Enable/Disable PML forwarding * @return OMPI_SUCCESS or failure status. */ typedef int (*mca_pml_base_module_enable_fn_t)( bool enable ); /** * For non-threaded case, provides MCA the opportunity to * progress outstanding requests on all btls. * * * @return Count of "completions", a metric of * how many items where completed in the call * to progress. */ typedef int (*mca_pml_base_module_progress_fn_t)(void); /** * Get the set of transports and devices used to communicate with specified rank * * @param comm Communicator * @param rank Task rank of other task * @return Pointer to structure containing set of transports and device names or NULL */ typedef mca_pml_transports_t *(*mca_pml_base_module_get_transports_t)(struct ompi_communicator_t *comm, int rank); /** * MPI Interface Functions */ /** * Downcall from MPI layer when a new communicator is created. * * @param comm Communicator * @return OMPI_SUCCESS or failure status. * * Provides the PML the opportunity to initialize/cache a data structure * on the communicator. */ typedef int (*mca_pml_base_module_add_comm_fn_t)(struct ompi_communicator_t* comm); /** * Downcall from MPI layer when a communicator is destroyed. * * @param comm Communicator * @return OMPI_SUCCESS or failure status. * * Provides the PML the opportunity to cleanup any datastructures * associated with the communicator. */ typedef int (*mca_pml_base_module_del_comm_fn_t)(struct ompi_communicator_t* comm); /** * Downcall from MPI layer when a communicator is revoked. * * @param comm (INOUT) Communicator * @param coll_only (IN) Revoke only the collective operations, or all * non-FT operations * @return OMPI_SUCCESS or failure status. * * Provides the PML the opportunity to change unexpected fragments * handling and purge the queues associated with the communicator. */ /* not conditional on OPAL_ENABLE_FT_MPI for ABI */ typedef int (*mca_pml_base_module_revoke_comm_fn_t)(struct ompi_communicator_t* comm, bool coll_only); /** * Initialize a persistent receive request. * * @param buf (IN) User buffer. * @param count (IN) Number of elements of the specified datatype. * @param datatype (IN) User defined datatype. * @param src (IN) Source rank w/in communicator. * @param tag (IN) User defined tag. * @param comm (IN) Communicator. * @param request (OUT) Request handle. * @return OMPI_SUCCESS or failure status. */ typedef int (*mca_pml_base_module_irecv_init_fn_t)( void *buf, size_t count, struct ompi_datatype_t *datatype, int src, int tag, struct ompi_communicator_t* comm, struct ompi_request_t **request ); /** * Post a receive request. * * @param buf (IN) User buffer. * @param count (IN) Number of elements of the specified datatype. * @param datatype (IN) User defined datatype. * @param src (IN) Source rank w/in communicator. * @param tag (IN) User defined tag. * @param comm (IN) Communicator. * @param request (OUT) Request handle. * @return OMPI_SUCCESS or failure status. */ typedef int (*mca_pml_base_module_irecv_fn_t)( void *buf, size_t count, struct ompi_datatype_t *datatype, int src, int tag, struct ompi_communicator_t* comm, struct ompi_request_t **request ); typedef int (*mca_pml_base_module_imrecv_fn_t)( void *buf, size_t count, struct ompi_datatype_t *datatype, struct ompi_message_t **message, struct ompi_request_t **request ); /** * Post a receive and wait for completion. * * @param buf (IN) User buffer * @param count (IN) Number of elements of the specified datatype * @param datatype (IN) User defined datatype * @param src (IN) Source rank w/in communicator * @param tag (IN) User defined tag * @param comm (IN) Communicator * @param status (OUT) Completion status * @return OMPI_SUCCESS or failure status. */ typedef int (*mca_pml_base_module_recv_fn_t)( void *buf, size_t count, struct ompi_datatype_t *datatype, int src, int tag, struct ompi_communicator_t* comm, ompi_status_public_t* status ); typedef int (*mca_pml_base_module_mrecv_fn_t)( void *buf, size_t count, struct ompi_datatype_t *datatype, struct ompi_message_t **message, ompi_status_public_t* status ); /** * Initialize a persistent send request. * * @param buf (IN) User buffer. * @param count (IN) Number of elements of the specified datatype. * @param datatype (IN) User defined datatype. * @param dst (IN) Peer rank w/in communicator. * @param tag (IN) User defined tag. * @param mode (IN) Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY) * @param comm (IN) Communicator. * @param request (OUT) Request handle. * @return OMPI_SUCCESS or failure status. */ typedef int (*mca_pml_base_module_isend_init_fn_t)( const void *buf, size_t count, struct ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, struct ompi_request_t **request ); /** * Post a send request. * * @param buf (IN) User buffer. * @param count (IN) Number of elements of the specified datatype. * @param datatype (IN) User defined datatype. * @param dst (IN) Peer rank w/in communicator. * @param tag (IN) User defined tag. * @param mode (IN) Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY) * @param comm (IN) Communicator. * @param request (OUT) Request handle. * @return OMPI_SUCCESS or failure status. */ typedef int (*mca_pml_base_module_isend_fn_t)( const void *buf, size_t count, struct ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, struct ompi_request_t **request ); /** * Post a send request and wait for completion. * * @param buf (IN) User buffer. * @param count (IN) Number of elements of the specified datatype. * @param datatype (IN) User defined datatype. * @param dst (IN) Peer rank w/in communicator. * @param tag (IN) User defined tag. * @param mode (IN) Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY) * @param comm (IN) Communicator. * @return OMPI_SUCCESS or failure status. */ typedef int (*mca_pml_base_module_send_fn_t)( const void *buf, size_t count, struct ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm ); /** * Initiate one or more persistent requests. * * @param count (IN) Number of requests * @param requests (IN/OUT) Array of persistent requests * @return OMPI_SUCCESS or failure status. */ typedef ompi_request_start_fn_t mca_pml_base_module_start_fn_t; /** * Probe to poll for pending recv. * * @param src (IN) Source rank w/in communicator. * @param tag (IN) User defined tag. * @param comm (IN) Communicator. * @param matched (OUT) Flag indicating if matching recv exists. * @param status (OUT) Completion statuses. * @return OMPI_SUCCESS or failure status. * */ typedef int (*mca_pml_base_module_iprobe_fn_t)( int src, int tag, struct ompi_communicator_t* comm, int *matched, ompi_status_public_t *status ); typedef int (*mca_pml_base_module_improbe_fn_t)( int src, int tag, struct ompi_communicator_t* comm, int *matched, struct ompi_message_t **message, ompi_status_public_t *status ); /** * Blocking probe to wait for pending recv. * * @param src (IN) Source rank w/in communicator. * @param tag (IN) User defined tag. * @param comm (IN) Communicator. * @param status (OUT) Completion statuses. * @return OMPI_SUCCESS or failure status. * */ typedef int (*mca_pml_base_module_probe_fn_t)( int src, int tag, struct ompi_communicator_t* comm, ompi_status_public_t *status ); typedef int (*mca_pml_base_module_mprobe_fn_t)( int src, int tag, struct ompi_communicator_t* comm, struct ompi_message_t **message, ompi_status_public_t *status ); /** * Cancel pending operation. * * @param request (IN) Request * @return OMPI_SUCCESS or failure status. * */ typedef int (*mca_pml_base_module_cancel_fn_t)( struct ompi_request_t* request ); /** * Has a request been cancelled? * * @param request (IN) Request * @return OMPI_SUCCESS or failure status. * */ typedef int (*mca_pml_base_module_cancelled_fn_t)( struct ompi_request_t* request, int *flag ); /** * Release resources held by a persistent mode request. * * @param request (IN) Request * @return OMPI_SUCCESS or failure status. * */ typedef int (*mca_pml_base_module_free_fn_t)( struct ompi_request_t** request ); /** * A special NULL request handle. * * @param request (OUT) Request * @return OMPI_SUCCESS or failure status. * */ typedef int (*mca_pml_base_module_null_fn_t)( struct ompi_request_t** request ); /** * Diagnostics function. * * @param request (IN) Communicator * @param verbose (IN) Verbosity level (passed to BTL) * @return OMPI_SUCCESS or failure status. * */ typedef int (*mca_pml_base_module_dump_fn_t)( struct ompi_communicator_t* comm, int verbose ); /** * pml module flags */ /** PML requires requires all procs in the job on the first call to * add_procs */ #define MCA_PML_BASE_FLAG_REQUIRE_WORLD 0x00000001 /** * PML supports the extended CID space (doesn't need a global communicator index) */ #define MCA_PML_BASE_FLAG_SUPPORTS_EXT_CID 0x00000002 /** * PML instance. */ struct mca_pml_base_module_2_1_0_t { /* downcalls from MCA to PML */ mca_pml_base_module_add_procs_fn_t pml_add_procs; mca_pml_base_module_del_procs_fn_t pml_del_procs; mca_pml_base_module_enable_fn_t pml_enable; mca_pml_base_module_progress_fn_t pml_progress; /* downcalls from MPI to PML */ mca_pml_base_module_add_comm_fn_t pml_add_comm; mca_pml_base_module_del_comm_fn_t pml_del_comm; mca_pml_base_module_revoke_comm_fn_t pml_revoke_comm; mca_pml_base_module_irecv_init_fn_t pml_irecv_init; mca_pml_base_module_irecv_fn_t pml_irecv; mca_pml_base_module_recv_fn_t pml_recv; mca_pml_base_module_isend_init_fn_t pml_isend_init; mca_pml_base_module_isend_fn_t pml_isend; mca_pml_base_module_send_fn_t pml_send; mca_pml_base_module_iprobe_fn_t pml_iprobe; mca_pml_base_module_probe_fn_t pml_probe; mca_pml_base_module_start_fn_t pml_start; mca_pml_base_module_improbe_fn_t pml_improbe; mca_pml_base_module_mprobe_fn_t pml_mprobe; mca_pml_base_module_imrecv_fn_t pml_imrecv; mca_pml_base_module_mrecv_fn_t pml_mrecv; /* diagnostics */ mca_pml_base_module_dump_fn_t pml_dump; /* maximum constant sizes */ uint32_t pml_max_contextid; int pml_max_tag; int pml_flags; mca_pml_base_module_get_transports_t pml_get_transports; }; typedef struct mca_pml_base_module_2_1_0_t mca_pml_base_module_2_1_0_t; typedef mca_pml_base_module_2_1_0_t mca_pml_base_module_t; /* * Macro for use in components that are of type pml */ #define MCA_PML_BASE_VERSION_2_1_0 \ OMPI_MCA_BASE_VERSION_2_1_0("pml", 2, 1, 0) /* * macro for doing direct call / call through struct */ #if MCA_ompi_pml_DIRECT_CALL #include MCA_ompi_pml_DIRECT_CALL_HEADER #define MCA_PML_CALL_STAMP(a, b) mca_pml_ ## a ## _ ## b #define MCA_PML_CALL_EXPANDER(a, b) MCA_PML_CALL_STAMP(a,b) #define MCA_PML_CALL(a) MCA_PML_CALL_EXPANDER(MCA_ompi_pml_DIRECT_CALL_COMPONENT, a) #else #define MCA_PML_CALL(a) mca_pml.pml_ ## a #endif OMPI_DECLSPEC extern mca_pml_base_module_t mca_pml; static inline bool mca_pml_base_requires_world (void) { return !!(mca_pml.pml_flags & MCA_PML_BASE_FLAG_REQUIRE_WORLD); } static inline bool mca_pml_base_supports_extended_cid (void) { return !!(mca_pml.pml_flags & MCA_PML_BASE_FLAG_SUPPORTS_EXT_CID); } END_C_DECLS #endif /* MCA_PML_H */