/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2008-2009 Cisco Systems, Inc.  All rights reserved.
 * Copyright (c) 2011-2015 Los Alamos National Security, LLC.
 *                         All rights reserved.
 * Copyright (c) 2015      Intel, Inc. All rights reserved.
 * Copyright (c) 2018      Amazon.com, Inc. or its affiliates.  All Rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
/**
 * @file
 *
 * Most of the description of the data layout is in the
 * coll_sm_module.c file.
 */

#include "ompi_config.h"

#include "opal/util/show_help.h"
#include "opal/util/printf.h"
#include "ompi/constants.h"
#include "ompi/mca/coll/coll.h"
#include "coll_sm.h"

/*
 * Public string showing the coll ompi_sm component version number
 */
const char *mca_coll_sm_component_version_string =
    "Open MPI sm collective MCA component version " OMPI_VERSION;

/*
 * Default values of the user-settable MCA parameters.  The static
 * initializer of mca_coll_sm_component and sm_register() both take
 * their defaults from here so the two cannot drift apart.
 */
enum {
    /* JMS temporarily lowered until we can get more testing */
    COLL_SM_DEFAULT_PRIORITY = 0,
    /* control size (bytes) */
    COLL_SM_DEFAULT_CONTROL_SIZE = 4096,
    /* number of "in use" flags for each communicator's area in the
       per-communicator shmem segment */
    COLL_SM_DEFAULT_NUM_IN_USE_FLAGS = 2,
    /* number of segments for each communicator in the
       per-communicator shmem segment */
    COLL_SM_DEFAULT_NUM_SEGMENTS = 8,
    /* fragment size */
    COLL_SM_DEFAULT_FRAGMENT_SIZE = 8192,
    /* degree of tree for tree-based operations (must be <= control
       unit size) */
    COLL_SM_DEFAULT_TREE_DEGREE = 4,
    /* number of processes in coll_sm_shared_mem_size information
       variable */
    COLL_SM_DEFAULT_INFO_COMM_SIZE = 4
};

/*
 * Local functions
 */
static int sm_close(void);
static int sm_register(void);

/* Backing storage of the read-only "shared_mem_used_data" information
   variable registered in sm_register() */
static int coll_sm_shared_mem_used_data;

/*
 * Instantiate the public struct with all of our public information
 * and pointers to our public functions in it
 */
mca_coll_sm_component_t mca_coll_sm_component = {

    /* First, fill in the super */
    .super = {
        /* First, the mca_component_t struct containing meta
           information about the component itself */
        .collm_version = {
            MCA_COLL_BASE_VERSION_2_4_0,

            /* Component name and version */
            .mca_component_name = "sm",
            MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
                                  OMPI_RELEASE_VERSION),

            /* Component functions */
            .mca_close_component = sm_close,
            .mca_register_component_params = sm_register,
        },
        .collm_data = {
            /* The component is not checkpoint ready */
            MCA_BASE_METADATA_PARAM_NONE
        },

        /* Initialization / querying functions */
        .collm_init_query = mca_coll_sm_init_query,
        .collm_comm_query = mca_coll_sm_comm_query,
    },

    /* sm-component specific information */
    .sm_priority = COLL_SM_DEFAULT_PRIORITY,
    .sm_control_size = COLL_SM_DEFAULT_CONTROL_SIZE,
    .sm_comm_num_in_use_flags = COLL_SM_DEFAULT_NUM_IN_USE_FLAGS,
    .sm_comm_num_segments = COLL_SM_DEFAULT_NUM_SEGMENTS,
    .sm_fragment_size = COLL_SM_DEFAULT_FRAGMENT_SIZE,
    .sm_tree_degree = COLL_SM_DEFAULT_TREE_DEGREE,
    .sm_info_comm_size = COLL_SM_DEFAULT_INFO_COMM_SIZE,

    /* default values for non-MCA parameters */
    /* Not specifying values here gives us all 0's */
};

/*
 * Shut down the component
 */
static int sm_close(void)
{
    return OMPI_SUCCESS;
}

/*
 * INFO: Calculate how much space we need in the per-communicator
 * shmem data segment for sm_info_comm_size processes, using the
 * current parameter values in *cs.  This formula taken directly from
 * coll_sm_module.c.
 */
static int sm_shared_mem_used_data(const mca_coll_sm_component_t *cs)
{
    return (int)(4 * cs->sm_control_size +
                 (cs->sm_comm_num_in_use_flags * cs->sm_control_size) +
                 (cs->sm_comm_num_segments *
                  (cs->sm_info_comm_size * cs->sm_control_size * 2)) +
                 (cs->sm_comm_num_segments *
                  (cs->sm_info_comm_size * cs->sm_fragment_size)));
}

/*
 * Bring the registered MCA parameters into a mutually consistent
 * state (adjusting out-of-range values in place) and refresh the
 * shared_mem_used_data information variable from the final values.
 *
 * Always returns OMPI_SUCCESS.
 */
static int sm_verify_mca_variables(void)
{
    mca_coll_sm_component_t *cs = &mca_coll_sm_component;

    /* The control size is used as a divisor below: zero would make
       the modulo undefined, and a negative length is meaningless.
       Fall back to the default in either case. */
    if (cs->sm_control_size <= 0) {
        cs->sm_control_size = COLL_SM_DEFAULT_CONTROL_SIZE;
    }

    /* Round the fragment size up to a multiple of the control size */
    if (0 != (cs->sm_fragment_size % cs->sm_control_size)) {
        cs->sm_fragment_size += cs->sm_control_size -
            (cs->sm_fragment_size % cs->sm_control_size);
    }

    /* At least two "in use" flags */
    if (cs->sm_comm_num_in_use_flags < 2) {
        cs->sm_comm_num_in_use_flags = 2;
    }

    /* At least as many segments as "in use" flags ... */
    if (cs->sm_comm_num_segments < cs->sm_comm_num_in_use_flags) {
        cs->sm_comm_num_segments = cs->sm_comm_num_in_use_flags;
    }
    /* ... rounded up to a multiple of the number of "in use" flags */
    if (0 != (cs->sm_comm_num_segments % cs->sm_comm_num_in_use_flags)) {
        cs->sm_comm_num_segments += cs->sm_comm_num_in_use_flags -
            (cs->sm_comm_num_segments % cs->sm_comm_num_in_use_flags);
    }
    cs->sm_segs_per_inuse_flag =
        cs->sm_comm_num_segments / cs->sm_comm_num_in_use_flags;

    /* Tree degree must be in [1, min(control_size, 255)] */
    if (cs->sm_tree_degree > cs->sm_control_size) {
        opal_show_help("help-mpi-coll-sm.txt",
                       "tree-degree-larger-than-control", true,
                       cs->sm_tree_degree, cs->sm_control_size);
        cs->sm_tree_degree = cs->sm_control_size;
    }
    if (cs->sm_tree_degree > 255) {
        opal_show_help("help-mpi-coll-sm.txt",
                       "tree-degree-larger-than-255", true,
                       cs->sm_tree_degree);
        cs->sm_tree_degree = 255;
    }
    /* Lower bound stated in the tree_degree parameter description */
    if (cs->sm_tree_degree < 1) {
        cs->sm_tree_degree = 1;
    }

    /* Lower bound stated in the info_num_procs parameter description */
    if (cs->sm_info_comm_size < 2) {
        cs->sm_info_comm_size = 2;
    }

    /* Recompute the information variable from the adjusted values */
    coll_sm_shared_mem_used_data = sm_shared_mem_used_data(cs);

    return OMPI_SUCCESS;
}

/*
 * Register MCA params
 *
 * Each parameter is reset to its default immediately before it is
 * registered.  The return values of the registration calls are not
 * checked.  Returns the result of sm_verify_mca_variables().
 */
static int sm_register(void)
{
    mca_base_component_t *c = &mca_coll_sm_component.super.collm_version;
    mca_coll_sm_component_t *cs = &mca_coll_sm_component;

    /* If we want to be selected (i.e., all procs on one node), then
       we should have a high priority */

    cs->sm_priority = COLL_SM_DEFAULT_PRIORITY;
    (void) mca_base_component_var_register(c, "priority", "Priority of the sm coll component",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &cs->sm_priority);

    cs->sm_control_size = COLL_SM_DEFAULT_CONTROL_SIZE;
    (void) mca_base_component_var_register(c, "control_size",
                                           "Length of the control data -- should usually be either the length of a cache line on most SMPs, or the size of a page on machines that support direct memory affinity page placement (in bytes)",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &cs->sm_control_size);

    cs->sm_fragment_size = COLL_SM_DEFAULT_FRAGMENT_SIZE;
    (void) mca_base_component_var_register(c, "fragment_size",
                                           "Fragment size (in bytes) used for passing data through shared memory (will be rounded up to the nearest control_size size)",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &cs->sm_fragment_size);

    cs->sm_comm_num_in_use_flags = COLL_SM_DEFAULT_NUM_IN_USE_FLAGS;
    (void) mca_base_component_var_register(c, "comm_in_use_flags",
                                           "Number of \"in use\" flags, used to mark a message passing area segment as currently being used or not (must be >= 2 and <= comm_num_segments)",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &cs->sm_comm_num_in_use_flags);

    cs->sm_comm_num_segments = COLL_SM_DEFAULT_NUM_SEGMENTS;
    (void) mca_base_component_var_register(c, "comm_num_segments",
                                           "Number of segments in each communicator's shared memory message passing area (must be >= 2, and must be a multiple of comm_in_use_flags)",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &cs->sm_comm_num_segments);

    cs->sm_tree_degree = COLL_SM_DEFAULT_TREE_DEGREE;
    (void) mca_base_component_var_register(c, "tree_degree",
                                           "Degree of the tree for tree-based operations (must be => 1 and <= min(control_size, 255))",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &cs->sm_tree_degree);

    cs->sm_info_comm_size = COLL_SM_DEFAULT_INFO_COMM_SIZE;
    (void) mca_base_component_var_register(c, "info_num_procs",
                                           "Number of processes to use for the calculation of the shared_mem_size MCA information parameter (must be => 2)",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &cs->sm_info_comm_size);

    /* Give the information variable a value before it is registered;
       sm_verify_mca_variables() recomputes it once the parameters
       above have been sanitized. */
    coll_sm_shared_mem_used_data = sm_shared_mem_used_data(cs);
    (void) mca_base_component_var_register(c, "shared_mem_used_data",
                                           "Amount of shared memory used, per communicator, in the shared memory data area for info_num_procs processes (in bytes)",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &coll_sm_shared_mem_used_data);

    return sm_verify_mca_variables();
}