/* * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2021 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2012 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2011-2019 IBM Corporation. All rights reserved. * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. * Copyright (c) 2015-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker * semantics. Since linkers generally pull in symbols by object * files, keeping these symbols as the only symbols in this file * prevents utility programs such as "ompi_info" from having to import * entire components just to query their version and parameters. */ #include "prte_config.h" #include "constants.h" #include #ifdef HAVE_UNISTD_H # include #endif #include #include #ifdef HAVE_STRINGS_H # include #endif #ifdef HAVE_SYS_SELECT_H # include #endif #ifdef HAVE_SYS_TIME_H # include #endif #include #ifdef HAVE_SYS_TYPES_H # include #endif #ifdef HAVE_SYS_STAT_H # include #endif #ifdef HAVE_SYS_WAIT_H # include #endif #include #include #ifdef HAVE_PWD_H # include #endif #include "src/class/pmix_pointer_array.h" #include "src/event/event-internal.h" #include "src/mca/base/pmix_base.h" #include "src/mca/prteinstalldirs/prteinstalldirs.h" #include "src/mca/pinstalldirs/pinstalldirs_types.h" #include "src/util/pmix_argv.h" #include "src/util/pmix_basename.h" #include "src/util/pmix_output.h" #include "src/util/pmix_path.h" #include "src/util/pmix_environ.h" #include "src/runtime/prte_globals.h" #include "src/runtime/prte_wait.h" #include "src/threads/pmix_threads.h" #include "src/util/pmix_fd.h" #include "src/util/name_fns.h" #include "src/util/proc_info.h" #include "src/util/pmix_show_help.h" #include "src/mca/errmgr/errmgr.h" #include "src/mca/ess/base/base.h" #include "src/mca/ess/ess.h" #include "src/mca/grpcomm/base/base.h" #include "src/mca/oob/base/base.h" #include "src/mca/rmaps/rmaps.h" #include "src/rml/rml_contact.h" #include "src/rml/rml.h" #include "src/mca/state/state.h" #include "src/mca/plm/base/base.h" #include "src/mca/plm/base/plm_private.h" #include "src/mca/plm/plm.h" #include "src/mca/plm/ssh/plm_ssh.h" static int ssh_init(void); static int ssh_launch(prte_job_t *jdata); static int remote_spawn(void); static int ssh_terminate_prteds(void); static int ssh_finalize(void); prte_plm_base_module_t prte_plm_ssh_module = { .init = ssh_init, .set_hnp_name = prte_plm_base_set_hnp_name, .spawn = ssh_launch, .remote_spawn = remote_spawn, .terminate_job = prte_plm_base_prted_terminate_job, .terminate_orteds = ssh_terminate_prteds, .terminate_procs = prte_plm_base_prted_kill_local_procs, .signal_job = prte_plm_base_prted_signal_local_procs, .finalize = ssh_finalize}; typedef struct { pmix_list_item_t super; int argc; char **argv; prte_proc_t *daemon; } prte_plm_ssh_caddy_t; static void caddy_const(prte_plm_ssh_caddy_t *ptr) { ptr->argv = NULL; ptr->daemon = NULL; } static void caddy_dest(prte_plm_ssh_caddy_t *ptr) { if (NULL != ptr->argv) { PMIX_ARGV_FREE_COMPAT(ptr->argv); } if (NULL != ptr->daemon) { PMIX_RELEASE(ptr->daemon); } } PMIX_CLASS_INSTANCE(prte_plm_ssh_caddy_t, pmix_list_item_t, caddy_const, caddy_dest); typedef enum { PRTE_PLM_SSH_SHELL_BASH = 0, PRTE_PLM_SSH_SHELL_ZSH, PRTE_PLM_SSH_SHELL_TCSH, PRTE_PLM_SSH_SHELL_CSH, PRTE_PLM_SSH_SHELL_KSH, PRTE_PLM_SSH_SHELL_SH, PRTE_PLM_SSH_SHELL_UNKNOWN } prte_plm_ssh_shell_t; /* These strings *must* follow the same order as the enum PRTE_PLM_SSH_SHELL_* */ static const char *prte_plm_ssh_shell_name[7] = {"bash", "zsh", "tcsh", /* tcsh has to be first otherwise strstr finds csh */ "csh", "ksh", "sh", "unknown"}; /* * Local functions */ static void set_handler_default(int sig); static prte_plm_ssh_shell_t find_shell(char *shell); static int launch_agent_setup(const char *agent, char *path); static void ssh_child(int argc, char **argv) __prte_attribute_noreturn__; static int ssh_probe(char *nodename, prte_plm_ssh_shell_t *shell); static int setup_shell(prte_plm_ssh_shell_t *sshell, prte_plm_ssh_shell_t *lshell, char *nodename, int *argc, char ***argv); static void launch_daemons(int fd, short args, void *cbdata); static void process_launch_list(int fd, short args, void *cbdata); /* local global storage */ static int num_in_progress = 0; static pmix_list_t launch_list; static prte_event_t launch_event; static char *ssh_agent_path = NULL; static char **ssh_agent_argv = NULL; /** * Init the module */ static int ssh_init(void) { char *tmp; int rc; /* we were selected, so setup the launch agent */ if (prte_mca_plm_ssh_component.using_qrsh) { /* perform base setup for qrsh */ pmix_asprintf(&tmp, "%s/bin/%s", getenv("SGE_ROOT"), getenv("ARC")); if (PRTE_SUCCESS != (rc = launch_agent_setup("qrsh", tmp))) { PRTE_ERROR_LOG(rc); free(tmp); return rc; } free(tmp); /* automatically add -inherit and grid engine PE related flags */ PMIX_ARGV_APPEND_NOSIZE_COMPAT(&ssh_agent_argv, "-inherit"); /* Don't use the "-noshell" flag as qrsh would have a problem * swallowing a long command */ PMIX_ARGV_APPEND_NOSIZE_COMPAT(&ssh_agent_argv, "-nostdin"); PMIX_ARGV_APPEND_NOSIZE_COMPAT(&ssh_agent_argv, "-V"); if (0 < pmix_output_get_verbosity(prte_plm_base_framework.framework_output)) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&ssh_agent_argv, "-verbose"); tmp = PMIX_ARGV_JOIN_COMPAT(ssh_agent_argv, ' '); pmix_output_verbose(1, prte_plm_base_framework.framework_output, "%s plm:ssh: using \"%s\" for launching\n", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), tmp); free(tmp); } } else if (prte_mca_plm_ssh_component.using_llspawn) { /* perform base setup for llspawn */ if (PRTE_SUCCESS != (rc = launch_agent_setup("llspawn", NULL))) { PRTE_ERROR_LOG(rc); return rc; } pmix_output_verbose(1, prte_plm_base_framework.framework_output, "%s plm:ssh: using \"%s\" for launching\n", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), ssh_agent_path); } else { /* not using qrsh or llspawn - use MCA-specified agent */ if (PRTE_SUCCESS != (rc = launch_agent_setup(prte_mca_plm_ssh_component.agent, NULL))) { PRTE_ERROR_LOG(rc); return rc; } } /* point to our launch command */ if (PRTE_SUCCESS != (rc = prte_state.add_job_state(PRTE_JOB_STATE_LAUNCH_DAEMONS, launch_daemons))) { PRTE_ERROR_LOG(rc); return rc; } /* setup the event for metering the launch */ PMIX_CONSTRUCT(&launch_list, pmix_list_t); prte_event_set(prte_event_base, &launch_event, -1, 0, process_launch_list, NULL); /* start the recvs */ if (PRTE_SUCCESS != (rc = prte_plm_base_comm_start())) { PRTE_ERROR_LOG(rc); } /* we assign daemon nodes at launch */ prte_plm_globals.daemon_nodes_assigned_at_launch = true; return rc; } /** * Callback on daemon exit. */ static void ssh_wait_daemon(int sd, short flags, void *cbdata) { prte_job_t *jdata; prte_wait_tracker_t *t2 = (prte_wait_tracker_t *) cbdata; prte_plm_ssh_caddy_t *caddy = (prte_plm_ssh_caddy_t *) t2->cbdata; prte_proc_t *daemon = caddy->daemon; pmix_status_t rc; PRTE_HIDE_UNUSED_PARAMS(sd, flags); if (prte_prteds_term_ordered || prte_abnormal_term_ordered) { /* ignore any such report - it will occur if we left the * session attached, e.g., while debugging */ PMIX_RELEASE(caddy); PMIX_RELEASE(t2); return; } if (!WIFEXITED(daemon->exit_code) || WEXITSTATUS(daemon->exit_code) != 0) { /* if abnormal exit */ /* if we are not the HNP, send a message to the HNP alerting it * to the failure */ if (!PRTE_PROC_IS_MASTER) { pmix_data_buffer_t *buf; PMIX_OUTPUT_VERBOSE( (1, prte_plm_base_framework.framework_output, "%s daemon %s failed with status %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_VPID_PRINT(daemon->name.rank), WEXITSTATUS(daemon->exit_code))); PMIX_DATA_BUFFER_CREATE(buf); rc = PMIx_Data_pack(NULL, buf, &(daemon->name.rank), 1, PMIX_PROC_RANK); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_DATA_BUFFER_RELEASE(buf); PMIX_RELEASE(caddy); PMIX_RELEASE(t2); return; } rc = PMIx_Data_pack(NULL, buf, &daemon->exit_code, 1, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_DATA_BUFFER_RELEASE(buf); PMIX_RELEASE(caddy); PMIX_RELEASE(t2); return; } PRTE_RML_SEND(rc, PRTE_PROC_MY_HNP->rank, buf, PRTE_RML_TAG_REPORT_REMOTE_LAUNCH); if (PRTE_SUCCESS != rc) { PRTE_ERROR_LOG(rc); PMIX_DATA_BUFFER_RELEASE(buf); PMIX_RELEASE(caddy); PMIX_RELEASE(t2); return; } /* note that this daemon failed */ daemon->state = PRTE_PROC_STATE_FAILED_TO_START; } else { jdata = prte_get_job_data_object(PRTE_PROC_MY_NAME->nspace); PMIX_OUTPUT_VERBOSE( (1, prte_plm_base_framework.framework_output, "%s daemon %s failed with status %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_VPID_PRINT(daemon->name.rank), WEXITSTATUS(daemon->exit_code))); /* set the exit status */ PRTE_UPDATE_EXIT_STATUS(WEXITSTATUS(daemon->exit_code)); /* note that this daemon failed */ daemon->state = PRTE_PROC_STATE_FAILED_TO_START; /* increment the #daemons terminated so we will exit properly */ jdata->num_terminated++; /* remove it from the routing table to ensure num_routes * returns the correct value */ prte_rml_route_lost(daemon->name.rank); /* report that the daemon has failed so we can exit */ PRTE_ACTIVATE_PROC_STATE(&daemon->name, PRTE_PROC_STATE_FAILED_TO_START); } } /* release any delay */ --num_in_progress; if (num_in_progress < prte_mca_plm_ssh_component.num_concurrent) { /* trigger continuation of the launch */ prte_event_active(&launch_event, EV_WRITE, 1); } /* cleanup */ PMIX_RELEASE(t2); } static int setup_launch(int *argcptr, char ***argvptr, char *nodename, int *node_name_index1, int *proc_vpid_index, char *prefix_dir) { int argc; char **argv; char *param, *value, *value2; prte_plm_ssh_shell_t remote_shell, local_shell; int orted_argc; char **orted_argv; char *orted_cmd, *orted_prefix, *final_cmd; int orted_index; int rc; int i; char *full_orted_cmd = NULL; char **final_argv = NULL; char *tmp; /* Figure out the basenames for the libdir and bindir. This requires some explanation: - Use prte_install_dirs.libdir and prte_install_dirs.bindir. - After a discussion on the devel-core mailing list, the developers decided that we should use the local directory basenames as the basis for the prefix on the remote note. This does not handle a few notable cases (e.g., if the libdir/bindir is not simply a subdir under the prefix, if the libdir/bindir basename is not the same on the remote node as it is here on the local node, etc.), but we decided that --prefix was meant to handle "the common case". If you need something more complex than this, a) edit your shell startup files to set PATH/LD_LIBRARY_PATH properly on the remove node, or b) use some new/to-be-defined options that explicitly allow setting the bindir/libdir on the remote node. We decided to implement these options (e.g., --remote-bindir and --remote-libdir) to prun when it actually becomes a problem for someone (vs. a hypothetical situation). Hence, for now, we simply take the basename of this install's libdir and bindir and use it to append this install's prefix and use that on the remote node. */ /* * Build argv array */ argv = PMIX_ARGV_COPY_COMPAT(ssh_agent_argv); argc = PMIX_ARGV_COUNT_COMPAT(argv); /* if any ssh args were provided, now is the time to add them */ if (NULL != prte_mca_plm_ssh_component.ssh_args) { char **ssh_argv; ssh_argv = PMIX_ARGV_SPLIT_COMPAT(prte_mca_plm_ssh_component.ssh_args, ' '); for (i = 0; NULL != ssh_argv[i]; i++) { pmix_argv_append(&argc, &argv, ssh_argv[i]); } PMIX_ARGV_FREE_COMPAT(ssh_argv); } *node_name_index1 = argc; pmix_argv_append(&argc, &argv, "