/*
 * Copyright (c) 2016-2017 Inria. All rights reserved.
 * Copyright (c) 2017      Research Organization for Information Science
 *                         and Technology (RIST). All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

/*
  Measurement of the pml_monitoring component overhead.
  Designed by Clement Foyer.
  Contact the authors for questions.

  To be run as:
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/time.h>
#include "mpi.h"

#define NB_ITER      1000
#define FULL_NB_ITER (size_world * NB_ITER)
#define MAX_SIZE     (1024 * 1024 * 1.4)
#define NB_OPS       6

static int rank_world = -1;
static int size_world = 0;
static int to         = -1;
static int from       = -1;
static MPI_Win win    = MPI_WIN_NULL;

/* Comparator used to sort the latency results */
static int comp_double(const void *_a, const void *_b)
{
    const double *a = _a;
    const double *b = _b;
    if( *a < *b ) return -1;
    else if( *a > *b ) return 1;
    else return 0;
}

/* Timing */
static inline void get_tick(struct timespec *t)
{
#if defined(__bg__)
#  define CLOCK_TYPE CLOCK_REALTIME
#elif defined(CLOCK_MONOTONIC_RAW)
#  define CLOCK_TYPE CLOCK_MONOTONIC_RAW
#elif defined(CLOCK_MONOTONIC)
#  define CLOCK_TYPE CLOCK_MONOTONIC
#endif
#if defined(CLOCK_TYPE)
    clock_gettime(CLOCK_TYPE, t);
#else
    /* Fall back to gettimeofday() when no monotonic clock is available */
    struct timeval tv;
    gettimeofday(&tv, NULL);
    t->tv_sec  = tv.tv_sec;
    t->tv_nsec = tv.tv_usec * 1000;
#endif
}

/* Elapsed time between t1 and t2, in microseconds */
static inline double timing_delay(const struct timespec *const t1,
                                  const struct timespec *const t2)
{
    const double delay = 1000000.0 * (t2->tv_sec - t1->tv_sec)
                       + (t2->tv_nsec - t1->tv_nsec) / 1000.0;
    return delay;
}

/* Operations */
static inline void op_send(double *res, void *sbuf, int size, int tagno, void *rbuf)
{
    MPI_Request request;
    struct timespec start, end;
    /* Post the receive early to be sure no unexpected message will be generated */
    MPI_Irecv(rbuf, size, MPI_BYTE, from, tagno, MPI_COMM_WORLD, &request);
    /* Token ring to synchronize: each process signals its sender so that the
       sender knows it is ready to receive (needed even for non-eager sends) */
    if( 0 == rank_world ) {
        MPI_Send(NULL, 0, MPI_BYTE, from, 100, MPI_COMM_WORLD);
        MPI_Recv(NULL, 0, MPI_BYTE, to, 100, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    } else {
        MPI_Recv(NULL, 0, MPI_BYTE, to, 100, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Send(NULL, 0, MPI_BYTE, from, 100, MPI_COMM_WORLD);
    }
    /* Do the monitored operation */
    get_tick(&start);
    MPI_Send(sbuf, size, MPI_BYTE, to, tagno, MPI_COMM_WORLD);
    get_tick(&end);
    MPI_Wait(&request, MPI_STATUS_IGNORE);
    *res = timing_delay(&start, &end);
}

static inline void op_send_pingpong(double *res, void *sbuf, int size, int tagno, void *rbuf)
{
    struct timespec start, end;
    MPI_Barrier(MPI_COMM_WORLD);
    /* Do the monitored operation. Ranks are paired (each even rank with the
       following odd rank), so this benchmark needs an even number of
       processes. Each pair performs two ping-pongs; each side times the
       round trip it initiates. */
    if( rank_world % 2 ) {
        /* Odd ranks: Recv - Send */
        MPI_Recv(rbuf, size, MPI_BYTE, from, tagno, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Send(sbuf, size, MPI_BYTE, from, tagno, MPI_COMM_WORLD);
        MPI_Barrier(MPI_COMM_WORLD);
        get_tick(&start);
        MPI_Send(sbuf, size, MPI_BYTE, from, tagno, MPI_COMM_WORLD);
        MPI_Recv(rbuf, size, MPI_BYTE, from, tagno, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        get_tick(&end);
    } else {
        /* Even ranks: Send - Recv */
        get_tick(&start);
        MPI_Send(sbuf, size, MPI_BYTE, to, tagno, MPI_COMM_WORLD);
        MPI_Recv(rbuf, size, MPI_BYTE, to, tagno, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        get_tick(&end);
        MPI_Barrier(MPI_COMM_WORLD);
        MPI_Recv(rbuf, size, MPI_BYTE, to, tagno, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Send(sbuf, size, MPI_BYTE, to, tagno, MPI_COMM_WORLD);
    }
    /* A round trip covers two messages, so halve the measured delay */
    *res = timing_delay(&start, &end) / 2;
}
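/* The collective and one-sided variants below deliberately share the
   point-to-point signature above, so that do_bench() can drive every
   benchmark through a single function pointer; parameters that a given
   operation does not need (e.g. tagno, rbuf for the broadcast) are kept
   only for that reason. */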
static inline void op_coll(double *res, void *buff, int size, int tagno, void *rbuf)
{
    struct timespec start, end;
    MPI_Barrier(MPI_COMM_WORLD);
    /* Do the monitored operation */
    get_tick(&start);
    MPI_Bcast(buff, size, MPI_BYTE, 0, MPI_COMM_WORLD);
    get_tick(&end);
    *res = timing_delay(&start, &end);
}

static inline void op_a2a(double *res, void *sbuf, int size, int tagno, void *rbuf)
{
    struct timespec start, end;
    MPI_Barrier(MPI_COMM_WORLD);
    /* Do the monitored operation */
    get_tick(&start);
    MPI_Alltoall(sbuf, size, MPI_BYTE, rbuf, size, MPI_BYTE, MPI_COMM_WORLD);
    get_tick(&end);
    *res = timing_delay(&start, &end);
}

static inline void op_put(double *res, void *sbuf, int size, int tagno, void *rbuf)
{
    struct timespec start, end;
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, to, 0, win);
    /* Do the monitored operation; the unlock is timed as well, since it is
       what completes the put at the target */
    get_tick(&start);
    MPI_Put(sbuf, size, MPI_BYTE, to, 0, size, MPI_BYTE, win);
    MPI_Win_unlock(to, win);
    get_tick(&end);
    *res = timing_delay(&start, &end);
}

static inline void op_get(double *res, void *rbuf, int size, int tagno, void *sbuf)
{
    struct timespec start, end;
    /* Note: the parameter names are swapped relative to the caller, so the
       fetched data lands in the local send buffer while the remote window
       covers the receive buffer. */
    MPI_Win_lock(MPI_LOCK_SHARED, to, 0, win);
    /* Do the monitored operation */
    get_tick(&start);
    MPI_Get(rbuf, size, MPI_BYTE, to, 0, size, MPI_BYTE, win);
    MPI_Win_unlock(to, win);
    get_tick(&end);
    *res = timing_delay(&start, &end);
}

static inline void do_bench(int size, char *sbuf, double *results,
                            void (*op)(double*, void*, int, int, void*))
{
    int iter;
    int tagno = 201;
    /* Lay the receive buffer out after the full all-to-all send region
       (size_world chunks of `size` bytes), so that MPI_Alltoall's send and
       receive buffers never overlap (MPI forbids aliasing them). */
    char *rbuf = sbuf ? sbuf + size_world * size : NULL;
    if( op == op_put || op == op_get ) {
        win = MPI_WIN_NULL;
        MPI_Win_create(rbuf, size, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win);
    }
    for( iter = 0; iter < NB_ITER; ++iter ) {
        op(&results[iter], sbuf, size, tagno, rbuf);
        MPI_Barrier(MPI_COMM_WORLD);
    }
    if( op == op_put || op == op_get ) {
        MPI_Win_free(&win);
        win = MPI_WIN_NULL;
    }
}

int main(int argc, char *argv[])
{
    int size, iter, nop;
    char *sbuf = NULL;
    double results[NB_ITER];
    void (*op)(double*, void*, int, int, void*);
    char name[255];

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank_world);
    MPI_Comm_size(MPI_COMM_WORLD, &size_world);
    to   = (rank_world + 1) % size_world;
    from = (rank_world + size_world - 1) % size_world;

    double full_res[FULL_NB_ITER];
    for( nop = 0; nop < NB_OPS; ++nop ) {
        switch(nop) {
        case 0: op = op_send;          sprintf(name, "MPI_Send");     break;
        case 1: op = op_coll;          sprintf(name, "MPI_Bcast");    break;
        case 2: op = op_a2a;           sprintf(name, "MPI_Alltoall"); break;
        case 3: op = op_send_pingpong; sprintf(name, "MPI_Send_pp");  break;
        case 4: op = op_put;           sprintf(name, "MPI_Put");      break;
        case 5: op = op_get;           sprintf(name, "MPI_Get");      break;
        }
        if( 0 == rank_world )
            printf("# %s%%%d\n# size \t| latency \t| 10^6 B/s \t| MB/s \t| median \t| q1 \t| q3 \t| d1 \t| d9 \t| avg \t| max\n",
                   name, size_world);
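        /* The size loop below grows the message size geometrically by a
           factor of 1.4, falling back to size+1 whenever the truncated
           product would not advance, so every small size starting at 0 is
           still exercised. */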
        for( size = 0; size < MAX_SIZE;
             size = ((int)(size * 1.4) > size) ? (size * 1.4) : (size + 1) ) {
            /* Init buffers */
            if( 0 != size ) {
                /* One send region of size_world chunks + a matching,
                   disjoint receive region (see do_bench) */
                sbuf = (char *)realloc(sbuf, 2 * size_world * size);
            }
            do_bench(size, sbuf, results, op);
            MPI_Gather(results, NB_ITER, MPI_DOUBLE, full_res, NB_ITER, MPI_DOUBLE,
                       0, MPI_COMM_WORLD);
            if( 0 == rank_world ) {
                qsort(full_res, FULL_NB_ITER, sizeof(double), &comp_double);
                const double min_lat = full_res[0];
                const double max_lat = full_res[FULL_NB_ITER - 1];
                const double med_lat = full_res[(FULL_NB_ITER - 1) / 2];
                const double q1_lat  = full_res[(FULL_NB_ITER - 1) / 4];
                const double q3_lat  = full_res[3 * (FULL_NB_ITER - 1) / 4];
                const double d1_lat  = full_res[(FULL_NB_ITER - 1) / 10];
                const double d9_lat  = full_res[9 * (FULL_NB_ITER - 1) / 10];
                double avg_lat = 0.0;
                for( iter = 0; iter < FULL_NB_ITER; iter++ ) {
                    avg_lat += full_res[iter];
                }
                avg_lat /= FULL_NB_ITER;
                /* Latencies are in microseconds, so bytes/us == 10^6 B/s;
                   dividing by 1.048576 converts to binary MB/s (MiB/s) */
                const double bw_million_byte = size / min_lat;
                const double bw_mbyte = bw_million_byte / 1.048576;
                printf("%9lld\t%9.3lf\t%9.3f\t%9.3f\t%9.3lf\t%9.3lf\t%9.3lf\t%9.3lf\t%9.3lf\t%9.3lf\t%9.3lf",
                       (long long)size, min_lat, bw_million_byte, bw_mbyte,
                       med_lat, q1_lat, q3_lat, d1_lat, d9_lat, avg_lat, max_lat);
                printf("\n");
            }
        }
        free(sbuf);
        sbuf = NULL;
    }
    MPI_Finalize();
    return EXIT_SUCCESS;
}
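/*
  Hypothetical usage sketch (not part of the original source; the file and
  binary names are assumptions, and the exact MCA settings depend on the
  Open MPI installation). pml_monitoring_enable is the MCA switch of the
  monitoring component whose overhead this program measures, e.g.:

    mpicc test_overhead.c -o test_overhead
    mpirun -np 4 --mca pml_monitoring_enable 2 ./test_overhead
*/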