/* -*- Mode: C; c-basic-offset:4 ; -*- */ /* * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Sun Microsystems Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "ompi_config.h" #include "ddt_lib.h" #include "opal/runtime/opal.h" #include "opal/datatype/opal_convertor.h" #include #include #ifdef HAVE_SYS_TIME_H #include #endif #include #include /* Compile with: mpicc -DHAVE_CONFIG_H -I. -I../../include -I../../../ompi-trunk/include -I../.. -I../../include -I../../../ompi-trunk/opal -I../../../ompi-trunk/orte -I../../../ompi-trunk/ompi -g ddt_test.c -o ddt_test */ #define TIMER_DATA_TYPE struct timeval #define GET_TIME(TV) gettimeofday( &(TV), NULL ) #define ELAPSED_TIME(TSTART, TEND) (((TEND).tv_sec - (TSTART).tv_sec) * 1000000 + ((TEND).tv_usec - (TSTART).tv_usec)) #define DUMP_DATA_AFTER_COMMIT 0x00000001 #define CHECK_PACK_UNPACK 0x00000002 uint32_t remote_arch = 0xffffffff; static int test_upper( unsigned int length ) { double *mat1, *mat2, *inbuf; ompi_datatype_t *pdt; opal_convertor_t * pConv; char *ptr; int rc; unsigned int i, j, iov_count, split_chunk, total_length; size_t max_data; struct iovec a; TIMER_DATA_TYPE start, end; long total_time; printf( "test upper matrix\n" ); pdt = upper_matrix( length ); /*dt_dump( pdt );*/ mat1 = malloc( length * length * sizeof(double) ); init_random_upper_matrix( length, mat1 ); mat2 = calloc( length * length, sizeof(double) ); total_length = length * (length + 1) * ( sizeof(double) / 2); inbuf = (double*)malloc( total_length ); ptr = (char*)inbuf; /* copy upper matrix in the array simulating the input buffer */ for( i = 0; i < length; i++ ) { uint32_t pos = i * length + i; for( j = i; j < length; j++, pos++ ) { *inbuf = mat1[pos]; inbuf++; } } inbuf = (double*)ptr; pConv = opal_convertor_create( remote_arch, 0 ); if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( pConv, &(pdt->super), 1, mat2 ) ) { printf( "Cannot attach the datatype to a convertor\n" ); return OMPI_ERROR; } GET_TIME( start ); split_chunk = (length + 1) * sizeof(double); /* split_chunk = (total_length + 1) * sizeof(double); */ for( i = total_length; i > 0; ) { if( i <= split_chunk ) { /* equal test just to be able to set a breakpoint */ split_chunk = i; } a.iov_base = ptr; a.iov_len = split_chunk; iov_count = 1; max_data = split_chunk; opal_convertor_unpack( pConv, &a, &iov_count, &max_data ); ptr += max_data; i -= max_data; if( mat2[0] != inbuf[0] ) assert(0); } GET_TIME( end ); total_time = ELAPSED_TIME( start, end ); printf( "complete unpacking in %ld microsec\n", total_time ); free( inbuf ); rc = check_diag_matrix( length, mat1, mat2 ); free( mat1 ); free( mat2 ); /* test the automatic destruction pf the data */ ompi_datatype_destroy( &pdt ); assert( pdt == NULL ); OBJ_RELEASE( pConv ); return rc; } /** * Computing the correct buffer length for moving a multiple of a datatype * is not an easy task. Define a function to centralize the complexity in a * single location. */ static size_t compute_buffer_length(ompi_datatype_t* pdt, int count) { MPI_Aint extent, lb, true_extent, true_lb; size_t length; ompi_datatype_get_extent(pdt, &lb, &extent); ompi_datatype_get_true_extent(pdt, &true_lb, &true_extent); (void)true_lb; length = true_lb + true_extent + (count - 1) * extent; return length; } /** * Conversion function. They deal with data-types in 3 ways, always making local copies. * In order to allow performance testings, there are 3 functions: * - one copying directly from one memory location to another one using the * data-type copy function. * - one which use a 2 convertors created with the same data-type * - and one using 2 convertors created from different data-types. * */ static int local_copy_ddt_count( ompi_datatype_t* pdt, int count ) { void *pdst, *psrc; TIMER_DATA_TYPE start, end; long total_time; size_t length; length = compute_buffer_length(pdt, count); pdst = malloc(length); psrc = malloc(length); for( size_t i = 0; i < length; i++ ) ((char*)psrc)[i] = i % 128 + 32; memset(pdst, 0, length); cache_trash(); /* make sure the cache is useless */ GET_TIME( start ); if( OMPI_SUCCESS != ompi_datatype_copy_content_same_ddt( pdt, count, pdst, psrc ) ) { printf( "Unable to copy the datatype in the function local_copy_ddt_count." " Is the datatype committed ?\n" ); } GET_TIME( end ); total_time = ELAPSED_TIME( start, end ); printf( "direct local copy in %ld microsec\n", total_time ); free(pdst); free(psrc); return OMPI_SUCCESS; } static int local_copy_with_convertor_2datatypes( ompi_datatype_t* send_type, int send_count, ompi_datatype_t* recv_type, int recv_count, int chunk ) { void *pdst = NULL, *psrc = NULL, *ptemp = NULL; opal_convertor_t *send_convertor = NULL, *recv_convertor = NULL; struct iovec iov; uint32_t iov_count; size_t max_data; int32_t length = 0, done1 = 0, done2 = 0; TIMER_DATA_TYPE start, end, unpack_start, unpack_end; long total_time, unpack_time = 0; size_t slength, rlength; rlength = compute_buffer_length(recv_type, recv_count); slength = compute_buffer_length(send_type, send_count); pdst = malloc( rlength ); psrc = malloc( slength ); ptemp = malloc( chunk ); /* initialize the buffers to prevent valgrind from complaining */ for( size_t i = 0; i < slength; i++ ) ((char*)psrc)[i] = i % 128 + 32; memset(pdst, 0, rlength); send_convertor = opal_convertor_create( remote_arch, 0 ); if( OPAL_SUCCESS != opal_convertor_prepare_for_send( send_convertor, &(send_type->super), send_count, psrc ) ) { printf( "Unable to create the send convertor. Is the datatype committed ?\n" ); goto clean_and_return; } recv_convertor = opal_convertor_create( remote_arch, 0 ); if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( recv_convertor, &(recv_type->super), recv_count, pdst ) ) { printf( "Unable to create the recv convertor. Is the datatype committed ?\n" ); goto clean_and_return; } cache_trash(); /* make sure the cache is useless */ GET_TIME( start ); while( (done1 & done2) != 1 ) { /* They are supposed to finish in exactly the same time. */ if( done1 | done2 ) { printf( "WRONG !!! the send is %s but the receive is %s in local_copy_with_convertor_2datatypes\n", (done1 ? "finish" : "not finish"), (done2 ? "finish" : "not finish") ); } max_data = chunk; iov_count = 1; iov.iov_base = ptemp; iov.iov_len = chunk; if( done1 == 0 ) { done1 = opal_convertor_pack( send_convertor, &iov, &iov_count, &max_data ); } if( done2 == 0 ) { GET_TIME( unpack_start ); done2 = opal_convertor_unpack( recv_convertor, &iov, &iov_count, &max_data ); GET_TIME( unpack_end ); unpack_time += ELAPSED_TIME( unpack_start, unpack_end ); } length += max_data; } GET_TIME( end ); total_time = ELAPSED_TIME( start, end ); printf( "copying different data-types using convertors in %ld microsec\n", total_time ); printf( "\t unpack in %ld microsec [pack in %ld microsec]\n", unpack_time, total_time - unpack_time ); clean_and_return: if( send_convertor != NULL ) { OBJ_RELEASE( send_convertor ); assert( send_convertor == NULL ); } if( recv_convertor != NULL ) { OBJ_RELEASE( recv_convertor ); assert( recv_convertor == NULL ); } if( NULL != pdst ) free( pdst ); if( NULL != psrc ) free( psrc ); if( NULL != ptemp ) free( ptemp ); return OMPI_SUCCESS; } static int local_copy_with_convertor( ompi_datatype_t* pdt, int count, int chunk ) { void *pdst = NULL, *psrc = NULL, *ptemp = NULL; opal_convertor_t *send_convertor = NULL, *recv_convertor = NULL; struct iovec iov; uint32_t iov_count; size_t max_data; int32_t length = 0, done1 = 0, done2 = 0; TIMER_DATA_TYPE start, end, unpack_start, unpack_end; long total_time, unpack_time = 0; max_data = compute_buffer_length(pdt, count); pdst = malloc(max_data); psrc = malloc(max_data); ptemp = malloc(chunk); for( int i = 0; i < length; ((char*)psrc)[i] = i % 128 + 32, i++ ); memset( pdst, 0, length ); send_convertor = opal_convertor_create( remote_arch, 0 ); if( OPAL_SUCCESS != opal_convertor_prepare_for_send( send_convertor, &(pdt->super), count, psrc ) ) { printf( "Unable to create the send convertor. Is the datatype committed ?\n" ); goto clean_and_return; } recv_convertor = opal_convertor_create( remote_arch, 0 ); if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( recv_convertor, &(pdt->super), count, pdst ) ) { printf( "Unable to create the recv convertor. Is the datatype committed ?\n" ); goto clean_and_return; } cache_trash(); /* make sure the cache is useless */ GET_TIME( start ); while( (done1 & done2) != 1 ) { /* They are supposed to finish in exactly the same time. */ if( done1 | done2 ) { printf( "WRONG !!! the send is %s but the receive is %s in local_copy_with_convertor\n", (done1 ? "finish" : "not finish"), (done2 ? "finish" : "not finish") ); } max_data = chunk; iov_count = 1; iov.iov_base = ptemp; iov.iov_len = chunk; if( done1 == 0 ) { done1 = opal_convertor_pack( send_convertor, &iov, &iov_count, &max_data ); } if( done2 == 0 ) { GET_TIME( unpack_start ); done2 = opal_convertor_unpack( recv_convertor, &iov, &iov_count, &max_data ); GET_TIME( unpack_end ); unpack_time += ELAPSED_TIME( unpack_start, unpack_end ); } length += max_data; } GET_TIME( end ); total_time = ELAPSED_TIME( start, end ); printf( "copying same data-type using convertors in %ld microsec\n", total_time ); printf( "\t unpack in %ld microsec [pack in %ld microsec]\n", unpack_time, total_time - unpack_time ); clean_and_return: if( NULL != send_convertor ) OBJ_RELEASE( send_convertor ); if( NULL != recv_convertor ) OBJ_RELEASE( recv_convertor ); if( NULL != pdst ) free( pdst ); if( NULL != psrc ) free( psrc ); if( NULL != ptemp ) free( ptemp ); return OMPI_SUCCESS; } /** * Main function. Call several tests and print-out the results. It try to stress the convertor * using difficult data-type constructions as well as strange segment sizes for the conversion. * Usually, it is able to detect most of the data-type and convertor problems. Any modifications * on the data-type engine should first pass all the tests from this file, before going into other * tests. */ int main( int argc, char* argv[] ) { ompi_datatype_t *pdt, *pdt1, *pdt2, *pdt3; int rc, length = 500; opal_init(&argc, &argv); ompi_datatype_init(); /** * By default simulate homogeneous architectures. */ remote_arch = opal_local_arch; printf( "\n\n#\n * TEST INVERSED VECTOR\n #\n\n" ); pdt = create_inversed_vector( &ompi_mpi_int.dt, 10 ); if( outputFlags & CHECK_PACK_UNPACK ) { local_copy_ddt_count(pdt, 100); local_copy_with_convertor(pdt, 100, 956); } OBJ_RELEASE( pdt ); assert( pdt == NULL ); printf( "\n\n#\n * TEST STRANGE DATATYPE\n #\n\n" ); pdt = create_strange_dt(); if( outputFlags & CHECK_PACK_UNPACK ) { local_copy_ddt_count(pdt, 1); local_copy_with_convertor(pdt, 1, 956); } OBJ_RELEASE( pdt ); assert( pdt == NULL ); printf( "\n\n#\n * TEST UPPER TRIANGULAR MATRIX (size 100)\n #\n\n" ); pdt = upper_matrix(100); if( outputFlags & CHECK_PACK_UNPACK ) { local_copy_ddt_count(pdt, 1); local_copy_with_convertor(pdt, 1, 48); } OBJ_RELEASE( pdt ); assert( pdt == NULL ); printf( "\n\n#\n * TEST UPPER MATRIX\n #\n\n" ); rc = test_upper( length ); if( rc == 0 ) printf( "decode [PASSED]\n" ); else printf( "decode [NOT PASSED]\n" ); printf( "\n\n#\n * TEST MATRIX BORDERS\n #\n\n" ); pdt = test_matrix_borders( length, 100 ); if( outputFlags & DUMP_DATA_AFTER_COMMIT ) { ompi_datatype_dump( pdt ); } OBJ_RELEASE( pdt ); assert( pdt == NULL ); printf( "\n\n#\n * TEST CONTIGUOUS\n #\n\n" ); pdt = test_contiguous(); OBJ_RELEASE( pdt ); assert( pdt == NULL ); printf( "\n\n#\n * TEST STRUCT\n #\n\n" ); pdt = test_struct(); OBJ_RELEASE( pdt ); assert( pdt == NULL ); ompi_datatype_create_contiguous(0, &ompi_mpi_datatype_null.dt, &pdt1); ompi_datatype_create_contiguous(0, &ompi_mpi_datatype_null.dt, &pdt2); ompi_datatype_create_contiguous(0, &ompi_mpi_datatype_null.dt, &pdt3); ompi_datatype_add( pdt3, &ompi_mpi_int.dt, 10, 0, -1 ); ompi_datatype_add( pdt3, &ompi_mpi_float.dt, 5, 10 * sizeof(int), -1 ); ompi_datatype_add( pdt2, &ompi_mpi_float.dt, 1, 0, -1 ); ompi_datatype_add( pdt2, pdt3, 3, sizeof(int) * 1, -1 ); ompi_datatype_add( pdt1, &ompi_mpi_long_long_int.dt, 5, 0, -1 ); ompi_datatype_add( pdt1, &ompi_mpi_long_double.dt, 2, sizeof(long long) * 5, -1 ); printf( ">>--------------------------------------------<<\n" ); if( outputFlags & DUMP_DATA_AFTER_COMMIT ) { ompi_datatype_dump( pdt1 ); } printf( ">>--------------------------------------------<<\n" ); if( outputFlags & DUMP_DATA_AFTER_COMMIT ) { ompi_datatype_dump( pdt2 ); } printf( ">>--------------------------------------------<<\n" ); if( outputFlags & DUMP_DATA_AFTER_COMMIT ) { ompi_datatype_dump( pdt3 ); } OBJ_RELEASE( pdt1 ); assert( pdt1 == NULL ); OBJ_RELEASE( pdt2 ); assert( pdt2 == NULL ); OBJ_RELEASE( pdt3 ); assert( pdt3 == NULL ); printf( ">>--------------------------------------------<<\n" ); printf( " Contiguous data-type (MPI_DOUBLE)\n" ); pdt = MPI_DOUBLE; if( outputFlags & CHECK_PACK_UNPACK ) { local_copy_ddt_count(pdt, 4500); local_copy_with_convertor( pdt, 4500, 12 ); local_copy_with_convertor_2datatypes( pdt, 4500, pdt, 4500, 12 ); } printf( ">>--------------------------------------------<<\n" ); printf( ">>--------------------------------------------<<\n" ); if( outputFlags & CHECK_PACK_UNPACK ) { printf( "Contiguous multiple data-type (4500*1)\n" ); pdt = create_contiguous_type( MPI_DOUBLE, 4500 ); local_copy_ddt_count(pdt, 1); local_copy_with_convertor( pdt, 1, 12 ); local_copy_with_convertor_2datatypes( pdt, 1, pdt, 1, 12 ); OBJ_RELEASE( pdt ); assert( pdt == NULL ); printf( "Contiguous multiple data-type (450*10)\n" ); pdt = create_contiguous_type( MPI_DOUBLE, 450 ); local_copy_ddt_count(pdt, 10); local_copy_with_convertor( pdt, 10, 12 ); local_copy_with_convertor_2datatypes( pdt, 10, pdt, 10, 12 ); OBJ_RELEASE( pdt ); assert( pdt == NULL ); printf( "Contiguous multiple data-type (45*100)\n" ); pdt = create_contiguous_type( MPI_DOUBLE, 45 ); local_copy_ddt_count(pdt, 100); local_copy_with_convertor( pdt, 100, 12 ); local_copy_with_convertor_2datatypes( pdt, 100, pdt, 100, 12 ); OBJ_RELEASE( pdt ); assert( pdt == NULL ); printf( "Contiguous multiple data-type (100*45)\n" ); pdt = create_contiguous_type( MPI_DOUBLE, 100 ); local_copy_ddt_count(pdt, 45); local_copy_with_convertor( pdt, 45, 12 ); local_copy_with_convertor_2datatypes( pdt, 45, pdt, 45, 12 ); OBJ_RELEASE( pdt ); assert( pdt == NULL ); printf( "Contiguous multiple data-type (10*450)\n" ); pdt = create_contiguous_type( MPI_DOUBLE, 10 ); local_copy_ddt_count(pdt, 450); local_copy_with_convertor( pdt, 450, 12 ); local_copy_with_convertor_2datatypes( pdt, 450, pdt, 450, 12 ); OBJ_RELEASE( pdt ); assert( pdt == NULL ); printf( "Contiguous multiple data-type (1*4500)\n" ); pdt = create_contiguous_type( MPI_DOUBLE, 1 ); local_copy_ddt_count(pdt, 4500); local_copy_with_convertor( pdt, 4500, 12 ); local_copy_with_convertor_2datatypes( pdt, 4500, pdt, 4500, 12 ); OBJ_RELEASE( pdt ); assert( pdt == NULL ); } printf( ">>--------------------------------------------<<\n" ); printf( ">>--------------------------------------------<<\n" ); printf( "Vector data-type (450 times 10 double stride 11)\n" ); pdt = create_vector_type( MPI_DOUBLE, 450, 10, 11 ); ompi_datatype_dump( pdt ); if( outputFlags & CHECK_PACK_UNPACK ) { local_copy_ddt_count(pdt, 1); local_copy_with_convertor( pdt, 1, 12 ); local_copy_with_convertor_2datatypes( pdt, 1, pdt, 1, 12 ); local_copy_with_convertor( pdt, 1, 82 ); local_copy_with_convertor_2datatypes( pdt, 1, pdt, 1, 82 ); local_copy_with_convertor( pdt, 1, 6000 ); local_copy_with_convertor_2datatypes( pdt, 1, pdt, 1, 6000 ); local_copy_with_convertor( pdt, 1, 36000 ); local_copy_with_convertor_2datatypes( pdt, 1, pdt, 1, 36000 ); } printf( ">>--------------------------------------------<<\n" ); OBJ_RELEASE( pdt ); assert( pdt == NULL ); printf( ">>--------------------------------------------<<\n" ); pdt = test_struct_char_double(); if( outputFlags & CHECK_PACK_UNPACK ) { local_copy_ddt_count(pdt, 4500); local_copy_with_convertor( pdt, 4500, 12 ); local_copy_with_convertor_2datatypes( pdt, 4500, pdt, 4500, 12 ); } printf( ">>--------------------------------------------<<\n" ); OBJ_RELEASE( pdt ); assert( pdt == NULL ); printf( ">>--------------------------------------------<<\n" ); pdt = test_create_twice_two_doubles(); if( outputFlags & CHECK_PACK_UNPACK ) { local_copy_ddt_count(pdt, 4500); local_copy_with_convertor( pdt, 4500, 12 ); local_copy_with_convertor_2datatypes( pdt, 4500, pdt, 4500, 12 ); } printf( ">>--------------------------------------------<<\n" ); OBJ_RELEASE( pdt ); assert( pdt == NULL ); printf( ">>--------------------------------------------<<\n" ); pdt = test_create_blacs_type(); if( outputFlags & CHECK_PACK_UNPACK ) { ompi_datatype_dump( pdt ); local_copy_ddt_count(pdt, 2); local_copy_ddt_count(pdt, 4500); local_copy_with_convertor( pdt, 4500, 956 ); local_copy_with_convertor_2datatypes( pdt, 4500, pdt, 4500, 956 ); local_copy_with_convertor( pdt, 4500, 16*1024 ); local_copy_with_convertor_2datatypes( pdt, 4500, pdt, 4500, 16*1024 ); local_copy_with_convertor( pdt, 4500, 64*1024 ); local_copy_with_convertor_2datatypes( pdt, 4500, pdt, 4500, 64*1024 ); } printf( ">>--------------------------------------------<<\n" ); OBJ_RELEASE( pdt ); assert( pdt == NULL ); printf( ">>--------------------------------------------<<\n" ); pdt1 = test_create_blacs_type1( &ompi_mpi_int.dt ); pdt2 = test_create_blacs_type2( &ompi_mpi_int.dt ); if( outputFlags & CHECK_PACK_UNPACK ) { local_copy_with_convertor_2datatypes( pdt1, 1, pdt2, 1, 100 ); } printf( ">>--------------------------------------------<<\n" ); OBJ_RELEASE( pdt1 ); assert( pdt1 == NULL ); OBJ_RELEASE( pdt2 ); assert( pdt2 == NULL ); /* clean-ups all data allocations */ opal_finalize_util(); return OMPI_SUCCESS; }