#include #ifdef STL_UNORDERED #include #define MAPNAME std::unordered_map #define EXTRAARGS #elif defined(PHMAP_FLAT) #include "parallel_hashmap/phmap.h" #define MAPNAME phmap::flat_hash_map #define NMSP phmap #define EXTRAARGS #else #include "parallel_hashmap/phmap.h" #if 1 #include #define MTX std::mutex #elif 0 // Abseil's mutexes are very efficient (at least on windows) #include "absl/synchronization/mutex.h" #define MTX phmap::AbslMutex #elif 1 #include #if 1 #include #define MTX boost::mutex // faster if all we do is exclusive locks like this bench #else #include #define MTX boost::upgrade_mutex #endif #elif 1 #include class srwlock { SRWLOCK _lock; public: srwlock() { InitializeSRWLock(&_lock); } void lock() { AcquireSRWLockExclusive(&_lock); } void unlock() { ReleaseSRWLockExclusive(&_lock); } }; #define MTX srwlock #else // spinlocks - slow! #include class spinlock { std::atomic_flag flag = ATOMIC_FLAG_INIT; public: void lock() { while(flag.test_and_set(std::memory_order_acquire)); } void unlock() { flag.clear(std::memory_order_release); } }; #define MTX spinlock #endif #define MAPNAME phmap::parallel_flat_hash_map #define NMSP phmap #define MT_SUPPORT 1 #if MT_SUPPORT == 1 // create the parallel_flat_hash_map without internal mutexes, for when // we programatically ensure that each thread uses different internal submaps // -------------------------------------------------------------------------- #define EXTRAARGS , NMSP::priv::hash_default_hash, \ NMSP::priv::hash_default_eq, \ std::allocator>, 4, NMSP::NullMutex #elif MT_SUPPORT == 2 // create the parallel_flat_hash_map with internal mutexes, for when // we read/write the same parallel_flat_hash_map from multiple threads, // without any special precautions. // -------------------------------------------------------------------------- #define EXTRAARGS , NMSP::priv::hash_default_hash, \ NMSP::priv::hash_default_eq, \ std::allocator>, 4, MTX #else #define EXTRAARGS #endif #endif #define phmap_xstr(s) phmap_str(s) #define phmap_str(s) #s template using HashT = MAPNAME; using hash_t = HashT; using str_hash_t = HashT; const char *program_slug = phmap_xstr(MAPNAME); // "_4"; #include #include #include #include #include #include #include #include #include #include "parallel_hashmap/meminfo.h" #include using std::vector; int64_t _abs(int64_t x) { return (x < 0) ? -x : x; } #ifdef _MSC_VER #pragma warning(disable : 4996) #endif // _MSC_VER // -------------------------------------------------------------------------- class Timer { typedef std::chrono::high_resolution_clock high_resolution_clock; typedef std::chrono::milliseconds milliseconds; public: explicit Timer(bool run = false) { if (run) reset(); } void reset() { _start = high_resolution_clock::now(); } milliseconds elapsed() const { return std::chrono::duration_cast(high_resolution_clock::now() - _start); } private: high_resolution_clock::time_point _start; }; // -------------------------------------------------------------------------- // from: https://github.com/preshing/RandomSequence // -------------------------------------------------------------------------- class RSU { private: unsigned int m_index; unsigned int m_intermediateOffset; static unsigned int permuteQPR(unsigned int x) { static const unsigned int prime = 4294967291u; if (x >= prime) return x; // The 5 integers out of range are mapped to themselves. unsigned int residue = ((unsigned long long) x * x) % prime; return (x <= prime / 2) ? residue : prime - residue; } public: RSU(unsigned int seedBase, unsigned int seedOffset) { m_index = permuteQPR(permuteQPR(seedBase) + 0x682f0161); m_intermediateOffset = permuteQPR(permuteQPR(seedOffset) + 0x46790905); } unsigned int next() { return permuteQPR((permuteQPR(m_index++) + m_intermediateOffset) ^ 0x5bf03635); } }; // -------------------------------------------------------------------------- char * new_string_from_integer(uint64_t num) { int ndigits = num == 0 ? 1 : (int)log10(num) + 1; char * str = (char *)malloc(ndigits + 1); sprintf(str, "%u", (unsigned int)num); return str; } // -------------------------------------------------------------------------- template void _fill(vector &v) { srand(1); // for a fair/deterministic comparison for (size_t i = 0, sz = v.size(); i < sz; ++i) v[i] = (T)(i * 10 + rand() % 10); } // -------------------------------------------------------------------------- template void _shuffle(vector &v) { for (size_t n = v.size(); n >= 2; --n) std::swap(v[n - 1], v[static_cast(rand()) % n]); } // -------------------------------------------------------------------------- template Timer _fill_random(vector &v, HT &hash) { _fill(v); _shuffle(v); Timer timer(true); for (size_t i = 0, sz = v.size(); i < sz; ++i) hash.insert(typename HT::value_type(v[i], 0)); return timer; } // -------------------------------------------------------------------------- void out(const char* test, int64_t cnt, const Timer &t, bool = false) { printf("%s,time,%u,%s,%f\n", test, (unsigned int)cnt, program_slug, (float)((double)t.elapsed().count() / 1000)); } // -------------------------------------------------------------------------- void outmem(const char*, int64_t cnt, uint64_t mem, bool final = false) { static uint64_t max_mem = 0; static uint64_t max_keys = 0; if (final) printf("peak memory usage for %u values: %.2f GB\n", (unsigned int)max_keys, max_mem / ((double)1000 * 1000 * 1000)); else { if (mem > max_mem) max_mem = mem; if ((uint64_t)cnt > max_keys) max_keys = cnt; } } static bool all_done = false; static int64_t s_num_keys[16] = { 0 }; static int64_t loop_idx = 0; static int64_t inner_cnt = 0; static const char *test = "random"; // -------------------------------------------------------------------------- template void _fill_random_inner(int64_t cnt, HT &hash, RSU &rsu) { for (int64_t i=0; i void _fill_random_inner_mt(int64_t cnt, HT &hash, RSU &rsu) { constexpr int64_t num_threads = 8; // has to be a power of two std::unique_ptr threads[num_threads]; auto thread_fn = [&hash, cnt, num_threads](size_t thread_idx, RSU rsu_) { #if MT_SUPPORT size_t modulo = hash.subcnt() / num_threads; // subcnt() returns the number of submaps for (int64_t i=0; ijoin(); } // -------------------------------------------------------------------------- size_t total_num_keys() { size_t n = 0; for (int i=0; i<16; ++i) n += s_num_keys[i]; return n; } // -------------------------------------------------------------------------- template Timer _fill_random2(int64_t cnt, HT &hash) { test = "random"; unsigned int seed = 76687; RSU rsu(seed, seed + 1); Timer timer(true); const int64_t num_loops = 10; inner_cnt = cnt / num_loops; for (int i=0; i<16; ++i) s_num_keys[i] = 0; for (loop_idx=0; loop_idx Timer _lookup(vector &v, HT &hash, size_t &num_present) { _fill_random(v, hash); num_present = 0; size_t max_val = v.size() * 10; Timer timer(true); for (size_t i = 0, sz = v.size(); i < sz; ++i) { num_present += (size_t)(hash.find(v[i]) != hash.end()); num_present += (size_t)(hash.find((T)(rand() % max_val)) != hash.end()); } return timer; } // -------------------------------------------------------------------------- template Timer _delete(vector &v, HT &hash) { _fill_random(v, hash); _shuffle(v); // don't delete in insertion order Timer timer(true); for(size_t i = 0, sz = v.size(); i < sz; ++i) hash.erase(v[i]); return timer; } // -------------------------------------------------------------------------- void memlog() { std::this_thread::sleep_for(std::chrono::milliseconds(10)); uint64_t nbytes_old_out = spp::GetProcessMemoryUsed(); uint64_t nbytes_old = spp::GetProcessMemoryUsed(); // last non outputted mem measurement outmem(test, 0, nbytes_old); int64_t last_loop = 0; while (!all_done) { uint64_t nbytes = spp::GetProcessMemoryUsed(); if ((double)_abs(nbytes - nbytes_old_out) / nbytes_old_out > 0.03 || (double)_abs(nbytes - nbytes_old) / nbytes_old > 0.01) { if ((double)(nbytes - nbytes_old) / nbytes_old > 0.03) outmem(test, total_num_keys() - 1, nbytes_old); outmem(test, total_num_keys(), nbytes); nbytes_old_out = nbytes; last_loop = loop_idx; } else if (loop_idx > last_loop) { outmem(test, total_num_keys(), nbytes); nbytes_old_out = nbytes; last_loop = loop_idx; } nbytes_old = nbytes; std::this_thread::sleep_for(std::chrono::milliseconds(1)); } } // -------------------------------------------------------------------------- int main(int argc, char ** argv) { int64_t num_keys = 100000000; const char *bench_name = "random"; int64_t i, value = 0; if(argc > 2) { num_keys = atoi(argv[1]); bench_name = argv[2]; } hash_t hash; str_hash_t str_hash; srand(1); // for a fair/deterministic comparison Timer timer(true); #if MT_SUPPORT if (!strcmp(program_slug,"absl::parallel_flat_hash_map") || !strcmp(program_slug,"phmap::parallel_flat_hash_map")) program_slug = phmap_xstr(MAPNAME) "_mt"; #endif std::thread t1(memlog); try { if(!strcmp(bench_name, "sequential")) { for(i = 0; i < num_keys; i++) hash.insert(hash_t::value_type(i, value)); } #if 0 else if(!strcmp(bench_name, "random")) { vector v(num_keys); timer = _fill_random(v, hash); out("random", num_keys, timer); } #endif else if(!strcmp(bench_name, "random")) { fprintf(stderr, "size = %zu\n", sizeof(hash)); timer = _fill_random2(num_keys, hash); } else if(!strcmp(bench_name, "lookup")) { vector v(num_keys); size_t num_present; timer = _lookup(v, hash, num_present); //fprintf(stderr, "found %zu\n", num_present); } else if(!strcmp(bench_name, "delete")) { vector v(num_keys); timer = _delete(v, hash); } else if(!strcmp(bench_name, "sequentialstring")) { for(i = 0; i < num_keys; i++) str_hash.insert(str_hash_t::value_type(new_string_from_integer(i), value)); } else if(!strcmp(bench_name, "randomstring")) { for(i = 0; i < num_keys; i++) str_hash.insert(str_hash_t::value_type(new_string_from_integer((int)rand()), value)); } else if(!strcmp(bench_name, "deletestring")) { for(i = 0; i < num_keys; i++) str_hash.insert(str_hash_t::value_type(new_string_from_integer(i), value)); timer.reset(); for(i = 0; i < num_keys; i++) str_hash.erase(new_string_from_integer(i)); } //printf("%f\n", (float)((double)timer.elapsed().count() / 1000)); fflush(stdout); //std::this_thread::sleep_for(std::chrono::seconds(1000)); } catch (...) { } all_done = true; outmem(test, 0, 0, true); t1.join(); return 0; }