/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see .
*/
#include
#include
#include
#include
#include
#include
#include
#ifdef _WIN32
#include
#else
#include
#endif
#include
#include
#include
#include
#if FMT_VERSION >= 110000
#include
#endif
#include
#include
#include
#include
namespace dwarfs {
namespace internal {
namespace {
class single_timer {
public:
static constexpr uint64_t const histogram_interval = 1;
single_timer(std::string const& name_space, std::string const& name,
std::initializer_list context)
: log_hist_{1, 0, 64}
, namespace_{name_space}
, name_{name}
, context_{context.begin(), context.end()} {
total_time_.store(0);
}
void add_sample(uint64_t elapsed) {
total_time_.fetch_add(elapsed);
if ((samples_.fetch_add(1) % histogram_interval) == 0) {
auto log_time = folly::findLastSet(elapsed);
std::lock_guard lock(log_hist_mutex_);
log_hist_.addValue(log_time);
}
}
std::string_view get_namespace() const { return namespace_; }
std::string_view name() const { return name_; }
uint64_t total_latency() const { return total_time_.load(); }
void summarize(std::ostream& os, double timebase) const {
if (samples_.load() == 0) {
return;
}
size_t log_p50, log_p90, log_p99;
{
std::lock_guard lock(log_hist_mutex_);
log_p50 = log_hist_.getPercentileEstimate(0.5);
log_p90 = log_hist_.getPercentileEstimate(0.9);
log_p99 = log_hist_.getPercentileEstimate(0.99);
}
auto samples = samples_.load();
auto total_time = total_time_.load();
auto tot = timebase * total_time;
auto avg = tot / samples;
auto p50 = timebase * (UINT64_C(1) << log_p50);
auto p90 = timebase * (UINT64_C(1) << log_p90);
auto p99 = timebase * (UINT64_C(1) << log_p99);
os << "[" << namespace_ << "." << name_ << "]\n";
os << " samples: " << samples << "\n";
os << " overall: " << time_with_unit(tot) << "\n";
os << " avg latency: " << time_with_unit(avg) << "\n";
os << " p50 latency: " << time_with_unit(p50) << "\n";
os << " p90 latency: " << time_with_unit(p90) << "\n";
os << " p99 latency: " << time_with_unit(p99) << "\n\n";
}
std::span context() const { return context_; }
private:
std::atomic samples_{};
std::atomic total_time_{};
folly::Histogram log_hist_;
std::mutex mutable log_hist_mutex_;
std::string const namespace_;
std::string const name_;
std::vector const context_;
};
} // namespace
class performance_monitor_impl : public performance_monitor {
public:
using timer_id = performance_monitor::timer_id;
using time_type = performance_monitor::time_type;
struct trace_event {
trace_event(timer_id id, time_type start, time_type end,
std::span ctx)
: id{id}
, start{start}
, end{end}
, context{ctx.begin(), ctx.end()} {}
// TODO: workaround for older boost small_vector
std::span context_span() const {
return std::span{context.data(), context.size()};
}
timer_id id;
time_type start;
time_type end;
small_vector context;
};
explicit performance_monitor_impl(
std::unordered_set enabled_namespaces,
std::shared_ptr fa,
std::optional trace_file)
: timebase_{get_timebase()}
, enabled_namespaces_{std::move(enabled_namespaces)}
, start_time_{now()}
, trace_file_{std::move(trace_file)}
, fa_{std::move(fa)} {}
~performance_monitor_impl() override {
if (trace_file_) {
write_trace_events(*trace_file_);
}
}
timer_id
setup_timer(std::string const& ns, std::string const& name,
std::initializer_list context) const override {
std::lock_guard lock(timers_mx_);
timer_id rv = timers_.size();
timers_.emplace_back(ns, name, context);
return rv;
}
time_type now() const override {
#ifdef _WIN32
::LARGE_INTEGER ticks;
::QueryPerformanceCounter(&ticks);
return ticks.QuadPart;
#else
struct timespec ts;
::clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
return UINT64_C(1'000'000'000) * ts.tv_sec + ts.tv_nsec;
#endif
}
void add_sample(timer_id id, time_type start,
std::span context) const override {
auto end = now();
// No need to acquire the mutex here as existing timers
// never move in the deque.
auto& t = timers_[id];
t.add_sample(end - start);
if (trace_file_) {
std::vector* events;
{
std::lock_guard lock(events_mx_);
auto& evp = trace_events_[std::this_thread::get_id()];
if (!evp) {
evp = std::make_unique>();
}
events = evp.get();
}
events->emplace_back(id, start, end, context);
}
}
void summarize(std::ostream& os) const override {
int count;
{
std::lock_guard lock(timers_mx_);
count = timers_.size();
}
std::vector> ts;
for (int i = 0; i < count; ++i) {
auto& t = timers_[i];
ts.emplace_back(t.get_namespace(), t.total_latency(), i);
}
std::sort(ts.begin(), ts.end(), [](auto const& a, auto const& b) {
return std::get<0>(a) < std::get<0>(b) ||
(std::get<0>(a) == std::get<0>(b) &&
std::get<1>(a) > std::get<1>(b));
});
for (auto const& t : ts) {
timers_[std::get<2>(t)].summarize(os, timebase_);
}
}
bool is_enabled(std::string const& ns) const override {
return enabled_namespaces_.find(ns) != enabled_namespaces_.end();
}
bool wants_context() const override { return trace_file_.has_value(); }
private:
static double get_timebase() {
#ifdef _WIN32
::LARGE_INTEGER freq;
::QueryPerformanceFrequency(&freq);
return 1.0 / freq.QuadPart;
#else
return 1e-9;
#endif
}
struct json_trace_event {
json_trace_event(timer_id id, int tid, char ph, time_type ts,
std::span ctxt = {})
: id{id}
, tid{tid}
, ph{ph}
, ts{ts}
, args{ctxt.begin(), ctxt.end()} {}
timer_id id;
int tid;
char ph;
time_type ts;
std::vector args;
};
void write_trace_events(std::filesystem::path const& path) const {
std::error_code ec;
auto output = fa_->open_output(path, ec);
if (ec) {
return; // Meh.
}
std::vector events;
std::unordered_map thread_ids;
{
std::lock_guard lock(events_mx_);
int max_tid{0};
for (auto const& [tid, evs] : trace_events_) {
auto it = thread_ids.find(tid);
if (it == thread_ids.end()) {
it = thread_ids.emplace(tid, ++max_tid).first;
}
for (auto const& ev : *evs) {
events.emplace_back(ev.id, it->second, 'B', ev.start - start_time_,
ev.context_span());
events.emplace_back(ev.id, it->second, 'E', ev.end - start_time_);
}
}
}
std::sort(events.begin(), events.end(),
[](auto const& a, auto const& b) { return a.ts < b.ts; });
bool first = true;
auto const pid = ::getpid();
auto& os = output->os();
os << "[\n";
std::lock_guard lock(timers_mx_);
for (auto const& ev : events) {
if (!first) {
os << ",\n";
}
first = false;
auto& t = timers_[ev.id];
std::string name;
if (!ev.args.empty()) {
name = fmt::format("{}({})", t.name(), fmt::join(ev.args, ", "));
} else {
name = t.name();
}
os << fmt::format(" {{\n \"name\": \"{}\",\n \"cat\": \"{}\",\n",
name, t.get_namespace());
os << fmt::format(" \"ph\": \"{}\",\n \"ts\": {:.3f},\n", ev.ph,
1e6 * ev.ts * timebase_);
os << fmt::format(" \"pid\": {},\n \"tid\": {}", pid, ev.tid);
if (!ev.args.empty()) {
auto ctx_names = t.context();
os << ",\n \"args\": {";
for (auto [i, arg] : ranges::views::enumerate(ev.args)) {
if (i > 0) {
os << ", ";
}
std::string arg_name;
if (i < ctx_names.size()) {
arg_name = ctx_names[i];
} else {
arg_name = fmt::format("arg{}", i);
}
os << fmt::format("\"{}\": {}", arg_name, arg);
}
os << "}";
}
os << "\n }";
}
os << "\n]\n";
output->close();
}
std::deque mutable timers_;
std::mutex mutable timers_mx_;
double const timebase_;
std::unordered_set const enabled_namespaces_;
time_type const start_time_;
std::optional const trace_file_;
std::mutex mutable events_mx_;
std::unordered_map<
std::thread::id,
std::unique_ptr>> mutable trace_events_;
std::shared_ptr fa_;
};
} // namespace internal
performance_monitor_proxy::performance_monitor_proxy(
std::shared_ptr mon,
std::string const& mon_namespace)
: mon_{mon && mon->is_enabled(mon_namespace) ? std::move(mon) : nullptr}
, namespace_{mon_namespace} {}
std::unique_ptr performance_monitor::create(
std::unordered_set const& enabled_namespaces,
std::shared_ptr fa,
std::optional trace_file) {
return enabled_namespaces.empty()
? nullptr
: std::make_unique(
std::move(enabled_namespaces), std::move(fa),
std::move(trace_file));
}
} // namespace dwarfs