/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see .
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
namespace dwarfs::writer {
namespace internal {
using namespace std::placeholders;
class categorizer_manager_private : public categorizer_manager::impl {
public:
virtual std::vector> const&
categorizers() const = 0;
virtual fragment_category::value_type
category(std::string_view cat) const = 0;
};
template
class categorizer_job_ final : public categorizer_job::impl {
public:
categorizer_job_(logger& lgr, categorizer_manager_private const& mgr,
std::filesystem::path const& path)
: LOG_PROXY_INIT(lgr)
, mgr_{mgr}
, path_{path}
, cat_mapper_{std::bind(&categorizer_manager_private::category,
std::cref(mgr_), _1)} {}
void set_total_size(size_t total_size) override;
void categorize_random_access(std::span data) override;
void categorize_sequential(std::span data) override;
inode_fragments result() override;
bool best_result_found() const override;
private:
LOG_PROXY_DECL(LoggerPolicy);
categorizer_manager_private const& mgr_;
inode_fragments best_;
int index_{-1};
bool is_global_best_{false};
size_t total_size_{0};
std::vector>>
seq_jobs_;
std::filesystem::path const path_;
category_mapper cat_mapper_;
};
template
void categorizer_job_::set_total_size(size_t total_size) {
total_size_ = total_size;
}
template
void categorizer_job_::categorize_random_access(
std::span data) {
DWARFS_CHECK(index_ < 0,
"internal error: index already set in categorize_random_access");
total_size_ = data.size();
bool global_best = true;
for (auto&& [index, cat] : ranges::views::enumerate(mgr_.categorizers())) {
if (auto p = dynamic_cast(cat.get())) {
if (auto c = p->categorize(path_, data, cat_mapper_)) {
best_ = c;
index_ = index;
is_global_best_ = global_best;
break;
}
} else {
global_best = false;
}
}
}
template
void categorizer_job_::categorize_sequential(
std::span data) {
if (is_global_best_) {
return;
}
if (seq_jobs_.empty()) [[unlikely]] {
for (auto&& [index, cat] : ranges::views::enumerate(mgr_.categorizers())) {
if (index_ >= 0 && static_cast(index) >= index_) {
break;
}
if (auto p = dynamic_cast(cat.get())) {
if (auto job = p->job(path_, total_size_, cat_mapper_)) {
seq_jobs_.emplace_back(index, std::move(job));
}
}
}
}
for (auto&& [index, job] : seq_jobs_) {
job->add(data);
}
}
template
inode_fragments categorizer_job_::result() {
if (!seq_jobs_.empty()) {
for (auto&& [index, job] : seq_jobs_) {
if (auto c = job->result()) {
assert(index_ < 0 || index < index_);
best_ = c;
break;
}
}
seq_jobs_.clear();
}
LOG_TRACE << path_ << " -> "
<< best_.to_string([this](fragment_category::value_type c) {
return std::string(mgr_.category_name(c));
});
return best_;
}
template
bool categorizer_job_::best_result_found() const {
return is_global_best_;
}
template
class categorizer_manager_ final : public categorizer_manager_private {
public:
explicit categorizer_manager_(logger& lgr)
: lgr_{lgr}
, LOG_PROXY_INIT(lgr) {
add_category(categorizer::DEFAULT_CATEGORY,
std::numeric_limits::max());
}
void add(std::shared_ptr c) override;
categorizer_job job(std::filesystem::path const& path) const override;
std::string_view
category_name(fragment_category::value_type c) const override;
std::optional
category_value(std::string_view name) const override {
std::optional rv;
if (auto it = catmap_.find(name); it != catmap_.end()) {
rv.emplace(it->second);
}
return rv;
}
std::string category_metadata(fragment_category c) const override;
void set_metadata_requirements(fragment_category::value_type c,
std::string req) override;
bool
deterministic_less(fragment_category a, fragment_category b) const override;
std::vector> const&
categorizers() const override {
return categorizers_;
}
fragment_category::value_type category(std::string_view cat) const override {
auto it = catmap_.find(cat);
DWARFS_CHECK(it != catmap_.end(), fmt::format("unknown category: {}", cat));
return it->second;
}
private:
void add_category(std::string_view cat, size_t categorizer_index) {
if (catmap_.emplace(cat, categories_.size()).second) {
categories_.emplace_back(cat, categorizer_index);
} else {
LOG_WARN << "duplicate category: " << cat;
}
}
logger& lgr_;
LOG_PROXY_DECL(LoggerPolicy);
std::vector> categorizers_;
// TODO: category descriptions?
std::vector> categories_;
std::unordered_map catmap_;
};
template
void categorizer_manager_::add(std::shared_ptr c) {
for (auto const& cat : c->categories()) {
add_category(cat, categorizers_.size());
}
categorizers_.emplace_back(std::move(c));
}
template
categorizer_job categorizer_manager_::job(
std::filesystem::path const& path) const {
return categorizer_job(
make_unique_logging_object(lgr_, *this, path));
}
template
std::string_view categorizer_manager_::category_name(
fragment_category::value_type c) const {
return DWARFS_NOTHROW(categories_.at(c)).first;
}
template
std::string categorizer_manager_::category_metadata(
fragment_category c) const {
if (c.value() == 0) {
return std::string();
}
auto cat = DWARFS_NOTHROW(categories_.at(c.value()));
auto categorizer = DWARFS_NOTHROW(categorizers_.at(cat.second));
return categorizer->category_metadata(cat.first, c);
}
template
void categorizer_manager_::set_metadata_requirements(
fragment_category::value_type c, std::string req) {
auto cat = DWARFS_NOTHROW(categories_.at(c));
auto categorizer = DWARFS_NOTHROW(categorizers_.at(cat.second));
categorizer->set_metadata_requirements(cat.first, req);
}
template
bool categorizer_manager_::deterministic_less(
fragment_category a, fragment_category b) const {
auto cna = category_name(a.value());
auto cnb = category_name(b.value());
if (cna < cnb) {
return true;
}
if (cna > cnb) {
return false;
}
// TODO: the above can be replaced by the following once we have support for
// spaceship operator everywhere
//
// auto cmp = category_name(a.value()) <=> category_name(b.value());
// if (cmp != 0) {
// return cmp < 0;
// }
auto cat = DWARFS_NOTHROW(categories_.at(a.value()));
auto categorizer = DWARFS_NOTHROW(categorizers_.at(cat.second));
return categorizer->subcategory_less(a, b);
}
} // namespace internal
namespace po = boost::program_options;
std::string category_prefix(std::shared_ptr const& mgr,
fragment_category cat) {
return category_prefix(mgr.get(), cat);
}
std::string category_prefix(std::unique_ptr const& mgr,
fragment_category cat) {
return category_prefix(mgr.get(), cat);
}
std::string category_prefix(std::shared_ptr const& mgr,
fragment_category::value_type cat) {
return category_prefix(mgr.get(), cat);
}
std::string category_prefix(std::unique_ptr const& mgr,
fragment_category::value_type cat) {
return category_prefix(mgr.get(), cat);
}
std::string category_prefix(categorizer_manager const* mgr,
fragment_category::value_type cat) {
return category_prefix(mgr, fragment_category(cat));
}
std::string
category_prefix(categorizer_manager const* mgr, fragment_category cat) {
std::string prefix;
if (mgr) {
prefix = fmt::format(
"[{}{}] ", mgr->category_name(cat.value()),
cat.has_subcategory() ? fmt::format("/{}", cat.subcategory()) : "");
}
return prefix;
}
std::string
categorizer::category_metadata(std::string_view, fragment_category) const {
return std::string();
}
void categorizer::set_metadata_requirements(std::string_view,
std::string requirements) {
if (!requirements.empty()) {
compression_metadata_requirements().parse(
nlohmann::json::parse(requirements));
}
}
categorizer_job::categorizer_job() = default;
categorizer_job::categorizer_job(std::unique_ptr impl)
: impl_{std::move(impl)} {}
categorizer_manager::categorizer_manager(logger& lgr)
: impl_(make_unique_logging_object(lgr)) {}
fragment_category categorizer_manager::default_category() {
return fragment_category(0);
}
categorizer_registry& categorizer_registry::instance() {
static categorizer_registry the_instance;
return the_instance;
}
void categorizer_registry::register_factory(
std::unique_ptr&& factory) {
auto name = factory->name();
if (!factories_.emplace(name, std::move(factory)).second) {
std::cerr << "categorizer factory name conflict (" << name << "\n";
std::abort();
}
}
std::unique_ptr
categorizer_registry::create(logger& lgr, std::string const& name,
po::variables_map const& vm) const {
auto it = factories_.find(name);
if (it == factories_.end()) {
DWARFS_THROW(runtime_error, "unknown categorizer: " + name);
}
return it->second->create(lgr, vm);
}
void categorizer_registry::add_options(po::options_description& opts) const {
for (auto& f : factories_) {
if (auto f_opts = f.second->options()) {
opts.add(*f_opts);
}
}
}
std::vector categorizer_registry::categorizer_names() const {
std::vector rv;
for (auto& f : factories_) {
rv.emplace_back(f.first);
}
return rv;
}
categorizer_registry::categorizer_registry() {
using namespace ::dwarfs::writer::detail;
// binary_categorizer_factory_registrar(*this);
fits_categorizer_factory_registrar(*this);
incompressible_categorizer_factory_registrar(*this);
// libmagic_categorizer_factory_registrar(*this);
pcmaudio_categorizer_factory_registrar(*this);
}
categorizer_registry::~categorizer_registry() = default;
} // namespace dwarfs::writer