/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see .
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
namespace dwarfs::writer::internal {
using namespace dwarfs::internal;
namespace {
bool inode_less_by_size(inode const* a, inode const* b) {
auto sa = a->size();
auto sb = b->size();
return sa > sb || (sa == sb && a->any()->less_revpath(*b->any()));
}
} // namespace
template
class inode_ordering_ final : public inode_ordering::impl {
public:
inode_ordering_(logger& lgr, progress& prog, inode_options const& opts)
: LOG_PROXY_INIT(lgr)
, prog_{prog}
, opts_{opts} {}
void by_inode_number(sortable_inode_span& sp) const override;
void by_path(sortable_inode_span& sp) const override;
void by_reverse_path(sortable_inode_span& sp) const override;
void
by_similarity(sortable_inode_span& sp, fragment_category cat) const override;
void
by_nilsimsa(worker_group& wg, similarity_ordering_options const& opts,
sortable_inode_span& sp, fragment_category cat) const override;
private:
void
by_nilsimsa_impl(worker_group& wg, similarity_ordering_options const& opts,
std::span const> inodes,
std::vector& index, fragment_category cat) const;
LOG_PROXY_DECL(LoggerPolicy);
progress& prog_;
inode_options const& opts_;
};
template
void inode_ordering_::by_inode_number(
sortable_inode_span& sp) const {
std::sort(
sp.index().begin(), sp.index().end(),
[r = sp.raw()](auto a, auto b) { return r[a]->num() < r[b]->num(); });
}
template
void inode_ordering_::by_path(sortable_inode_span& sp) const {
std::vector paths;
auto raw = sp.raw();
auto& index = sp.index();
paths.resize(raw.size());
for (auto i : index) {
paths[i] = raw[i]->any()->path_as_string();
}
std::sort(index.begin(), index.end(),
[&](auto a, auto b) { return paths[a] < paths[b]; });
}
template
void inode_ordering_::by_reverse_path(
sortable_inode_span& sp) const {
auto raw = sp.raw();
auto& index = sp.index();
std::sort(index.begin(), index.end(), [&](auto a, auto b) {
return raw[a]->any()->less_revpath(*raw[b]->any());
});
}
template
void inode_ordering_::by_similarity(sortable_inode_span& sp,
fragment_category cat) const {
std::vector> hash_cache;
auto raw = sp.raw();
auto& index = sp.index();
bool any_missing = false;
hash_cache.resize(raw.size());
for (auto i : index) {
auto& cache = hash_cache[i];
cache = raw[i]->similarity_hash(cat);
if (!cache.has_value()) {
any_missing = true;
}
}
auto size_pred = [&](auto a, auto b) {
return inode_less_by_size(raw[a].get(), raw[b].get());
};
auto start = index.begin();
if (any_missing) {
start = std::stable_partition(index.begin(), index.end(), [&](auto i) {
return !hash_cache[i].has_value();
});
std::sort(index.begin(), start, size_pred);
}
std::sort(start, index.end(), [&](auto a, auto b) {
assert(hash_cache[a].has_value());
assert(hash_cache[b].has_value());
auto const ca = *hash_cache[a];
auto const cb = *hash_cache[b];
if (ca < cb) {
return true;
}
if (ca > cb) {
return false;
}
return size_pred(a, b);
});
}
template
void inode_ordering_::by_nilsimsa(
worker_group& wg, similarity_ordering_options const& opts,
sortable_inode_span& sp, fragment_category cat) const {
auto raw = sp.raw();
auto& index = sp.index();
if (opts_.max_similarity_scan_size) {
auto mid = std::stable_partition(index.begin(), index.end(), [&](auto i) {
return !raw[i]->nilsimsa_similarity_hash(cat);
});
if (mid != index.begin()) {
std::sort(index.begin(), mid, [&](auto a, auto b) {
return inode_less_by_size(raw[a].get(), raw[b].get());
});
if (mid != index.end()) {
std::vector small_index(mid, index.end());
by_nilsimsa_impl(wg, opts, raw, small_index, cat);
std::copy(small_index.begin(), small_index.end(), mid);
}
return;
}
}
by_nilsimsa_impl(wg, opts, raw, index, cat);
}
template
void inode_ordering_::by_nilsimsa_impl(
worker_group& wg, similarity_ordering_options const& opts,
std::span const> inodes,
std::vector& index, fragment_category cat) const {
auto ev = inode_element_view(inodes, index, cat);
std::promise> promise;
auto future = promise.get_future();
auto sim_order = similarity_ordering(LOG_GET_LOGGER, prog_, wg, opts);
sim_order.order_nilsimsa(ev, make_receiver(std::move(promise)),
std::move(index));
future.get().swap(index);
}
inode_ordering::inode_ordering(logger& lgr, progress& prog,
inode_options const& opts)
: impl_(make_unique_logging_object(lgr, prog, opts)) {}
} // namespace dwarfs::writer::internal