/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
namespace dwarfs::writer::internal {
namespace fs = std::filesystem;
namespace {
// Context label handed to the progress display while a file is being hashed.
constexpr std::string_view kHashContext = "[hashing] ";
// The platform's preferred path separator as a plain `char`.
// fs::path::preferred_separator has type fs::path::value_type, which is not
// `char` on every platform, so the brace-initialization needs an explicit
// narrowing cast to the declared type.
constexpr char const kLocalPathSeparator{
    static_cast<char>(fs::path::preferred_separator)};
// Returns true if `path` is exactly the root path: "/" everywhere, and
// additionally "\" when building for Windows.
bool is_root_path(std::string_view path) {
  if (path.size() != 1) {
    return false;
  }
  char const c = path.front();
#ifdef _WIN32
  return c == '/' || c == '\\';
#else
  return c == '/';
#endif
}
// Derives the stored name of an entry from its path: just the final path
// component when the entry has a parent, the complete path otherwise.
std::string entry_name(fs::path const& path, bool has_parent) {
  return u8string_to_string(has_parent ? path.filename().u8string()
                                       : path.u8string());
}
} // namespace
// Constructs an entry from a filesystem path, an optional parent and the
// entry's stat data.
//
// With a parent, only the last path component is stored as the name; without
// one, the complete path string is stored.
//
// `parent` is tested for null in the name_ initializer and moved into parent_
// afterwards. NOTE(review): this relies on name_ being declared before parent_
// in the class — confirm against the header.
entry::entry(fs::path const& path, std::shared_ptr<entry> parent,
             file_stat const& st)
    : name_{entry_name(path, static_cast<bool>(parent))}
    , parent_{std::move(parent)}
    , stat_{st} {}
// Returns true while the parent entry can still be locked, i.e. this entry has
// a live parent.
bool entry::has_parent() const {
  return parent_.lock() != nullptr;
}
std::shared_ptr entry::parent() const { return parent_.lock(); }
// Replaces the stored name of this entry.
void entry::set_name(const std::string& name) {
  name_ = name;
}
// Returns the stored name converted to a std::u8string.
std::u8string entry::u8name() const {
  return string_to_u8string(name_);
}
// Builds the filesystem path of this entry by appending its name to the
// parent's path; an entry without a parent yields just its own name.
fs::path entry::fs_path() const {
  auto const owner = parent_.lock();
  if (!owner) {
    return fs::path(u8name());
  }
  return owner->fs_path() / u8name();
}
std::string entry::path_as_string() const {
return u8string_to_string(fs_path().u8string());
}
// Returns the entry's path using the local separator. The root path collapses
// to a single separator; directories get a trailing separator appended.
std::string entry::dpath() const {
  auto result = path_as_string();
  if (is_root_path(result)) {
    result.assign(1, kLocalPathSeparator);
  } else if (type() == E_DIR) {
    result.push_back(kLocalPathSeparator);
  }
  return result;
}
std::string entry::unix_dpath() const {
auto p = name_;
if (is_root_path(p)) {
return "/";
}
if (type() == E_DIR && !p.empty() && !p.ends_with(kLocalPathSeparator)) {
p += '/';
}
if (auto parent = parent_.lock()) {
return parent->unix_dpath() + p;
} else if constexpr (kLocalPathSeparator != '/') {
std::replace(p.begin(), p.end(), kLocalPathSeparator, '/');
}
return p;
}
// Strict ordering by "reversed path": compares the entries' own names first
// and, on a tie, recurses into their parents.
//
// If the names tie and at least one side has no parent, the result is true
// only when `rhs` still has a parent (which then means this entry has none);
// in every other such case, including two parentless entries, it is false.
bool entry::less_revpath(entry const& rhs) const {
  // One three-way comparison instead of separate `<` and `>` passes over the
  // same pair of names.
  if (auto const order = name().compare(rhs.name()); order != 0) {
    return order < 0;
  }

  auto const lhs_parent = parent();
  auto const rhs_parent = rhs.parent();

  if (lhs_parent && rhs_parent) {
    return lhs_parent->less_revpath(*rhs_parent);
  }

  return static_cast<bool>(rhs_parent);
}
// Reports whether the entry's stat data describes a directory.
bool entry::is_directory() const {
  return stat_.is_directory();
}
// Invokes `f` on this entry only.
// NOTE(review): the template argument of `std::function` is missing from this
// line as shown; restore the callback signature from the class declaration.
void entry::walk(std::function const& f) { f(this); }
// Const overload: invokes `f` on this (const) entry only.
// NOTE(review): the template argument of `std::function` is missing from this
// line as shown; restore the callback signature from the class declaration.
void entry::walk(std::function const& f) const { f(this); }
// Registers this entry's uid, gid, mode and access/modification/change times
// with the global entry data. The stat fields are validated up front so the
// unchecked accessors can be used afterwards.
void entry::update(global_entry_data& data) const {
  auto const required = file_stat::uid_valid | file_stat::gid_valid |
                        file_stat::mode_valid | file_stat::atime_valid |
                        file_stat::mtime_valid | file_stat::ctime_valid;
  stat_.ensure_valid(required);

  data.add_uid(stat_.uid_unchecked());
  data.add_gid(stat_.gid_unchecked());
  data.add_mode(stat_.mode_unchecked());
  data.add_atime(stat_.atime_unchecked());
  data.add_mtime(stat_.mtime_unchecked());
  data.add_ctime(stat_.ctime_unchecked());
}
// Fills the inode record `entry_v2` with the mode/owner/group indices and the
// time offsets that `data` reports for this entry's stat values. The stat
// fields are validated up front so the unchecked accessors can be used.
void entry::pack(thrift::metadata::inode_data& entry_v2,
                 global_entry_data const& data) const {
  auto const required = file_stat::uid_valid | file_stat::gid_valid |
                        file_stat::mode_valid | file_stat::atime_valid |
                        file_stat::mtime_valid | file_stat::ctime_valid;
  stat_.ensure_valid(required);

  entry_v2.mode_index() = data.get_mode_index(stat_.mode_unchecked());
  entry_v2.owner_index() = data.get_uid_index(stat_.uid_unchecked());
  entry_v2.group_index() = data.get_gid_index(stat_.gid_unchecked());
  entry_v2.atime_offset() = data.get_atime_offset(stat_.atime_unchecked());
  entry_v2.mtime_offset() = data.get_mtime_offset(stat_.mtime_unchecked());
  entry_v2.ctime_offset() = data.get_ctime_offset(stat_.ctime_unchecked());
}
// Returns the size recorded in the entry's stat data.
size_t entry::size() const {
  return stat_.size();
}
// Returns the inode number recorded in the entry's stat data.
uint64_t entry::raw_inode_num() const {
  return stat_.ino();
}
// Returns the link count recorded in the entry's stat data.
uint64_t entry::num_hard_links() const {
  return stat_.nlink();
}
// Overwrites the size stored in the entry's stat data.
void entry::override_size(size_t size) {
  stat_.set_size(size);
}
// A file entry always reports E_FILE.
entry::type_t file::type() const {
  return E_FILE;
}
// Returns the permissions from the entry's stat data.
auto entry::get_permissions() const -> mode_type {
  return stat_.permissions();
}
// Stores new permissions in the entry's stat data.
void entry::set_permissions(mode_type perm) {
  stat_.set_permissions(perm);
}
// Returns the user id from the entry's stat data.
auto entry::get_uid() const -> uid_type {
  return stat_.uid();
}
// Stores a new user id in the entry's stat data.
void entry::set_uid(uid_type uid) {
  stat_.set_uid(uid);
}
// Returns the group id from the entry's stat data.
auto entry::get_gid() const -> gid_type {
  return stat_.gid();
}
// Stores a new group id in the entry's stat data.
void entry::set_gid(gid_type gid) {
  stat_.set_gid(gid);
}
// Returns the access time from the entry's stat data.
uint64_t entry::get_atime() const {
  return stat_.atime();
}
// Stores a new access time in the entry's stat data.
void entry::set_atime(uint64_t atime) {
  stat_.set_atime(atime);
}
// Returns the modification time from the entry's stat data.
uint64_t entry::get_mtime() const {
  return stat_.mtime();
}
// Stores a new modification time in the entry's stat data.
void entry::set_mtime(uint64_t mtime) {
  stat_.set_mtime(mtime);
}
// Returns the status-change time from the entry's stat data.
uint64_t entry::get_ctime() const {
  return stat_.ctime();
}
// Stores a new status-change time in the entry's stat data.
void entry::set_ctime(uint64_t ctime) {
  stat_.set_ctime(ctime);
}
// Returns a non-owning view of the hash bytes stored in the file's data
// record. Dereferences data_ without checking it.
std::string_view file::hash() const {
  auto& digest = data_->hash;
  return std::string_view(digest.data(), digest.size());
}
// Attaches an inode object to this file. Throws a runtime_error (via
// DWARFS_THROW) if an inode has already been set.
// NOTE(review): the template argument of `std::shared_ptr` is missing from the
// signature as shown; restore it from the class declaration.
void file::set_inode(std::shared_ptr ino) {
if (inode_) {
DWARFS_THROW(runtime_error, "inode already set for file");
}
inode_ = std::move(ino);
}
// Returns the inode object attached to this file (empty if none was set).
// NOTE(review): the template argument of `std::shared_ptr` is missing from the
// return type as shown; restore it from the class declaration.
std::shared_ptr file::get_inode() const { return inode_; }
// Visitor entry point: passes this file to the visitor. The traversal-order
// flag is ignored here.
void file::accept(entry_visitor& v, bool /*preorder*/) {
  v.visit(this);
}
// This overload always throws; files are scanned through the overload that
// takes a memory mapping and an optional hash algorithm.
void file::scan(os_access const& /*os*/, progress& /*prog*/) {
  DWARFS_THROW(runtime_error, "file::scan() without hash_alg is not used");
}
// Computes the content hash of this file and stores it in data_->hash.
//
// Does nothing when `hash_alg` is empty. Otherwise the mapped file content is
// fed to a checksum in pieces of `prog.hash.chunk_size` bytes, followed by the
// remaining tail, and the digest is written into data_->hash.
// `mm` is only dereferenced (and asserted non-null) when the file size is > 0.
//
// NOTE(review): template arguments appear to be missing on `std::optional`,
// `std::shared_ptr`, `prog.create_context` and `mm->as` in this block as
// shown; restore them from the original source.
void file::scan(mmif* mm, progress& prog,
std::optional const& hash_alg) {
size_t s = size();
if (hash_alg) {
progress::scan_updater supd(prog.hash, s);
checksum cs(*hash_alg);
if (s > 0) {
std::shared_ptr pctx;
auto const chunk_size = prog.hash.chunk_size.load();
// Only files spanning at least four chunks get their own progress context.
if (s >= 4 * chunk_size) {
pctx = prog.create_context(
termcolor::MAGENTA, kHashContext, path_as_string(), s);
}
size_t offset = 0;
assert(mm);
// Hash full chunks; `s` counts the bytes still to be hashed.
// NOTE(review): this loop does not terminate if chunk_size is 0 — presumably
// the progress object guarantees a non-zero value; confirm.
while (s >= chunk_size) {
cs.update(mm->as(offset), chunk_size);
// release_until() is called with the start offset of the chunk just hashed.
mm->release_until(offset);
offset += chunk_size;
s -= chunk_size;
if (pctx) {
pctx->bytes_processed += chunk_size;
}
}
// Hash the tail; `s` may be 0 here when the size is a multiple of chunk_size.
// The tail is not added to pctx->bytes_processed.
cs.update(mm->as(offset), s);
}
data_->hash.resize(cs.digest_size());
DWARFS_CHECK(cs.finalize(data_->hash.data()),
"checksum computation failed");
}
}
// Returns the number of the attached inode. Dereferences inode_ without
// checking it.
uint32_t file::unique_file_id() const {
  return inode_->num();
}
// Records the inode number in the file's data record.
// Checks (via DWARFS_CHECK) that the data record exists and that no inode
// number has been stored in it yet.
void file::set_inode_num(uint32_t inode_num) {
DWARFS_CHECK(data_, "file data unset");
DWARFS_CHECK(!data_->inode_num, "attempt to set inode number more than once");
data_->inode_num = inode_num;
}
// Returns a reference to the optional inode number held in the file's data
// record. Checks (via DWARFS_CHECK) that the data record exists.
// NOTE(review): the template argument of `std::optional` is missing from the
// return type as shown; restore it from the class declaration.
std::optional const& file::inode_num() const {
DWARFS_CHECK(data_, "file data unset");
return data_->inode_num;
}
// Allocates a fresh data record for this file. Must only be called while no
// data record is attached (asserted).
// NOTE(review): the template argument of `std::make_shared` is missing as
// shown; restore it from the original source.
void file::create_data() {
assert(!data_);
data_ = std::make_shared();
}
// Makes this file share the data record of `other` and accounts for it in the
// progress counters. Expects this file to have no data record yet and `other`
// to have one (both asserted).
void file::hardlink(file* other, progress& prog) {
  assert(!data_);
  assert(other->data_);

  // Share the record and bump its reference counter.
  data_ = other->data_;
  ++data_->refcount;

  // Account for the hard link in the progress statistics.
  prog.hardlink_size += size();
  ++prog.hardlinks;
}
// A directory entry always reports E_DIR.
entry::type_t dir::type() const {
  return E_DIR;
}
void dir::add(std::shared_ptr e) {
if (lookup_) {
auto r [[maybe_unused]] = lookup_->emplace(e->name(), e);
assert(r.second);
}
entries_.emplace_back(std::move(e));
}
// Invokes `f` on this directory first and then walks every child entry in
// entries_ order.
// NOTE(review): the template argument of `std::function` is missing from the
// signature as shown; restore the callback signature from the class
// declaration.
void dir::walk(std::function const& f) {
f(this);
for (entry_ptr const& e : entries_) {
e->walk(f);
}
}
// Const overload: invokes `f` on this directory first and then walks every
// child entry in entries_ order.
// NOTE(review): the template arguments of `std::function` and `const_cast` are
// missing as shown; the cast presumably adds const to the child pointer so the
// const walk() overload is selected — restore from the original source.
void dir::walk(std::function const& f) const {
f(this);
for (entry_ptr const& e : entries_) {
const_cast(e.get())->walk(f);
}
}
// Visitor entry point for directories. With `preorder` set, the directory is
// visited before its children; otherwise after them. Children are visited in
// entries_ order with the same flag.
void dir::accept(entry_visitor& v, bool preorder) {
  if (preorder) {
    v.visit(this);
  }

  for (auto const& child : entries_) {
    child->accept(v, preorder);
  }

  if (!preorder) {
    v.visit(this);
  }
}
// Sorts the directory's entries by name in ascending order.
void dir::sort() {
  auto const by_name = [](entry_ptr const& lhs, entry_ptr const& rhs) {
    return lhs->name() < rhs->name();
  };
  std::sort(entries_.begin(), entries_.end(), by_name);
}
// Scanning a directory entry is a no-op.
void dir::scan(os_access const& /*os*/, progress& /*prog*/) {}
// Appends a dir_entry record for this directory itself to `mv2` and packs the
// directory's inode data into the inode slot selected by its inode number.
// A directory without a parent gets name index 0; otherwise the index is
// looked up in `data` by name.
void dir::pack_entry(thrift::metadata::metadata& mv2,
global_entry_data const& data) const {
auto& de = mv2.dir_entries()->emplace_back();
de.name_index() = has_parent() ? data.get_name_index(name()) : 0;
de.inode_num() = DWARFS_NOTHROW(inode_num().value());
entry::pack(DWARFS_NOTHROW(mv2.inodes()->at(de.inode_num().value())), data);
}
// Appends this directory's record to `mv2.directories()` and one dir_entry
// record per child to `mv2.dir_entries()`, packing each child's inode data
// into its inode slot.
// NOTE(review): the template argument of `std::dynamic_pointer_cast` is
// missing as shown (the check message suggests the directory type); restore it
// from the original source.
void dir::pack(thrift::metadata::metadata& mv2,
global_entry_data const& data) const {
thrift::metadata::directory d;
if (has_parent()) {
auto pd = std::dynamic_pointer_cast(parent());
DWARFS_CHECK(pd, "unexpected parent entry (not a directory)");
auto pe = pd->entry_index();
DWARFS_CHECK(pe, "parent entry index not set");
d.parent_entry() = *pe;
} else {
// A directory without a parent refers to entry 0 as its parent.
d.parent_entry() = 0;
}
// The children of this directory start at the current end of dir_entries.
d.first_entry() = mv2.dir_entries()->size();
mv2.directories()->push_back(d);
for (entry_ptr const& e : entries_) {
// Record where the child's dir_entry lives before appending it.
e->set_entry_index(mv2.dir_entries()->size());
auto& de = mv2.dir_entries()->emplace_back();
de.name_index() = data.get_name_index(e->name());
de.inode_num() = DWARFS_NOTHROW(e->inode_num().value());
e->pack(DWARFS_NOTHROW(mv2.inodes()->at(de.inode_num().value())), data);
}
}
// Recursively removes empty sub-directories from this directory.
//
// Each child directory is pruned first, so a directory that only contained
// empty directories becomes empty itself and is removed as well. The number of
// removed directories is subtracted from the `dirs_scanned` and `dirs_found`
// progress counters, and the name lookup table is dropped.
void dir::remove_empty_dirs(progress& prog) {
  auto last = std::remove_if(entries_.begin(), entries_.end(),
                             [&](std::shared_ptr<entry> const& e) {
                               if (auto d = dynamic_cast<dir*>(e.get())) {
                                 // Prune the subtree before testing emptiness.
                                 d->remove_empty_dirs(prog);
                                 return d->empty();
                               }
                               return false;
                             });

  if (last != entries_.end()) {
    auto num = std::distance(last, entries_.end());
    prog.dirs_scanned -= num;
    prog.dirs_found -= num;
    entries_.erase(last, entries_.end());
  }

  // The table may reference removed entries; drop it unconditionally.
  lookup_.reset();
}
std::shared_ptr dir::find(fs::path const& path) {
auto name = u8string_to_string(path.filename().u8string());
if (!lookup_ && entries_.size() >= 16) {
populate_lookup_table();
}
if (lookup_) {
if (auto it = lookup_->find(name); it != lookup_->end()) {
return it->second;
}
} else {
auto it = std::find_if(entries_.begin(), entries_.end(),
[name](auto& e) { return e->name() == name; });
if (it != entries_.end()) {
return *it;
}
}
return nullptr;
}
// Builds the name lookup table from the current entries. Must only be called
// while no table exists (asserted); every entry name is asserted to be unique.
// NOTE(review): the template argument of `std::make_unique` is missing as
// shown; restore the lookup table type from the original source.
void dir::populate_lookup_table() {
assert(!lookup_);
lookup_ = std::make_unique();
lookup_->reserve(entries_.size());
for (auto const& e : entries_) {
auto r [[maybe_unused]] = lookup_->emplace(e->name(), e);
assert(r.second);
}
}
// A symlink entry always reports E_LINK.
entry::type_t link::type() const {
  return E_LINK;
}
// Returns the stored link target; it is filled in by scan().
const std::string& link::linkname() const {
  return link_;
}
// Visitor entry point: passes this link to the visitor. The traversal-order
// flag is ignored here.
void link::accept(entry_visitor& v, bool /*preorder*/) {
  v.visit(this);
}
// Reads the symlink target through `os`, stores it, and adds the entry's size
// to the `original_size` and `symlink_size` progress counters.
void link::scan(os_access const& os, progress& prog) {
  auto const target = os.read_symlink(fs_path());
  link_ = u8string_to_string(target.u8string());

  auto const link_size = size();
  prog.original_size += link_size;
  prog.symlink_size += link_size;
}
// Character and block devices report E_DEVICE; every other file type handled
// by this class reports E_OTHER.
entry::type_t device::type() const {
  auto const file_type = status().type();
  bool const is_device = file_type == posix_file_type::character ||
                         file_type == posix_file_type::block;
  return is_device ? E_DEVICE : E_OTHER;
}
// Visitor entry point: passes this device to the visitor. The traversal-order
// flag is ignored here.
void device::accept(entry_visitor& v, bool /*preorder*/) {
  v.visit(this);
}
// Scanning a device entry is a no-op.
void device::scan(os_access const& /*os*/, progress& /*prog*/) {}
// Returns the device id (rdev) from the entry's stat data.
uint64_t device::device_id() const {
  return status().rdev();
}
} // namespace dwarfs::writer::internal