/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#include
#include
#include
#include
#include
// This is required to avoid Windows.h being pulled in by libarchive
// and polluting our environment with unwanted macros and declarations.
#if _WIN32
#include
#endif
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
namespace dwarfs::utility {
namespace internal {
using namespace dwarfs::internal;
namespace {
/**
 * Counting helper that tracks a signed amount (`size_`) together with the
 * balance of post() and wait() calls (`count_`).
 *
 * wait() lets the caller through as soon as EITHER enough size is available
 * OR more post() than wait() calls have been made so far.
 */
class cache_semaphore {
 public:
  /// Add `n` to the available size, record one post and wake one waiter.
  void post(int64_t n) {
    {
      std::lock_guard guard(mx_);
      ++count_;
      size_ += n;
    }
    // Notify after releasing the mutex so the woken thread can take it
    // right away.
    condition_.notify_one();
  }

  /// Block until `n` can be taken (see class comment), then take it and
  /// record one wait. `size_` may become negative when the count condition
  /// is what let the caller through.
  void wait(int64_t n) {
    std::unique_lock guard(mx_);
    condition_.wait(guard, [&] { return size_ >= n || count_ > 0; });
    --count_;
    size_ -= n;
  }

 private:
  std::mutex mx_;
  std::condition_variable condition_;
  int64_t count_{0};
  int64_t size_{0};
};
/// Exception type thrown by check_result() when libarchive reports a
/// failure; it only adds a distinct type on top of std::runtime_error.
struct archive_error : std::runtime_error {
  using std::runtime_error::runtime_error;
};
} // namespace
/**
 * libarchive-based implementation of filesystem_extractor::impl.
 *
 * One of open_archive(), open_stream() or open_disk() creates the libarchive
 * write handle (`a_`) that extract() requires. close() releases the handle,
 * the pipe and the pump thread again and is also run by the destructor.
 *
 * NOTE(review): several template parameter/argument lists in this class
 * (after `template`, on `std::array`, `std::make_unique` and
 * `std::unique_ptr`) look like they were lost in this copy of the file.
 * They are reproduced unchanged and must be restored from the canonical
 * source before building.
 */
template
class filesystem_extractor_ final : public filesystem_extractor::impl {
 public:
  /**
   * @param lgr  logger used by the LOG_* macros of this object
   * @param os   OS abstraction; only a reference is kept, so it has to
   *             outlive this object
   */
  explicit filesystem_extractor_(logger& lgr, os_access const& os)
      : LOG_PROXY_INIT(lgr)
      , os_{os} {}

  /// Closes the extractor; failures are logged instead of propagated
  /// because a destructor must not throw.
  ~filesystem_extractor_() override {
    try {
      close();
    } catch (std::exception const& e) {
      LOG_ERROR << "close() failed in destructor: " << e.what();
    } catch (...) {
      LOG_ERROR << "close() failed in destructor";
    }
  }

  /**
   * Create an archive writer for `format` that writes to the file `output`.
   *
   * An empty `output` passes a null filename to libarchive (documented by
   * libarchive as "write to standard output").
   *
   * @throws archive_error if libarchive rejects the format or the file
   */
  void open_archive(std::filesystem::path const& output,
                    std::string const& format) override {
    a_ = ::archive_write_new();

    check_result(::archive_write_set_format_by_name(a_, format.c_str()));

    // The temporary string lives until the end of the full expression, i.e.
    // for the whole duration of the libarchive call.
#ifdef _WIN32
    check_result(::archive_write_open_filename_w(
        a_, output.empty() ? nullptr : output.wstring().c_str()));
#else
    check_result(::archive_write_open_filename(
        a_, output.empty() ? nullptr : output.string().c_str()));
#endif
  }

  /**
   * Create an archive writer for `format` whose output ends up in `os`.
   *
   * libarchive writes into the write end of a pipe; a helper thread running
   * pump() copies everything from the read end into `os`. `os` is captured
   * by reference, so it has to stay alive until close() has returned.
   *
   * @throws system_error (via DWARFS_THROW) if the pipe cannot be created
   * @throws archive_error if libarchive rejects the format or descriptor
   */
  void open_stream(std::ostream& os, std::string const& format) override {
#ifdef _WIN32
    if (::_pipe(pipefd_, 8192, _O_BINARY) != 0) {
      DWARFS_THROW(system_error, "_pipe()");
    }
#else
    if (::pipe(pipefd_) != 0) {
      DWARFS_THROW(system_error, "pipe()");
    }
#endif

    iot_ = std::make_unique(
        [this, &os, fd = pipefd_[0]] { pump(os, fd); });

    a_ = ::archive_write_new();

    check_result(::archive_write_set_format_by_name(a_, format.c_str()));
    check_result(::archive_write_open_fd(a_, pipefd_[1]));
  }

  /**
   * Create a writer that extracts directly to disk.
   *
   * A non-empty `output` becomes the current working directory of the whole
   * process before the writer is created.
   *
   * @throws archive_error if libarchive rejects the extraction options
   */
  void open_disk(std::filesystem::path const& output) override {
    if (!output.empty()) {
      std::filesystem::current_path(output);
    }

    a_ = ::archive_write_disk_new();

    check_result(::archive_write_disk_set_options(
        a_,
        ARCHIVE_EXTRACT_OWNER | ARCHIVE_EXTRACT_PERM | ARCHIVE_EXTRACT_TIME |
            ARCHIVE_EXTRACT_UNLINK | ARCHIVE_EXTRACT_SECURE_NOABSOLUTEPATHS |
            ARCHIVE_EXTRACT_SECURE_NODOTDOT | ARCHIVE_EXTRACT_SECURE_SYMLINKS));
  }

  /**
   * Finish the archive and release the handle, the pipe and the pump thread.
   *
   * Every cleanup step is attempted even if an earlier one fails; the first
   * failure is rethrown once everything has been released. This guarantees
   * that the pump thread is always joined, so destroying the object after a
   * failed close() cannot hit a still-joinable thread.
   *
   * Calling close() again afterwards is a no-op.
   *
   * @throws archive_error if finishing the archive failed
   * @throws system_error (via DWARFS_THROW) if closing a pipe end failed
   */
  void close() override {
    std::exception_ptr first_error;

    auto attempt = [&first_error](auto&& step) {
      try {
        step();
      } catch (...) {
        if (!first_error) {
          first_error = std::current_exception();
        }
      }
    };

    if (a_) {
      // check_result() reads the error text from `a_`, so it has to run
      // before the handle is freed.
      attempt([this] { check_result(::archive_write_close(a_)); });
      ::archive_write_free(a_);
      a_ = nullptr;
    }

    // Closing the write end makes pump() see end-of-file, which is what
    // allows the join below to return.
    attempt([this] { closefd(pipefd_[1]); });

    if (iot_) {
      iot_->join();
      iot_.reset();
    }

    attempt([this] { closefd(pipefd_[0]); });

    if (first_error) {
      std::rethrow_exception(first_error);
    }
  }

  bool extract(reader::filesystem_v2 const& fs,
               filesystem_extractor_options const& opts) override;

 private:
  /**
   * Close `fd` if it is open and mark it as closed (-1).
   *
   * The descriptor is forgotten even when ::close() reports an error, so a
   * later call never closes the same descriptor number a second time.
   *
   * @throws system_error (via DWARFS_THROW) if ::close() fails
   */
  void closefd(int& fd) {
    if (fd >= 0) {
      auto const rv = ::close(fd);
      fd = -1;
      if (rv != 0) {
        DWARFS_THROW(system_error, "close()");
      }
    }
  }

  /// Thread body used by open_stream(): copies everything readable from
  /// `fd` into `os` until end-of-file or a read error (which is logged).
  void pump(std::ostream& os, int fd) {
    folly::setThreadName("pump");

    std::array buf;

    for (;;) {
      // This is fine, we're simply reusing the buffer.
      // Flawfinder: ignore
      auto rv = ::read(fd, buf.data(), buf.size());
      if (rv <= 0) {
        if (rv < 0) {
          LOG_ERROR << "read(): " << ::strerror(errno);
        }
        break;
      }
      os.write(buf.data(), rv);
    }
  }

  /// Error text libarchive recorded for `a_`. libarchive returns a null
  /// pointer when no message is set, which must not reach std::string.
  std::string error_message() const {
    char const* msg = a_ ? ::archive_error_string(a_) : nullptr;
    return msg ? std::string(msg) : std::string("unknown libarchive error");
  }

  /**
   * Translate a libarchive status code: ARCHIVE_OK passes, ARCHIVE_WARN is
   * logged as a warning, ARCHIVE_RETRY/FAILED/FATAL throw. Any other value
   * is ignored.
   *
   * @throws archive_error carrying libarchive's error text
   */
  void check_result(int res) {
    switch (res) {
    case ARCHIVE_OK:
      break;
    case ARCHIVE_WARN:
      LOG_WARN << error_message();
      break;
    case ARCHIVE_RETRY:
    case ARCHIVE_FAILED:
    case ARCHIVE_FATAL:
      throw archive_error(error_message());
    }
  }

  LOG_PROXY_DECL(debug_logger_policy);
  os_access const& os_;
  struct ::archive* a_{nullptr};  // libarchive write handle, null when closed
  int pipefd_[2]{-1, -1};         // [0] read end, [1] write end; -1 = closed
  std::unique_ptr iot_;           // pump thread, only set by open_stream()
};
/**
 * Write the complete contents of `fs` to the currently opened archive.
 *
 * Entries are visited through fs.walk_data_order(). The libarchive entry is
 * built on the calling thread; the libarchive write calls are queued on the
 * "archiver" worker_group. Data of regular files is read in chunks of at
 * most opts.max_queued_bytes, and `sem` makes the calling thread wait until
 * enough of that budget has been returned by finished chunk jobs.
 *
 * Error handling:
 *  - libarchive failures, a failed inode lookup for a deferred hard link
 *    and, unless opts.continue_on_error is set, every other failure are
 *    "hard" errors: remaining entries are skipped and the function throws.
 *  - With opts.continue_on_error, non-libarchive failures are "soft"
 *    errors: extraction continues and the function returns false.
 *
 * Every chunk job returns its budget no matter how it ends, so the calling
 * thread cannot get stuck in sem.wait() after a failure. The libarchive
 * entry of a regular file is shared by all of its chunk jobs and freed
 * exactly once, when the last user is done with it.
 *
 * NOTE(review): the template parameter list after `template`, the class
 * template arguments in the qualified name and the arguments of
 * `std::atomic` look like they were lost in this copy of the file; they are
 * reproduced unchanged.
 *
 * @param fs    filesystem to extract
 * @param opts  extraction options (max_queued_bytes, continue_on_error,
 *              progress callback)
 * @return true if no soft error occurred, false otherwise
 * @throws runtime_error (via DWARFS_THROW) after a hard error or if the
 *         link resolver still holds a deferred entry at the end
 */
template
bool filesystem_extractor_::extract(
    reader::filesystem_v2 const& fs, filesystem_extractor_options const& opts) {
  DWARFS_CHECK(a_, "filesystem not opened");

  auto lr = ::archive_entry_linkresolver_new();

  scope_exit free_resolver{[&] { ::archive_entry_linkresolver_free(lr); }};

  if (auto fmt = ::archive_format(a_)) {
    ::archive_entry_linkresolver_set_strategy(lr, fmt);
  }

  ::archive_entry* spare = nullptr;

  worker_group archiver(LOG_GET_LOGGER, os_, "archiver", 1);
  cache_semaphore sem;

  LOG_DEBUG << "extractor semaphore size: " << opts.max_queued_bytes
            << " bytes";

  sem.post(opts.max_queued_bytes);

  vfs_stat vfs;
  fs.statvfs(&vfs);

  std::atomic hard_error{0};
  std::atomic soft_error{0};
  std::atomic bytes_written{0};
  uint64_t const bytes_total{vfs.blocks};

  // Takes ownership of `ae` and queues the jobs that write it.
  auto do_archive = [&](::archive_entry* ae,
                        reader::inode_view entry) { // TODO: inode vs. entry
    if (auto size = ::archive_entry_size(ae);
        entry.is_regular_file() && size > 0) {
      // All chunk jobs of this file use the same entry. Shared ownership
      // frees it exactly once, after the last job (and this function) has
      // let go of it, on success as well as on every error path.
      std::shared_ptr<::archive_entry> owned{
          ae, [](::archive_entry* e) { ::archive_entry_free(e); }};

      auto fd = fs.open(entry);
      // View into the entry's own storage; valid as long as `owned` lives.
      std::string_view path{::archive_entry_pathname(ae)};
      size_t pos = 0;
      size_t remain = size;

      while (remain > 0 && hard_error == 0) {
        size_t bs =
            remain < opts.max_queued_bytes ? remain : opts.max_queued_bytes;

        sem.wait(bs);

        // A job may have failed while we were waiting; don't queue more
        // work for an extraction that is going to be aborted anyway.
        if (hard_error != 0) {
          sem.post(bs);
          break;
        }

        std::error_code ec;
        auto ranges = fs.readv(fd, bs, pos, ec);

        if (ec) {
          LOG_ERROR << "error reading " << bs << " bytes at offset " << pos
                    << " from inode [" << fd << "]: " << ec.message();
          // No job will return this chunk's budget, so do it here, and make
          // sure the failure shows up in the overall result.
          sem.post(bs);
          if (opts.continue_on_error) {
            ++soft_error;
          } else {
            ++hard_error;
          }
          break;
        }

        archiver.add_job([this, &sem, &hard_error, &soft_error, &opts,
                          ranges = std::move(ranges), owned, pos, bs, size,
                          path, &bytes_written, bytes_total]() mutable {
          // Return the budget however this job ends; otherwise the walking
          // thread could block in sem.wait() forever after an error.
          scope_exit release_budget{[&] { sem.post(bs); }};

          try {
            if (pos == 0) {
              LOG_DEBUG << "extracting " << path << " (" << size << " bytes)";
              check_result(::archive_write_header(a_, owned.get()));
            }
            for (auto& r : ranges) {
              auto br = r.get();
              LOG_TRACE << "[" << pos << "] writing " << br.size()
                        << " bytes for " << path;
              check_result(::archive_write_data(a_, br.data(), br.size()));
              if (opts.progress) {
                bytes_written += br.size();
                opts.progress(path, bytes_written, bytes_total);
              }
            }
          } catch (archive_error const& e) {
            LOG_ERROR << exception_str(e);
            ++hard_error;
          } catch (...) {
            if (opts.continue_on_error) {
              LOG_WARN << exception_str(std::current_exception());
              ++soft_error;
            } else {
              LOG_ERROR << exception_str(std::current_exception());
              ++hard_error;
            }
          }
        });

        pos += bs;
        remain -= bs;
      }
    } else {
      // Everything that carries no data only needs its header written.
      archiver.add_job([this, ae, &hard_error] {
        scope_exit free_entry{[&] { ::archive_entry_free(ae); }};
        try {
          check_result(::archive_write_header(a_, ae));
        } catch (...) {
          LOG_ERROR << exception_str(std::current_exception());
          ++hard_error;
        }
      });
    }
  };

  fs.walk_data_order([&](auto entry) {
    // TODO: we can surely early abort walk() somehow
    if (entry.is_root() || hard_error) {
      return;
    }

    auto inode = entry.inode();

    auto ae = ::archive_entry_new();
    auto stbuf = fs.getattr(inode);

    struct stat st;

    ::memset(&st, 0, sizeof(st));
#ifdef _WIN32
    stbuf.copy_to_without_block_info(&st);
#else
    stbuf.copy_to(&st);
#endif

#ifdef _WIN32
    ::archive_entry_copy_pathname_w(ae, entry.wpath().c_str());
#else
    ::archive_entry_copy_pathname(ae, entry.path().c_str());
#endif
    ::archive_entry_copy_stat(ae, &st);

    if (inode.is_symlink()) {
      std::error_code ec;
      auto link = fs.readlink(inode, ec);
      if (ec) {
        LOG_ERROR << "readlink() failed: " << ec.message();
      }
      if (opts.progress) {
        bytes_written += link.size();
      }
#ifdef _WIN32
      std::filesystem::path linkpath(string_to_u8string(link));
      ::archive_entry_copy_symlink_w(ae, linkpath.wstring().c_str());
#else
      ::archive_entry_copy_symlink(ae, link.c_str());
#endif
    }

    // The resolver may hand back the entry, hold on to it (ae becomes null)
    // and/or return a previously deferred entry in `spare`.
    ::archive_entry_linkify(lr, &ae, &spare);

    if (ae) {
      do_archive(ae, inode);
    }

    if (spare) {
      auto ev = fs.find(::archive_entry_ino(spare));
      if (!ev) {
        // Without the inode there is nothing to read the data from, and the
        // failed lookup result must not be dereferenced. Release the entry
        // we own and abort the extraction.
        LOG_ERROR << "find() failed";
        ::archive_entry_free(spare);
        spare = nullptr;
        ++hard_error;
        return;
      }
      LOG_INFO << "archiving spare " << ::archive_entry_pathname(spare);
      do_archive(spare, *ev);
    }
  });

  archiver.wait();

  if (hard_error) {
    DWARFS_THROW(runtime_error, "extraction aborted");
  }

  // As we're visiting *all* hardlinks, we should never see any deferred
  // entries.
  ::archive_entry* ae = nullptr;
  ::archive_entry_linkify(lr, &ae, &spare);
  if (ae) {
    DWARFS_THROW(runtime_error, "unexpected deferred entry");
  }

  if (soft_error > 0) {
    LOG_ERROR << "extraction finished with " << soft_error << " error(s)";
    return false;
  }

  LOG_INFO << "extraction finished without errors";

  return true;
}
} // namespace internal
// Construct the public extractor facade; the implementation object is
// created by make_unique_logging_object() from the given logger and OS
// abstraction and stored in impl_.
// NOTE(review): the explicit template arguments of
// make_unique_logging_object appear to be missing in this copy of the file.
filesystem_extractor::filesystem_extractor(logger& lgr, os_access const& os)
: impl_(make_unique_logging_object(lgr, os)) {}
/// Report libarchive's version string (as returned by
/// archive_version_string()) to `deps`.
void filesystem_extractor::add_library_dependencies(
    library_dependencies& deps) {
  auto const version = ::archive_version_string();
  deps.add_library(version);
}
} // namespace dwarfs::utility