/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see .
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "dwarfs/filesystem_v2.h"
#include "dwarfs/history.h"
#include "dwarfs/iovec_read_buf.h"
#include "dwarfs/logger.h"
#include "dwarfs/util.h"
#include "dwarfs_tool_main.h"
#include "filter_test_data.h"
#include "loremipsum.h"
#include "mmap_mock.h"
#include "test_helpers.h"
#include "test_logger.h"
using namespace dwarfs;
namespace fs = std::filesystem;
namespace {
// TODO: this is a workaround for older Clang versions
struct fs_path_hash {
auto operator()(const std::filesystem::path& p) const noexcept {
return std::filesystem::hash_value(p);
}
};
auto test_dir = fs::path(TEST_DATA_DIR).make_preferred();
auto audio_data_dir = test_dir / "pcmaudio";
auto fits_data_dir = test_dir / "fits";
constexpr std::array const log_level_strings{
"error", "warn", "info", "verbose", "debug", "trace"};
enum class input_mode {
from_file,
from_stdin,
};
constexpr std::array const input_modes = {
input_mode::from_file, input_mode::from_stdin};
std::ostream& operator<<(std::ostream& os, input_mode m) {
switch (m) {
case input_mode::from_file:
os << "from_file";
break;
case input_mode::from_stdin:
os << "from_stdin";
break;
}
return os;
}
struct locale_setup_helper {
locale_setup_helper() { setup_default_locale(); }
};
void setup_locale() { static locale_setup_helper helper; }
class tool_main_test : public testing::Test {
public:
void SetUp() override {
setup_locale();
iol = std::make_unique();
}
void TearDown() override { iol.reset(); }
std::string out() const { return iol->out(); }
std::string err() const { return iol->err(); }
std::unique_ptr iol;
};
class tester_common {
public:
using main_ptr_t = int (*)(std::span, iolayer const&);
tester_common(main_ptr_t mp, std::string toolname,
std::shared_ptr pos)
: fa{std::make_shared()}
, os{std::move(pos)}
, iol{std::make_unique(os, fa)}
, main_{mp}
, toolname_{std::move(toolname)} {
setup_locale();
}
int run(std::vector args) {
args.insert(args.begin(), toolname_);
return main_(args, iol->get());
}
int run(std::initializer_list args) {
return run(std::vector(args));
}
int run(std::string args) { return run(test::parse_args(args)); }
std::string out() const { return iol->out(); }
std::string err() const { return iol->err(); }
std::shared_ptr fa;
std::shared_ptr os;
std::unique_ptr iol;
private:
main_ptr_t main_;
std::string toolname_;
};
struct random_file_tree_options {
double avg_size{4096.0};
int dimension{20};
int max_name_len{50};
bool with_errors{false};
bool with_invalid_utf8{false};
};
class mkdwarfs_tester : public tester_common {
public:
mkdwarfs_tester(std::shared_ptr pos)
: tester_common(mkdwarfs_main, "mkdwarfs", std::move(pos)) {}
mkdwarfs_tester()
: mkdwarfs_tester(test::os_access_mock::create_test_instance()) {}
static mkdwarfs_tester create_empty() {
return mkdwarfs_tester(std::make_shared());
}
void add_stream_logger(std::ostream& os,
logger::level_type level = logger::VERBOSE) {
lgr = std::make_unique(
std::make_shared(os, os), os,
logger_options{.threshold = level});
}
void add_root_dir() { os->add("", {1, 040755, 1, 0, 0, 10, 42, 0, 0, 0}); }
void add_special_files() {
static constexpr file_stat::off_type const size = 10;
std::string data(size, 'x');
os->add("suid", {1001, 0104755, 1, 0, 0, size, 0, 3333, 2222, 1111}, data);
os->add("sgid", {1002, 0102755, 1, 0, 0, size, 0, 0, 0, 0}, data);
os->add("sticky", {1003, 0101755, 1, 0, 0, size, 0, 0, 0, 0}, data);
os->add("block", {1004, 060666, 1, 0, 0, 0, 77, 0, 0, 0}, std::string{});
os->add("sock", {1005, 0140666, 1, 0, 0, 0, 0, 0, 0, 0}, std::string{});
}
std::vector> add_random_file_tree(
random_file_tree_options const& opt = random_file_tree_options{}) {
size_t max_size{128 * static_cast(opt.avg_size)};
std::mt19937_64 rng{42};
std::exponential_distribution<> size_dist{1 / opt.avg_size};
std::uniform_int_distribution<> path_comp_size_dist{0, opt.max_name_len};
std::uniform_int_distribution<> invalid_dist{0, 1};
std::vector> paths;
auto random_path_component = [&] {
auto size = path_comp_size_dist(rng);
if (opt.with_invalid_utf8 && invalid_dist(rng) == 0) {
return test::create_random_string(size, 96, 255, rng);
}
return test::create_random_string(size, 'A', 'Z', rng);
};
for (int x = 0; x < opt.dimension; ++x) {
fs::path d1{random_path_component() + std::to_string(x)};
os->add_dir(d1);
for (int y = 0; y < opt.dimension; ++y) {
fs::path d2{d1 / (random_path_component() + std::to_string(y))};
os->add_dir(d2);
for (int z = 0; z < opt.dimension; ++z) {
fs::path f{d2 / (random_path_component() + std::to_string(z))};
auto size = std::min(max_size, static_cast(size_dist(rng)));
std::string data;
if (size < 1024 * 1024 && rng() % 2 == 0) {
data = test::create_random_string(size, rng);
} else {
data = test::loremipsum(size);
}
os->add_file(f, data);
paths.emplace_back(f, data);
if (opt.with_errors) {
auto failpath = fs::path{"/"} / f;
switch (rng() % 8) {
case 0:
os->set_access_fail(failpath);
[[fallthrough]];
case 1:
case 2:
os->set_map_file_error(
failpath,
std::make_exception_ptr(std::runtime_error("map_file_error")),
rng() % 4);
break;
default:
break;
}
}
}
}
}
return paths;
}
void add_test_file_tree() {
for (auto const& [stat, name] : test::test_dirtree()) {
auto path = name.substr(name.size() == 5 ? 5 : 6);
switch (stat.type()) {
case posix_file_type::regular:
os->add(path, stat,
[size = stat.size] { return test::loremipsum(size); });
break;
case posix_file_type::symlink:
os->add(path, stat, test::loremipsum(stat.size));
break;
default:
os->add(path, stat);
break;
}
}
}
filesystem_v2
fs_from_data(std::string data, filesystem_options const& opt = {}) {
if (!lgr) {
lgr = std::make_unique();
}
auto mm = std::make_shared(std::move(data));
return filesystem_v2(*lgr, *os, mm, opt);
}
filesystem_v2
fs_from_file(std::string path, filesystem_options const& opt = {}) {
auto fsimage = fa->get_file(path);
if (!fsimage) {
throw std::runtime_error("file not found: " + path);
}
return fs_from_data(std::move(fsimage.value()), opt);
}
filesystem_v2 fs_from_stdout(filesystem_options const& opt = {}) {
return fs_from_data(out(), opt);
}
std::unique_ptr lgr;
};
std::string
build_test_image(std::vector extra_args = {},
std::map extra_files = {}) {
mkdwarfs_tester t;
for (auto const& [name, contents] : extra_files) {
t.fa->set_file(name, contents);
}
std::vector args = {"-i", "/", "-o", "-"};
args.insert(args.end(), extra_args.begin(), extra_args.end());
if (t.run(args) != 0) {
throw std::runtime_error("failed to build test image:\n" + t.err());
}
return t.out();
}
class dwarfsck_tester : public tester_common {
public:
dwarfsck_tester(std::shared_ptr pos)
: tester_common(dwarfsck_main, "dwarfsck", std::move(pos)) {}
dwarfsck_tester()
: dwarfsck_tester(std::make_shared()) {}
static dwarfsck_tester create_with_image(std::string image) {
auto os = std::make_shared();
os->add("", {1, 040755, 1, 0, 0, 10, 42, 0, 0, 0});
os->add_file("image.dwarfs", std::move(image));
return dwarfsck_tester(std::move(os));
}
static dwarfsck_tester create_with_image() {
return create_with_image(build_test_image());
}
};
class dwarfsextract_tester : public tester_common {
public:
dwarfsextract_tester(std::shared_ptr pos)
: tester_common(dwarfsextract_main, "dwarfsextract", std::move(pos)) {}
dwarfsextract_tester()
: dwarfsextract_tester(std::make_shared()) {}
static dwarfsextract_tester create_with_image(std::string image) {
auto os = std::make_shared();
os->add("", {1, 040755, 1, 0, 0, 10, 42, 0, 0, 0});
os->add_file("image.dwarfs", std::move(image));
return dwarfsextract_tester(std::move(os));
}
static dwarfsextract_tester create_with_image() {
return create_with_image(build_test_image());
}
};
std::tuple, mkdwarfs_tester>
build_with_args(std::vector opt_args = {}) {
std::string const image_file = "test.dwarfs";
mkdwarfs_tester t;
std::vector args = {"-i", "/", "-o", image_file};
args.insert(args.end(), opt_args.begin(), opt_args.end());
if (t.run(args) != 0) {
return {std::nullopt, std::move(t)};
}
return {t.fs_from_file(image_file), std::move(t)};
}
std::set get_all_fs_times(filesystem_v2 const& fs) {
std::set times;
fs.walk([&](auto const& e) {
file_stat st;
fs.getattr(e.inode(), &st);
times.insert(st.atime);
times.insert(st.ctime);
times.insert(st.mtime);
});
return times;
}
std::set get_all_fs_uids(filesystem_v2 const& fs) {
std::set uids;
fs.walk([&](auto const& e) {
file_stat st;
fs.getattr(e.inode(), &st);
uids.insert(st.uid);
});
return uids;
}
std::set get_all_fs_gids(filesystem_v2 const& fs) {
std::set gids;
fs.walk([&](auto const& e) {
file_stat st;
fs.getattr(e.inode(), &st);
gids.insert(st.gid);
});
return gids;
}
} // namespace
class mkdwarfs_main_test : public tool_main_test {
public:
int run(std::vector args) {
args.insert(args.begin(), "mkdwarfs");
return mkdwarfs_main(args, iol->get());
}
};
class dwarfsck_main_test : public tool_main_test {
public:
int run(std::vector args) {
args.insert(args.begin(), "dwarfsck");
return dwarfsck_main(args, iol->get());
}
};
class dwarfsextract_main_test : public tool_main_test {
public:
int run(std::vector args) {
args.insert(args.begin(), "dwarfsextract");
return dwarfsextract_main(args, iol->get());
}
};
TEST_F(mkdwarfs_main_test, no_cmdline_args) {
auto exit_code = run({});
EXPECT_EQ(exit_code, 0);
EXPECT_TRUE(err().empty());
EXPECT_FALSE(out().empty());
EXPECT_THAT(out(), ::testing::HasSubstr("Usage: mkdwarfs"));
EXPECT_THAT(out(), ::testing::HasSubstr("--help"));
}
TEST_F(dwarfsck_main_test, no_cmdline_args) {
auto exit_code = run({});
EXPECT_EQ(exit_code, 0);
EXPECT_TRUE(err().empty());
EXPECT_FALSE(out().empty());
EXPECT_THAT(out(), ::testing::HasSubstr("Usage: dwarfsck"));
EXPECT_THAT(out(), ::testing::HasSubstr("--help"));
}
TEST_F(dwarfsextract_main_test, no_cmdline_args) {
auto exit_code = run({});
EXPECT_EQ(exit_code, 0);
EXPECT_TRUE(err().empty());
EXPECT_FALSE(out().empty());
EXPECT_THAT(out(), ::testing::HasSubstr("Usage: dwarfsextract"));
EXPECT_THAT(out(), ::testing::HasSubstr("--help"));
}
TEST_F(mkdwarfs_main_test, invalid_cmdline_args) {
auto exit_code = run({"--some-invalid-option"});
EXPECT_EQ(exit_code, 1);
EXPECT_FALSE(err().empty());
EXPECT_TRUE(out().empty());
EXPECT_THAT(err(), ::testing::HasSubstr(
"unrecognised option '--some-invalid-option'"));
}
TEST_F(dwarfsck_main_test, invalid_cmdline_args) {
auto exit_code = run({"--some-invalid-option"});
EXPECT_EQ(exit_code, 1);
EXPECT_FALSE(err().empty());
EXPECT_TRUE(out().empty());
EXPECT_THAT(err(), ::testing::HasSubstr(
"unrecognised option '--some-invalid-option'"));
}
TEST_F(dwarfsextract_main_test, invalid_cmdline_args) {
auto exit_code = run({"--some-invalid-option"});
EXPECT_EQ(exit_code, 1);
EXPECT_FALSE(err().empty());
EXPECT_TRUE(out().empty());
EXPECT_THAT(err(), ::testing::HasSubstr(
"unrecognised option '--some-invalid-option'"));
}
TEST_F(mkdwarfs_main_test, cmdline_help_arg) {
auto exit_code = run({"--help"});
EXPECT_EQ(exit_code, 0);
EXPECT_TRUE(err().empty());
EXPECT_FALSE(out().empty());
EXPECT_THAT(out(), ::testing::HasSubstr("Usage: mkdwarfs"));
EXPECT_THAT(out(), ::testing::HasSubstr("--help"));
EXPECT_THAT(out(), ::testing::HasSubstr("--long-help"));
// check that the detailed help is not shown
EXPECT_THAT(out(), ::testing::Not(::testing::HasSubstr("Advanced options:")));
EXPECT_THAT(out(),
::testing::Not(::testing::HasSubstr("Compression algorithms:")));
}
TEST_F(mkdwarfs_main_test, cmdline_long_help_arg) {
auto exit_code = run({"--long-help"});
EXPECT_EQ(exit_code, 0);
EXPECT_TRUE(err().empty());
EXPECT_FALSE(out().empty());
EXPECT_THAT(out(), ::testing::HasSubstr("Usage: mkdwarfs"));
EXPECT_THAT(out(), ::testing::HasSubstr("Advanced options:"));
EXPECT_THAT(out(), ::testing::HasSubstr("Compression level defaults:"));
EXPECT_THAT(out(), ::testing::HasSubstr("Compression algorithms:"));
EXPECT_THAT(out(), ::testing::HasSubstr("Categories:"));
}
TEST_F(dwarfsck_main_test, cmdline_help_arg) {
auto exit_code = run({"--help"});
EXPECT_EQ(exit_code, 0);
EXPECT_TRUE(err().empty());
EXPECT_FALSE(out().empty());
EXPECT_THAT(out(), ::testing::HasSubstr("Usage: dwarfsck"));
}
TEST_F(dwarfsextract_main_test, cmdline_help_arg) {
auto exit_code = run({"--help"});
EXPECT_EQ(exit_code, 0);
EXPECT_TRUE(err().empty());
EXPECT_FALSE(out().empty());
EXPECT_THAT(out(), ::testing::HasSubstr("Usage: dwarfsextract"));
}
#ifdef DWARFS_PERFMON_ENABLED
TEST(dwarfsextract_test, perfmon) {
auto t = dwarfsextract_tester::create_with_image();
ASSERT_EQ(0, t.run({"-i", "image.dwarfs", "-f", "mtree", "--perfmon",
"filesystem_v2,inode_reader_v2"}))
<< t.err();
auto outs = t.out();
auto errs = t.err();
EXPECT_GT(outs.size(), 100);
EXPECT_FALSE(errs.empty());
EXPECT_THAT(errs, ::testing::HasSubstr("[filesystem_v2.readv_future]"));
EXPECT_THAT(errs, ::testing::HasSubstr("[filesystem_v2.getattr]"));
EXPECT_THAT(errs, ::testing::HasSubstr("[filesystem_v2.open]"));
EXPECT_THAT(errs, ::testing::HasSubstr("[filesystem_v2.readlink]"));
EXPECT_THAT(errs, ::testing::HasSubstr("[filesystem_v2.statvfs]"));
EXPECT_THAT(errs, ::testing::HasSubstr("[inode_reader_v2.readv_future]"));
static std::regex const perfmon_re{R"(\[filesystem_v2\.getattr\])"
R"(\s+samples:\s+\d+)"
R"(\s+overall:\s+\d+(\.\d+)?[num]?s)"
R"(\s+avg latency:\s+\d+(\.\d+)?[num]?s)"
R"(\s+p50 latency:\s+\d+(\.\d+)?[num]?s)"
R"(\s+p90 latency:\s+\d+(\.\d+)?[num]?s)"
R"(\s+p99 latency:\s+\d+(\.\d+)?[num]?s)"};
EXPECT_TRUE(std::regex_search(errs, perfmon_re)) << errs;
}
#endif
class mkdwarfs_input_list_test : public testing::TestWithParam {};
TEST_P(mkdwarfs_input_list_test, basic) {
auto mode = GetParam();
std::string const image_file = "test.dwarfs";
std::string const input_list = "somelink\nfoo.pl\nsomedir/ipsum.py\n";
mkdwarfs_tester t;
std::string input_file;
if (mode == input_mode::from_file) {
input_file = "input_list.txt";
t.fa->set_file(input_file, input_list);
} else {
input_file = "-";
t.iol->set_in(input_list);
}
ASSERT_EQ(0, t.run({"--input-list", input_file, "-o", image_file}));
std::ostringstream oss;
t.add_stream_logger(oss, logger::DEBUG);
auto fs = t.fs_from_file(image_file);
auto link = fs.find("/somelink");
auto foo = fs.find("/foo.pl");
auto ipsum = fs.find("/somedir/ipsum.py");
EXPECT_TRUE(link);
EXPECT_TRUE(foo);
EXPECT_TRUE(ipsum);
EXPECT_FALSE(fs.find("/test.pl"));
EXPECT_TRUE(link->is_symlink());
EXPECT_TRUE(foo->is_regular_file());
EXPECT_TRUE(ipsum->is_regular_file());
std::set const expected = {"", "somelink", "foo.pl", "somedir",
fs::path("somedir") / "ipsum.py"};
std::set actual;
fs.walk([&](auto const& e) { actual.insert(e.fs_path()); });
EXPECT_EQ(expected, actual);
}
INSTANTIATE_TEST_SUITE_P(dwarfs, mkdwarfs_input_list_test,
::testing::ValuesIn(input_modes));
TEST(mkdwarfs_test, input_list_large) {
auto t = mkdwarfs_tester::create_empty();
t.add_root_dir();
auto paths = t.add_random_file_tree({.avg_size = 32.0, .dimension = 32});
{
std::ostringstream os;
for (auto const& p : paths) {
os << p.first.string() << '\n';
}
t.iol->set_in(os.str());
}
ASSERT_EQ(0, t.run({"-l3", "--input-list", "-", "-o", "-"})) << t.err();
auto fs = t.fs_from_stdout();
std::set expected;
std::transform(paths.begin(), paths.end(),
std::inserter(expected, expected.end()),
[](auto const& p) { return p.first; });
std::set actual;
fs.walk([&](auto const& e) {
if (e.inode().is_regular_file()) {
actual.insert(e.fs_path());
}
});
EXPECT_EQ(expected, actual);
}
class logging_test : public testing::TestWithParam {};
TEST_P(logging_test, end_to_end) {
auto level = GetParam();
std::string const image_file = "test.dwarfs";
auto t = mkdwarfs_tester::create_empty();
t.add_root_dir();
t.os->add_local_files(audio_data_dir);
t.os->add_local_files(fits_data_dir);
t.os->add_file("random", 4096, true);
ASSERT_EQ(0, t.run({"-i", "/", "-o", image_file, "--categorize",
fmt::format("--log-level={}", level)}));
auto fs = t.fs_from_file(image_file);
auto iv16 = fs.find("/test8.aiff");
auto iv32 = fs.find("/test8.caf");
EXPECT_TRUE(iv16);
EXPECT_TRUE(iv32);
{
std::vector dumps;
for (int detail = 0; detail <= 6; ++detail) {
std::ostringstream os;
fs.dump(os, detail);
auto d = os.str();
if (!dumps.empty()) {
EXPECT_GT(d.size(), dumps.back().size()) << detail;
}
dumps.emplace_back(std::move(d));
}
EXPECT_GT(dumps.back().size(), 10'000);
}
{
std::vector infos;
for (int detail = 0; detail <= 4; ++detail) {
auto info = fs.info_as_dynamic(detail);
auto i = folly::toJson(info);
if (!infos.empty()) {
EXPECT_GT(i.size(), infos.back().size()) << detail;
}
infos.emplace_back(std::move(i));
}
EXPECT_GT(infos.back().size(), 1'000);
}
}
INSTANTIATE_TEST_SUITE_P(mkdwarfs, logging_test,
::testing::ValuesIn(log_level_strings));
class term_logging_test
: public testing::TestWithParam> {};
TEST_P(term_logging_test, end_to_end) {
auto const [level, fancy] = GetParam();
std::map> const match{
{"error", {"", 'E'}},
{"warn", {"", 'W'}},
{"info", {"", 'I'}},
{"verbose", {"", 'V'}},
{"debug", {"", 'D'}},
{"trace", {"", 'T'}},
};
auto const cutoff =
std::find(log_level_strings.begin(), log_level_strings.end(), level);
ASSERT_FALSE(cutoff == log_level_strings.end());
{
mkdwarfs_tester t;
t.iol->set_terminal_is_tty(fancy);
t.iol->set_terminal_fancy(fancy);
t.os->set_access_fail("/somedir/ipsum.py"); // trigger an error
EXPECT_EQ(2, t.run("-l1 -i / -o - --categorize --num-workers=8 -S 22 "
"-L 16M --progress=none --log-level=" +
std::string(level)))
<< t.err();
auto err = t.err();
auto it = log_level_strings.begin();
auto make_contains_regex = [fancy = fancy](auto m) {
auto const& [color, prefix] = m->second;
auto beg = fancy ? color : std::string_view{};
auto end = fancy && !color.empty() ? "" : "";
return fmt::format("{}{}\\s\\d\\d:\\d\\d:\\d\\d.*{}\\r?\\n", beg, prefix,
end);
};
while (it != cutoff + 1) {
auto m = match.find(*it);
EXPECT_FALSE(m == match.end());
auto re = make_contains_regex(m);
EXPECT_TRUE(std::regex_search(err, std::regex(re))) << re << ", " << err;
++it;
}
while (it != log_level_strings.end()) {
auto m = match.find(*it);
EXPECT_FALSE(m == match.end());
auto re = make_contains_regex(m);
EXPECT_FALSE(std::regex_search(err, std::regex(re))) << re << ", " << err;
++it;
}
}
}
INSTANTIATE_TEST_SUITE_P(
mkdwarfs, term_logging_test,
::testing::Combine(::testing::ValuesIn(log_level_strings),
::testing::Bool()));
TEST(mkdwarfs_test, no_log_context) {
mkdwarfs_tester t;
EXPECT_EQ(0, t.run("-l3 -i / -o -")) << t.err();
EXPECT_THAT(t.err(), ::testing::Not(::testing::HasSubstr("[scanner.cpp:")));
}
TEST(mkdwarfs_test, default_log_context) {
mkdwarfs_tester t;
EXPECT_EQ(0, t.run("-l3 -i / -o - --log-level=verbose")) << t.err();
EXPECT_THAT(t.err(), ::testing::HasSubstr("[scanner.cpp:"));
}
TEST(mkdwarfs_test, explicit_log_context) {
mkdwarfs_tester t;
EXPECT_EQ(0, t.run("-l3 -i / -o - --log-with-context")) << t.err();
EXPECT_THAT(t.err(), ::testing::HasSubstr("[scanner.cpp:"));
}
TEST(mkdwarfs_test, metadata_inode_info) {
auto t = mkdwarfs_tester::create_empty();
t.add_root_dir();
t.os->add_local_files(audio_data_dir);
t.os->add_local_files(fits_data_dir);
t.os->add_file("random", 4096, true);
ASSERT_EQ(0, t.run("-l3 -i / -o - --categorize"));
auto fs = t.fs_from_stdout();
{
auto iv = fs.find("/test8.aiff");
ASSERT_TRUE(iv);
auto info = fs.get_inode_info(*iv);
ASSERT_TRUE(info.count("chunks") > 0);
std::set categories;
for (auto chunk : info["chunks"]) {
ASSERT_TRUE(chunk.count("category") > 0);
categories.insert(chunk["category"].asString());
}
std::set expected{
"pcmaudio/metadata",
"pcmaudio/waveform",
};
EXPECT_EQ(expected, categories);
}
{
auto iv = fs.find("/test.fits");
ASSERT_TRUE(iv);
auto info = fs.get_inode_info(*iv);
ASSERT_TRUE(info.count("chunks") > 0);
std::set categories;
for (auto chunk : info["chunks"]) {
ASSERT_TRUE(chunk.count("category") > 0);
categories.insert(chunk["category"].asString());
}
std::set expected{
"fits/image",
"fits/metadata",
};
EXPECT_EQ(expected, categories);
}
}
TEST(mkdwarfs_test, metadata_path) {
fs::path const f1{"test.txt"};
fs::path const f2{U"猫.txt"};
fs::path const f3{u8"⚽️.bin"};
fs::path const f4{L"Карибського"};
fs::path const d1{u8"我爱你"};
fs::path const f5{d1 / u8"☀️ Sun"};
auto t = mkdwarfs_tester::create_empty();
t.add_root_dir();
t.os->add_file(f1, 2, true);
t.os->add_file(f2, 4, true);
t.os->add_file(f3, 8, true);
t.os->add_file(f4, 16, true);
t.os->add_dir(d1);
t.os->add_file(f5, 32, true);
ASSERT_EQ(0, t.run("-l3 -i / -o -"));
auto fs = t.fs_from_stdout();
std::map entries;
fs.walk([&](auto e) {
file_stat stat;
if (fs.getattr(e.inode(), &stat) != 0) {
throw std::runtime_error(
fmt::format("getattr() failed for {}", e.path()));
}
if (stat.is_regular_file()) {
entries.emplace(stat.size, e);
}
});
ASSERT_EQ(entries.size(), 5);
auto e1 = entries.at(2);
auto e2 = entries.at(4);
auto e3 = entries.at(8);
auto e4 = entries.at(16);
auto e5 = entries.at(32);
auto de = fs.find(d1.string().c_str());
ASSERT_TRUE(de);
EXPECT_EQ(de->mode_string(), "---drwxr-xr-x");
EXPECT_EQ(e1.inode().mode_string(), "----rw-r--r--");
EXPECT_EQ(e1.fs_path(), f1);
EXPECT_EQ(e2.fs_path(), f2);
EXPECT_EQ(e3.fs_path(), f3);
EXPECT_EQ(e4.fs_path(), f4);
EXPECT_EQ(e5.fs_path(), f5);
EXPECT_EQ(e1.wpath(), L"test.txt");
EXPECT_EQ(e2.wpath(), L"猫.txt");
EXPECT_EQ(e3.wpath(), L"⚽️.bin");
EXPECT_EQ(e4.wpath(), L"Карибського");
#ifdef _WIN32
EXPECT_EQ(e5.wpath(), L"我爱你\\☀️ Sun");
#else
EXPECT_EQ(e5.wpath(), L"我爱你/☀️ Sun");
#endif
EXPECT_EQ(e1.path(), "test.txt");
EXPECT_EQ(e2.path(), "猫.txt");
EXPECT_EQ(e3.path(), "⚽️.bin");
EXPECT_EQ(e4.path(), "Карибського");
#ifdef _WIN32
EXPECT_EQ(e5.path(), "我爱你\\☀️ Sun");
#else
EXPECT_EQ(e5.path(), "我爱你/☀️ Sun");
#endif
EXPECT_EQ(e1.unix_path(), "test.txt");
EXPECT_EQ(e2.unix_path(), "猫.txt");
EXPECT_EQ(e3.unix_path(), "⚽️.bin");
EXPECT_EQ(e4.unix_path(), "Карибського");
EXPECT_EQ(e5.unix_path(), "我爱你/☀️ Sun");
}
TEST(mkdwarfs_test, metadata_modes) {
mkdwarfs_tester t;
t.add_special_files();
ASSERT_EQ(0, t.run("-l3 -i / -o - --with-specials --with-devices"));
auto fs = t.fs_from_stdout();
auto d1 = fs.find("/");
auto d2 = fs.find("/foo.pl");
auto d3 = fs.find("/somelink");
auto d4 = fs.find("/somedir");
auto d5 = fs.find("/somedir/pipe");
auto d6 = fs.find("/somedir/null");
auto d7 = fs.find("/suid");
auto d8 = fs.find("/sgid");
auto d9 = fs.find("/sticky");
auto d10 = fs.find("/block");
auto d11 = fs.find("/sock");
ASSERT_TRUE(d1);
ASSERT_TRUE(d2);
ASSERT_TRUE(d3);
ASSERT_TRUE(d4);
ASSERT_TRUE(d5);
ASSERT_TRUE(d6);
ASSERT_TRUE(d7);
ASSERT_TRUE(d8);
ASSERT_TRUE(d9);
ASSERT_TRUE(d10);
ASSERT_TRUE(d11);
EXPECT_EQ(d1->mode_string(), "---drwxrwxrwx");
EXPECT_EQ(d2->mode_string(), "----rw-------");
EXPECT_EQ(d3->mode_string(), "---lrwxrwxrwx");
EXPECT_EQ(d4->mode_string(), "---drwxrwxrwx");
EXPECT_EQ(d5->mode_string(), "---prw-r--r--");
EXPECT_EQ(d6->mode_string(), "---crw-rw-rw-");
EXPECT_EQ(d7->mode_string(), "U---rwxr-xr-x");
EXPECT_EQ(d8->mode_string(), "-G--rwxr-xr-x");
EXPECT_EQ(d9->mode_string(), "--S-rwxr-xr-x");
EXPECT_EQ(d10->mode_string(), "---brw-rw-rw-");
EXPECT_EQ(d11->mode_string(), "---srw-rw-rw-");
}
TEST(mkdwarfs_test, metadata_specials) {
mkdwarfs_tester t;
t.add_special_files();
ASSERT_EQ(0, t.run("-l3 -i / -o - --with-specials --with-devices"));
auto fs = t.fs_from_stdout();
std::ostringstream oss;
fs.dump(oss, 9);
auto dump = oss.str();
auto meta = fs.metadata_as_dynamic();
std::set types;
for (auto const& ino : meta["root"]["inodes"]) {
types.insert(ino["type"].asString());
if (auto di = ino.find("inodes"); di != ino.items().end()) {
for (auto const& ino2 : di->second) {
types.insert(ino2["type"].asString());
}
}
}
std::set expected_types = {
"file", "link", "directory", "chardev", "blockdev", "socket", "fifo"};
EXPECT_EQ(expected_types, types);
EXPECT_THAT(dump, ::testing::HasSubstr("char device"));
EXPECT_THAT(dump, ::testing::HasSubstr("block device"));
EXPECT_THAT(dump, ::testing::HasSubstr("socket"));
EXPECT_THAT(dump, ::testing::HasSubstr("named pipe"));
auto iv = fs.find("/block");
ASSERT_TRUE(iv);
file_stat stat;
EXPECT_EQ(0, fs.getattr(*iv, &stat));
EXPECT_TRUE(stat.is_device());
EXPECT_EQ(77, stat.rdev);
}
TEST(mkdwarfs_test, metadata_time_resolution) {
mkdwarfs_tester t;
t.add_special_files();
ASSERT_EQ(0, t.run("-l3 -i / -o - --time-resolution=min --keep-all-times"));
auto fs = t.fs_from_stdout();
std::ostringstream oss;
fs.dump(oss, 9);
auto dump = oss.str();
EXPECT_THAT(dump, ::testing::HasSubstr("time resolution: 60 seconds"));
auto dyn = fs.info_as_dynamic(9);
EXPECT_EQ(60, dyn["time_resolution"].asInt());
auto iv = fs.find("/suid");
ASSERT_TRUE(iv);
file_stat stat;
EXPECT_EQ(0, fs.getattr(*iv, &stat));
EXPECT_EQ(3300, stat.atime);
EXPECT_EQ(2220, stat.mtime);
EXPECT_EQ(1080, stat.ctime);
}
TEST(mkdwarfs_test, metadata_readdir) {
mkdwarfs_tester t;
ASSERT_EQ(0, t.run("-l3 -i / -o -"));
auto fs = t.fs_from_stdout();
auto iv = fs.find("/somedir");
ASSERT_TRUE(iv);
auto dir = fs.opendir(*iv);
ASSERT_TRUE(dir);
{
auto r = fs.readdir(dir.value(), 0);
ASSERT_TRUE(r);
auto [ino, name] = r.value();
EXPECT_EQ(".", name);
EXPECT_EQ(ino.inode_num(), iv->inode_num());
}
{
auto r = fs.readdir(dir.value(), 1);
ASSERT_TRUE(r);
auto [ino, name] = r.value();
EXPECT_EQ("..", name);
auto parent = fs.find("/");
ASSERT_TRUE(parent);
EXPECT_EQ(ino.inode_num(), parent->inode_num());
}
{
auto r = fs.readdir(dir.value(), 100);
EXPECT_FALSE(r);
}
}
TEST(mkdwarfs_test, metadata_access) {
#ifdef _WIN32
static constexpr int const x_ok = 1;
#else
static constexpr int const x_ok = X_OK;
#endif
auto t = mkdwarfs_tester::create_empty();
t.add_root_dir();
t.os->add("access", {1001, 040742, 1, 222, 3333});
ASSERT_EQ(0, t.run("-l3 -i / -o -"));
{
auto fs = t.fs_from_stdout();
auto iv = fs.find("/access");
ASSERT_TRUE(iv);
EXPECT_EQ(0, fs.access(*iv, F_OK, 1, 1));
EXPECT_EQ(EACCES, fs.access(*iv, R_OK, 1, 1));
EXPECT_EQ(0, fs.access(*iv, W_OK, 1, 1));
EXPECT_EQ(EACCES, fs.access(*iv, x_ok, 1, 1));
EXPECT_EQ(0, fs.access(*iv, R_OK, 1, 3333));
EXPECT_EQ(0, fs.access(*iv, W_OK, 1, 3333));
EXPECT_EQ(EACCES, fs.access(*iv, x_ok, 1, 3333));
EXPECT_EQ(0, fs.access(*iv, R_OK, 222, 7));
EXPECT_EQ(0, fs.access(*iv, W_OK, 222, 7));
EXPECT_EQ(0, fs.access(*iv, x_ok, 222, 7));
}
{
auto fs = t.fs_from_stdout({.metadata = {.readonly = true}});
auto iv = fs.find("/access");
ASSERT_TRUE(iv);
EXPECT_EQ(0, fs.access(*iv, F_OK, 1, 1));
EXPECT_EQ(EACCES, fs.access(*iv, R_OK, 1, 1));
EXPECT_EQ(EACCES, fs.access(*iv, W_OK, 1, 1));
EXPECT_EQ(EACCES, fs.access(*iv, x_ok, 1, 1));
EXPECT_EQ(0, fs.access(*iv, R_OK, 1, 3333));
EXPECT_EQ(EACCES, fs.access(*iv, W_OK, 1, 3333));
EXPECT_EQ(EACCES, fs.access(*iv, x_ok, 1, 3333));
EXPECT_EQ(0, fs.access(*iv, R_OK, 222, 7));
EXPECT_EQ(EACCES, fs.access(*iv, W_OK, 222, 7));
EXPECT_EQ(0, fs.access(*iv, x_ok, 222, 7));
}
}
TEST(mkdwarfs_test, chmod_norm) {
std::string const image_file = "test.dwarfs";
std::set real, norm;
{
mkdwarfs_tester t;
ASSERT_EQ(0, t.run({"-i", "/", "-o", image_file}));
auto fs = t.fs_from_file(image_file);
fs.walk([&](auto const& e) { real.insert(e.inode().perm_string()); });
}
{
mkdwarfs_tester t;
ASSERT_EQ(0, t.run({"-i", "/", "-o", image_file, "--chmod=norm"}));
auto fs = t.fs_from_file(image_file);
fs.walk([&](auto const& e) { norm.insert(e.inode().perm_string()); });
}
EXPECT_NE(real, norm);
std::set expected_norm = {"r--r--r--", "r-xr-xr-x"};
EXPECT_EQ(expected_norm, norm);
}
TEST(mkdwarfs_test, dump_inodes) {
std::string const image_file = "test.dwarfs";
std::string const inode_file = "inode.dump";
auto t = mkdwarfs_tester::create_empty();
t.add_root_dir();
t.os->add_local_files(audio_data_dir);
t.os->add_local_files(fits_data_dir);
t.os->add_file("random", 4096, true);
t.os->add_file("large", 32 * 1024 * 1024);
t.add_random_file_tree({.avg_size = 1024.0, .dimension = 8});
t.os->setenv("DWARFS_DUMP_INODES", inode_file);
ASSERT_EQ(0, t.run({"-i", "/", "-o", image_file, "--categorize", "-W8"}));
auto dump = t.fa->get_file(inode_file);
ASSERT_TRUE(dump);
EXPECT_GT(dump->size(), 1000) << dump.value();
}
TEST(mkdwarfs_test, set_time_now) {
auto t0 =
std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
auto [regfs, regt] = build_with_args();
ASSERT_TRUE(regfs) << regt.err();
auto reg = get_all_fs_times(*regfs);
auto [optfs, optt] = build_with_args({"--set-time=now"});
ASSERT_TRUE(optfs) << optt.err();
auto opt = get_all_fs_times(*optfs);
auto t1 =
std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
ASSERT_EQ(reg.size(), 11);
ASSERT_EQ(opt.size(), 1);
EXPECT_GE(*opt.begin(), t0);
EXPECT_LE(*opt.begin(), t1);
}
TEST(mkdwarfs_test, set_time_epoch) {
auto [regfs, regt] = build_with_args();
ASSERT_TRUE(regfs) << regt.err();
auto reg = get_all_fs_times(*regfs);
auto [optfs, optt] = build_with_args({"--set-time=100000001"});
ASSERT_TRUE(optfs) << optt.err();
auto opt = get_all_fs_times(*optfs);
EXPECT_EQ(reg.size(), 11);
ASSERT_EQ(opt.size(), 1);
EXPECT_EQ(*opt.begin(), 100000001);
}
TEST(mkdwarfs_test, set_time_epoch_string) {
using namespace std::chrono_literals;
using std::chrono::sys_days;
auto [optfs, optt] = build_with_args({"--set-time", "2020-01-01 01:02"});
ASSERT_TRUE(optfs) << optt.err();
auto opt = get_all_fs_times(*optfs);
ASSERT_EQ(opt.size(), 1);
EXPECT_EQ(*opt.begin(),
std::chrono::duration_cast(
(sys_days{2020y / 1 / 1} + 1h + 2min).time_since_epoch())
.count());
}
TEST(mkdwarfs_test, set_time_error) {
auto t = mkdwarfs_tester::create_empty();
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "--set-time=InVaLiD"}));
EXPECT_THAT(t.err(), ::testing::HasSubstr("cannot parse time point"));
}
TEST(mkdwarfs_test, set_owner) {
auto [regfs, regt] = build_with_args();
ASSERT_TRUE(regfs) << regt.err();
auto reg = get_all_fs_uids(*regfs);
auto [optfs, optt] = build_with_args({"--set-owner=333"});
ASSERT_TRUE(optfs) << optt.err();
auto opt = get_all_fs_uids(*optfs);
ASSERT_EQ(reg.size(), 2);
ASSERT_EQ(opt.size(), 1);
EXPECT_EQ(*opt.begin(), 333);
}
TEST(mkdwarfs_test, set_group) {
auto [regfs, regt] = build_with_args();
ASSERT_TRUE(regfs) << regt.err();
auto reg = get_all_fs_gids(*regfs);
auto [optfs, optt] = build_with_args({"--set-group=444"});
ASSERT_TRUE(optfs) << optt.err();
auto opt = get_all_fs_gids(*optfs);
ASSERT_EQ(reg.size(), 2);
ASSERT_EQ(opt.size(), 1);
EXPECT_EQ(*opt.begin(), 444);
}
TEST(mkdwarfs_test, unrecognized_arguments) {
auto t = mkdwarfs_tester::create_empty();
EXPECT_NE(0, t.run({"grmpf"}));
EXPECT_THAT(t.err(), ::testing::HasSubstr("unrecognized argument"));
}
TEST(mkdwarfs_test, invalid_compression_level) {
auto t = mkdwarfs_tester::create_empty();
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-l", "10"}));
EXPECT_THAT(t.err(), ::testing::HasSubstr("invalid compression level"));
}
TEST(mkdwarfs_test, block_size_too_small) {
auto t = mkdwarfs_tester::create_empty();
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-S", "1"}));
EXPECT_THAT(t.err(), ::testing::HasSubstr("block size must be between"));
}
TEST(mkdwarfs_test, block_size_too_large) {
auto t = mkdwarfs_tester::create_empty();
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-S", "100"}));
EXPECT_THAT(t.err(), ::testing::HasSubstr("block size must be between"));
}
TEST(mkdwarfs_test, cannot_combine_input_list_and_filter) {
auto t = mkdwarfs_tester::create_empty();
EXPECT_NE(0, t.run({"--input-list", "-", "-o", "-", "-F", "+ *"}));
EXPECT_THAT(t.err(),
::testing::HasSubstr("cannot combine --input-list and --filter"));
}
TEST(mkdwarfs_test, cannot_open_input_list_file) {
mkdwarfs_tester t;
EXPECT_NE(0, t.run({"--input-list", "missing.list", "-o", "-"}));
EXPECT_THAT(t.err(), ::testing::HasSubstr("cannot open input list file"));
}
class mkdwarfs_recompress_test
: public testing::TestWithParam {};
TEST_P(mkdwarfs_recompress_test, recompress) {
std::string const compression{GetParam()};
std::string const image_file = "test.dwarfs";
std::string image;
{
mkdwarfs_tester t;
t.os->add_local_files(audio_data_dir);
t.os->add_local_files(fits_data_dir);
t.os->add_file("random", 4096, true);
ASSERT_EQ(0, t.run({"-i", "/", "-o", image_file, "--categorize", "-C",
compression}))
<< t.err();
auto img = t.fa->get_file(image_file);
EXPECT_TRUE(img);
image = std::move(img.value());
auto fs = t.fs_from_file(image_file);
auto history = fs.info_as_dynamic(2)["history"];
EXPECT_EQ(1, history.size());
}
auto tester = [&image_file](std::string const& image_data) {
auto t = mkdwarfs_tester::create_empty();
t.add_root_dir();
t.os->add_file(image_file, image_data);
return t;
};
{
auto t = tester(image);
ASSERT_EQ(0, t.run({"-i", image_file, "-o", "-", "--recompress", "-l0"}))
<< t.err();
auto fs = t.fs_from_stdout();
EXPECT_TRUE(fs.find("/random"));
auto history = fs.info_as_dynamic(2)["history"];
EXPECT_EQ(2, history.size());
}
{
auto t = tester(image);
EXPECT_NE(0, t.run({"-i", image_file, "-o", "-", "--recompress=foo"}));
EXPECT_THAT(t.err(), ::testing::HasSubstr("invalid recompress mode"));
}
{
auto t = tester(image);
ASSERT_EQ(0, t.run({"-i", image_file, "-o", "-", "--recompress=metadata"}))
<< t.err();
auto fs = t.fs_from_stdout();
EXPECT_TRUE(fs.find("/random"));
}
{
auto t = tester(image);
ASSERT_EQ(0, t.run({"-i", image_file, "-o", "-", "--recompress=block",
"--recompress-categories=!pcmaudio/waveform", "-C",
"pcmaudio/metadata::null"}))
<< t.err();
auto fs = t.fs_from_stdout();
EXPECT_TRUE(fs.find("/random"));
}
{
auto t = tester(image);
EXPECT_EQ(1, t.run({"-i", image_file, "-o", "-", "--recompress",
"--recompress-categories=pcmaudio/metadata,SoMeThInG"}))
<< t.err();
EXPECT_THAT(t.err(), ::testing::HasSubstr(
"no category 'SoMeThInG' in input filesystem"));
}
{
auto t = tester(image);
EXPECT_EQ(1, t.run({"-i", image_file, "-o", "-", "--recompress", "-C",
"SoMeThInG::null"}))
<< t.err();
EXPECT_THAT(t.err(), ::testing::HasSubstr("unknown category: 'SoMeThInG'"));
}
{
auto t = tester(image);
EXPECT_EQ(0, t.run({"-i", image_file, "-o", "-", "--recompress=none",
"--log-level=verbose", "--no-history"}))
<< t.err();
auto fs = t.fs_from_stdout();
EXPECT_TRUE(fs.find("/random"));
EXPECT_EQ(0, fs.get_history().get().entries()->size());
EXPECT_EQ(1, fs.info_as_dynamic(2).count("history"));
EXPECT_THAT(t.err(), ::testing::HasSubstr("removing HISTORY"));
}
{
auto corrupt_image = image;
corrupt_image[64] ^= 0x01; // flip a bit right after the header
auto t = tester(corrupt_image);
EXPECT_NE(0, t.run({"-i", image_file, "-o", "-", "--recompress"}))
<< t.err();
EXPECT_THAT(t.err(), ::testing::HasSubstr("input filesystem is corrupt"));
}
}
namespace {
constexpr std::array const source_fs_compression = {
"zstd:level=5",
"null",
};
} // namespace
INSTANTIATE_TEST_SUITE_P(dwarfs, mkdwarfs_recompress_test,
::testing::ValuesIn(source_fs_compression));
class mkdwarfs_build_options_test
: public testing::TestWithParam {};
TEST_P(mkdwarfs_build_options_test, basic) {
auto opts = GetParam();
auto options = test::parse_args(opts);
std::string const image_file = "test.dwarfs";
std::vector args = {"-i", "/", "-o",
image_file, "-C", "zstd:level=9"};
args.insert(args.end(), options.begin(), options.end());
auto t = mkdwarfs_tester::create_empty();
t.add_root_dir();
t.add_random_file_tree();
t.os->add_local_files(audio_data_dir);
t.os->add_local_files(fits_data_dir);
ASSERT_EQ(0, t.run(args));
auto fs = t.fs_from_file(image_file);
fs.dump(std::cout, 3);
}
namespace {
constexpr std::array const build_options = {
"--categorize --order=none --file-hash=none",
"--categorize=pcmaudio --order=path",
"--categorize --order=revpath --file-hash=sha512",
"--categorize=pcmaudio,incompressible --order=similarity",
"--categorize --order=nilsimsa --time-resolution=30",
"--categorize --order=nilsimsa:max-children=1k --time-resolution=hour",
"--categorize --order=nilsimsa:max-cluster-size=16:max-children=16 "
"--max-similarity-size=1M",
"--categorize -B4 -S18",
};
} // namespace
INSTANTIATE_TEST_SUITE_P(dwarfs, mkdwarfs_build_options_test,
::testing::ValuesIn(build_options));
TEST(mkdwarfs_test, order_invalid) {
mkdwarfs_tester t;
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "--order=grmpf"}));
EXPECT_THAT(t.err(), ::testing::HasSubstr("invalid inode order mode"));
}
TEST(mkdwarfs_test, order_nilsimsa_invalid_option) {
mkdwarfs_tester t;
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "--order=nilsimsa:grmpf"}));
EXPECT_THAT(t.err(), ::testing::HasSubstr(
"invalid option(s) for choice nilsimsa: grmpf"));
}
TEST(mkdwarfs_test, order_nilsimsa_invalid_value) {
mkdwarfs_tester t;
EXPECT_NE(0,
t.run({"-i", "/", "-o", "-", "--order=nilsimsa:max-children=0"}));
EXPECT_THAT(t.err(), ::testing::HasSubstr("invalid max-children value: 0"));
}
TEST(mkdwarfs_test, order_nilsimsa_cannot_parse_value) {
mkdwarfs_tester t;
EXPECT_NE(
0, t.run({"-i", "/", "-o", "-", "--order=nilsimsa:max-cluster-size=-1"}));
EXPECT_THAT(t.err(), ::testing::HasSubstr("cannot parse size value"));
}
TEST(mkdwarfs_test, order_nilsimsa_duplicate_option) {
mkdwarfs_tester t;
EXPECT_NE(0,
t.run({"-i", "/", "-o", "-",
"--order=nilsimsa:max-cluster-size=1:max-cluster-size=10"}));
EXPECT_THAT(t.err(),
::testing::HasSubstr(
"duplicate option max-cluster-size for choice nilsimsa"));
}
TEST(mkdwarfs_test, unknown_file_hash) {
mkdwarfs_tester t;
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "--file-hash=grmpf"}));
EXPECT_THAT(t.err(), ::testing::HasSubstr("unknown file hash function"));
}
TEST(mkdwarfs_test, invalid_filter_debug_mode) {
mkdwarfs_tester t;
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "--debug-filter=grmpf"}));
EXPECT_THAT(t.err(), ::testing::HasSubstr("invalid filter debug mode"));
}
TEST(mkdwarfs_test, invalid_progress_mode) {
mkdwarfs_tester t;
t.iol->set_terminal_is_tty(true);
t.iol->set_terminal_fancy(true);
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "--progress=grmpf"}));
EXPECT_THAT(t.err(), ::testing::HasSubstr("invalid progress mode"));
}
TEST(mkdwarfs_test, invalid_filter_rule) {
mkdwarfs_tester t;
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", "grmpf"}));
EXPECT_THAT(t.err(), ::testing::HasSubstr("could not parse filter rule"));
}
TEST(mkdwarfs_test, time_resolution_zero) {
mkdwarfs_tester t;
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "--time-resolution=0"}));
EXPECT_THAT(t.err(),
::testing::HasSubstr("'--time-resolution' must be nonzero"));
}
TEST(mkdwarfs_test, time_resolution_invalid) {
mkdwarfs_tester t;
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "--time-resolution=grmpf"}));
EXPECT_THAT(t.err(), ::testing::HasSubstr("'--time-resolution' is invalid"));
}
namespace {
constexpr std::array const debug_filter_mode_names = {
"included", "excluded", "included-files", "excluded-files", "files", "all",
};
const std::map debug_filter_modes{
{"included", debug_filter_mode::INCLUDED},
{"included-files", debug_filter_mode::INCLUDED_FILES},
{"excluded", debug_filter_mode::EXCLUDED},
{"excluded-files", debug_filter_mode::EXCLUDED_FILES},
{"files", debug_filter_mode::FILES},
{"all", debug_filter_mode::ALL},
};
} // namespace
class filter_test : public testing::TestWithParam<
std::tuple> {
};
TEST_P(filter_test, debug_filter) {
auto [data, mode] = GetParam();
auto t = mkdwarfs_tester::create_empty();
t.add_test_file_tree();
t.fa->set_file("filter.txt", data.filter());
ASSERT_EQ(0, t.run({"-i", "/", "-F", ". filter.txt",
"--debug-filter=" + std::string(mode)}))
<< t.err();
auto expected = data.get_expected_filter_output(debug_filter_modes.at(mode));
EXPECT_EQ(expected, t.out());
}
INSTANTIATE_TEST_SUITE_P(
mkdwarfs_test, filter_test,
::testing::Combine(::testing::ValuesIn(dwarfs::test::get_filter_tests()),
::testing::ValuesIn(debug_filter_mode_names)));
TEST(mkdwarfs_test, filter_recursion) {
auto t = mkdwarfs_tester::create_empty();
t.add_test_file_tree();
t.fa->set_file("filt1.txt", ". filt2.txt\n");
t.fa->set_file("filt2.txt", ". filt3.txt\n");
t.fa->set_file("filt3.txt", "# here we recurse\n. filt1.txt\n");
EXPECT_EQ(1, t.run({"-i", "/", "-o", "-", "-F", ". filt1.txt"})) << t.err();
EXPECT_THAT(t.err(), ::testing::HasSubstr(
"recursion detected while opening file: filt1.txt"));
}
namespace {
constexpr std::array const pack_mode_names = {
"chunk_table", "directories", "shared_files", "names", "names_index",
"symlinks", "symlinks_index", "force", "plain",
};
}
TEST(mkdwarfs_test, pack_modes_random) {
std::mt19937_64 rng{42};
std::uniform_int_distribution<> dist{1, pack_mode_names.size()};
for (int i = 0; i < 50; ++i) {
std::vector modes(pack_mode_names.begin(),
pack_mode_names.end());
std::shuffle(modes.begin(), modes.end(), rng);
modes.resize(dist(rng));
auto mode_arg = folly::join(',', modes);
auto t = mkdwarfs_tester::create_empty();
t.add_test_file_tree();
t.add_random_file_tree({.avg_size = 128.0, .dimension = 16});
ASSERT_EQ(
0, t.run({"-i", "/", "-o", "-", "-l1", "--pack-metadata=" + mode_arg}))
<< t.err();
auto fs = t.fs_from_stdout();
auto info = fs.info_as_dynamic(2);
std::set ms(modes.begin(), modes.end());
std::set fsopt;
for (auto const& opt : info["options"]) {
fsopt.insert(opt.asString());
}
auto ctx = mode_arg + "\n" + fs.dump(2);
EXPECT_EQ(ms.count("chunk_table"), fsopt.count("packed_chunk_table"))
<< ctx;
EXPECT_EQ(ms.count("directories"), fsopt.count("packed_directories"))
<< ctx;
EXPECT_EQ(ms.count("shared_files"),
fsopt.count("packed_shared_files_table"))
<< ctx;
if (ms.count("plain")) {
EXPECT_EQ(0, fsopt.count("packed_names")) << ctx;
EXPECT_EQ(0, fsopt.count("packed_names_index")) << ctx;
EXPECT_EQ(0, fsopt.count("packed_symlinks")) << ctx;
EXPECT_EQ(0, fsopt.count("packed_symlinks_index")) << ctx;
}
}
}
TEST(mkdwarfs_test, pack_mode_none) {
auto t = mkdwarfs_tester::create_empty();
t.add_test_file_tree();
t.add_random_file_tree({.avg_size = 128.0, .dimension = 16});
ASSERT_EQ(0, t.run({"-i", "/", "-o", "-", "-l1", "--pack-metadata=none"}))
<< t.err();
auto fs = t.fs_from_stdout();
auto info = fs.info_as_dynamic(2);
std::set fsopt;
for (auto const& opt : info["options"]) {
fsopt.insert(opt.asString());
}
fsopt.erase("mtime_only");
EXPECT_TRUE(fsopt.empty()) << folly::toJson(info["options"]);
}
TEST(mkdwarfs_test, pack_mode_all) {
auto t = mkdwarfs_tester::create_empty();
t.add_test_file_tree();
t.add_random_file_tree({.avg_size = 128.0, .dimension = 16});
ASSERT_EQ(0, t.run({"-i", "/", "-o", "-", "-l1", "--pack-metadata=all"}))
<< t.err();
auto fs = t.fs_from_stdout();
auto info = fs.info_as_dynamic(2);
std::set expected = {"packed_chunk_table",
"packed_directories",
"packed_names",
"packed_names_index",
"packed_shared_files_table",
"packed_symlinks_index"};
std::set fsopt;
for (auto const& opt : info["options"]) {
fsopt.insert(opt.asString());
}
fsopt.erase("mtime_only");
EXPECT_EQ(expected, fsopt) << folly::toJson(info["options"]);
}
TEST(mkdwarfs_test, pack_mode_invalid) {
mkdwarfs_tester t;
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "--pack-metadata=grmpf"}));
EXPECT_THAT(t.err(), ::testing::HasSubstr("'--pack-metadata' is invalid"));
}
TEST(mkdwarfs_test, filesystem_header) {
auto const header = test::loremipsum(333);
mkdwarfs_tester t;
t.fa->set_file("header.txt", header);
ASSERT_EQ(0, t.run({"-i", "/", "-o", "-", "--header=header.txt"})) << t.err();
auto image = t.out();
auto fs = t.fs_from_data(
image, {.image_offset = filesystem_options::IMAGE_OFFSET_AUTO});
auto hdr = fs.header();
ASSERT_TRUE(hdr);
std::string actual(reinterpret_cast(hdr->data()), hdr->size());
EXPECT_EQ(header, actual);
auto os = std::make_shared();
os->add("", {1, 040755, 1, 0, 0, 10, 42, 0, 0, 0});
os->add_file("image.dwarfs", image);
{
dwarfsck_tester t2(os);
EXPECT_EQ(0, t2.run({"image.dwarfs", "--print-header"})) << t2.err();
EXPECT_EQ(header, t2.out());
}
{
mkdwarfs_tester t2(os);
ASSERT_EQ(0, t2.run({"-i", "image.dwarfs", "-o", "-", "--recompress=none",
"--remove-header"}))
<< t2.err();
auto fs2 = t2.fs_from_stdout();
EXPECT_FALSE(fs2.header());
}
}
TEST(mkdwarfs_test, filesystem_header_error) {
mkdwarfs_tester t;
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "--header=header.txt"})) << t.err();
EXPECT_THAT(t.err(), ::testing::HasSubstr("cannot open header file"));
}
TEST(mkdwarfs_test, output_file_exists) {
mkdwarfs_tester t;
t.fa->set_file("exists.dwarfs", "bla");
EXPECT_NE(0, t.run({"-i", "/", "-o", "exists.dwarfs"})) << t.err();
EXPECT_THAT(t.err(), ::testing::HasSubstr("output file already exists"));
}
TEST(mkdwarfs_test, output_file_force) {
mkdwarfs_tester t;
t.fa->set_file("exists.dwarfs", "bla");
ASSERT_EQ(0, t.run({"-i", "/", "-o", "exists.dwarfs", "-l1", "--force"}))
<< t.err();
auto fs = t.fs_from_file("exists.dwarfs");
EXPECT_TRUE(fs.find("/foo.pl"));
}
TEST(mkdwarfs_test, output_file_fail_open) {
mkdwarfs_tester t;
t.fa->set_file("exists.dwarfs", "bla");
t.fa->set_open_error(
"exists.dwarfs",
std::make_error_code(std::errc::device_or_resource_busy));
EXPECT_NE(0, t.run({"-i", "/", "-o", "exists.dwarfs", "--force"})) << t.err();
EXPECT_THAT(t.err(), ::testing::HasSubstr("cannot open output file"));
}
TEST(mkdwarfs_test, output_file_fail_close) {
mkdwarfs_tester t;
t.fa->set_close_error("test.dwarfs",
std::make_error_code(std::errc::no_space_on_device));
EXPECT_NE(0, t.run({"-i", "/", "-o", "test.dwarfs"})) << t.err();
EXPECT_THAT(t.err(), ::testing::HasSubstr("failed to close output file"));
}
TEST(mkdwarfs_test, compression_cannot_be_used_without_category) {
mkdwarfs_tester t;
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-C", "flac"}));
EXPECT_THAT(t.err(),
::testing::HasSubstr("cannot be used without a category"));
}
TEST(mkdwarfs_test, compression_cannot_be_used_for_category) {
mkdwarfs_tester t;
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "--categorize", "-C",
"incompressible::flac"}));
EXPECT_THAT(t.err(), ::testing::HasSubstr(
"cannot be used for category 'incompressible': "
"metadata requirements not met"));
}
class mkdwarfs_progress_test : public testing::TestWithParam {};
TEST_P(mkdwarfs_progress_test, basic) {
std::string mode{GetParam()};
std::string const image_file = "test.dwarfs";
std::vector args{
"-i", "/", "-o", image_file, "--file-hash=sha512", "--progress", mode};
auto t = mkdwarfs_tester::create_empty();
t.iol->set_terminal_is_tty(true);
t.iol->set_terminal_fancy(true);
t.add_root_dir();
t.add_random_file_tree({
.avg_size = 20.0 * 1024 * 1024,
.dimension = 2,
#ifndef _WIN32
// Windows can't deal with non-UTF-8 filenames
.with_invalid_utf8 = true,
#endif
});
t.os->add_local_files(audio_data_dir);
t.os->add_local_files(fits_data_dir);
ASSERT_EQ(0, t.run(args));
EXPECT_TRUE(t.out().empty()) << t.out();
}
namespace {
constexpr std::array const progress_modes{
"none",
"simple",
"ascii",
"unicode",
};
} // namespace
INSTANTIATE_TEST_SUITE_P(dwarfs, mkdwarfs_progress_test,
::testing::ValuesIn(progress_modes));
TEST(dwarfsextract_test, mtree) {
auto t = dwarfsextract_tester::create_with_image();
ASSERT_EQ(0, t.run({"-i", "image.dwarfs", "-f", "mtree"})) << t.err();
auto out = t.out();
EXPECT_TRUE(out.starts_with("#mtree")) << out;
EXPECT_THAT(out, ::testing::HasSubstr("type=dir"));
EXPECT_THAT(out, ::testing::HasSubstr("type=file"));
}
TEST(dwarfsextract_test, stdout_progress_error) {
auto t = dwarfsextract_tester::create_with_image();
EXPECT_NE(0,
t.run({"-i", "image.dwarfs", "-f", "mtree", "--stdout-progress"}))
<< t.err();
EXPECT_THAT(t.err(), ::testing::HasSubstr(
"cannot use --stdout-progress with --output=-"));
}
TEST(dwarfsck_test, check_exclusive) {
auto t = dwarfsck_tester::create_with_image();
EXPECT_NE(0, t.run({"image.dwarfs", "--no-check", "--check-integrity"}))
<< t.err();
EXPECT_THAT(t.err(),
::testing::HasSubstr(
"--no-check and --check-integrity are mutually exclusive"));
}
TEST(dwarfsck_test, print_header_and_json) {
auto t = dwarfsck_tester::create_with_image();
EXPECT_NE(0, t.run({"image.dwarfs", "--print-header", "--json"})) << t.err();
EXPECT_THAT(t.err(),
::testing::ContainsRegex(
"--print-header is mutually exclusive with.*--json"));
}
TEST(dwarfsck_test, print_header) {
std::string const header = "interesting stuff in the header\n";
auto image =
build_test_image({"--header", "header.txt"}, {{"header.txt", header}});
{
auto t = dwarfsck_tester::create_with_image(image);
EXPECT_EQ(0, t.run({"image.dwarfs", "--print-header"})) << t.err();
EXPECT_EQ(header, t.out());
}
{
auto t = dwarfsck_tester::create_with_image(image);
t.iol->out_stream().setstate(std::ios_base::failbit);
EXPECT_EQ(1, t.run({"image.dwarfs", "--print-header"})) << t.err();
EXPECT_THAT(t.err(), ::testing::HasSubstr("error writing header"));
}
}
TEST(dwarfsck_test, check_fail) {
static constexpr size_t section_header_size{64};
auto image = build_test_image();
{
auto t = dwarfsck_tester::create_with_image(image);
EXPECT_EQ(0, t.run({"image.dwarfs"})) << t.err();
}
{
auto t = dwarfsck_tester::create_with_image(image);
EXPECT_EQ(0, t.run({"image.dwarfs", "--check-integrity"})) << t.err();
}
std::vector> section_offsets;
{
auto t = dwarfsck_tester::create_with_image(image);
EXPECT_EQ(0, t.run({"image.dwarfs", "--no-check", "-j", "-d3"})) << t.err();
auto info = folly::parseJson(t.out());
ASSERT_TRUE(info.count("sections") > 0) << folly::toPrettyJson(info);
size_t offset = 0;
for (auto const& section : info["sections"]) {
auto type = section["type"].asString();
auto size = section["compressed_size"].asInt();
section_offsets.emplace_back(type, offset);
offset += section_header_size + size;
}
EXPECT_EQ(image.size(), offset);
}
size_t index = 0;
for (auto const& [type, offset] : section_offsets) {
bool const is_metadata_section =
type == "METADATA_V2" || type == "METADATA_V2_SCHEMA";
bool const is_block = type == "BLOCK";
auto corrupt_image = image;
// flip a bit right after the header
corrupt_image[offset + section_header_size] ^= 0x01;
// std::cout << "corrupting section: " << type << " @ " << offset << "\n";
{
test::test_logger lgr;
test::os_access_mock os;
auto make_fs = [&] {
return filesystem_v2{lgr, os,
std::make_shared(corrupt_image)};
};
if (is_metadata_section) {
EXPECT_THAT([&] { make_fs(); },
::testing::ThrowsMessage(
::testing::HasSubstr(fmt::format(
"checksum error in section: {}", type))));
} else {
auto fs = make_fs();
auto& log = lgr.get_log();
if (is_block) {
EXPECT_EQ(0, log.size());
} else {
ASSERT_EQ(1, log.size());
EXPECT_THAT(log[0].output,
::testing::HasSubstr(
fmt::format("checksum error in section: {}", type)));
}
auto info = fs.info_as_dynamic(3);
ASSERT_EQ(1, info.count("sections"));
ASSERT_EQ(section_offsets.size(), info["sections"].size());
for (auto const& [i, section] : folly::enumerate(info["sections"])) {
EXPECT_EQ(section["checksum_ok"].asBool(), i != index)
<< type << ", " << index;
}
auto dump = fs.dump(3);
EXPECT_THAT(dump, ::testing::HasSubstr("CHECKSUM ERROR"));
}
}
{
auto t = dwarfsck_tester::create_with_image(corrupt_image);
if (is_metadata_section) {
EXPECT_EQ(1, t.run({"image.dwarfs", "--no-check", "-j"})) << t.err();
} else {
EXPECT_EQ(0, t.run({"image.dwarfs", "--no-check", "-j"})) << t.err();
}
// for blocks, we skip checks with --no-check
if (!is_block) {
EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format(
"checksum error in section: {}", type)));
}
auto json = t.out();
// std::cout << "[" << type << ", nocheck]\n" << json << "\n";
if (is_metadata_section) {
EXPECT_EQ(0, json.size()) << json;
} else {
EXPECT_GT(json.size(), 100) << json;
EXPECT_NO_THROW(folly::parseJson(json)) << json;
}
}
{
auto t = dwarfsck_tester::create_with_image(corrupt_image);
EXPECT_EQ(1, t.run({"image.dwarfs", "-j"})) << t.err();
EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format(
"checksum error in section: {}", type)));
auto json = t.out();
// std::cout << "[" << type << "]\n" << json << "\n";
if (is_metadata_section) {
EXPECT_EQ(0, json.size()) << json;
} else {
EXPECT_GT(json.size(), 100) << json;
EXPECT_NO_THROW(folly::parseJson(json)) << json;
}
}
{
auto t = dwarfsck_tester::create_with_image(corrupt_image);
EXPECT_EQ(1, t.run({"image.dwarfs", "--check-integrity", "-j"}))
<< t.err();
if (is_block) {
EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format(
"integrity check error in section: BLOCK")));
} else {
EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format(
"checksum error in section: {}", type)));
}
auto json = t.out();
// std::cout << "[" << type << ", integrity]\n" << json << "\n";
if (is_metadata_section) {
EXPECT_EQ(0, json.size()) << json;
} else {
EXPECT_GT(json.size(), 100) << json;
EXPECT_NO_THROW(folly::parseJson(json)) << json;
}
}
{
auto t = dwarfsck_tester::create_with_image(corrupt_image);
EXPECT_EQ(1, t.run({"image.dwarfs", "-d3"})) << t.err();
EXPECT_THAT(t.err(), ::testing::HasSubstr(fmt::format(
"checksum error in section: {}", type)));
if (is_metadata_section) {
EXPECT_EQ(0, t.out().size()) << t.out();
} else {
EXPECT_THAT(t.out(), ::testing::HasSubstr("CHECKSUM ERROR"));
}
}
++index;
}
}
TEST(dwarfsck_test, print_header_and_export_metadata) {
auto t = dwarfsck_tester::create_with_image();
EXPECT_NE(0, t.run({"image.dwarfs", "--print-header",
"--export-metadata=image.meta"}))
<< t.err();
EXPECT_THAT(
t.err(),
::testing::ContainsRegex(
"--print-header is mutually exclusive with.*--export-metadata"));
}
TEST(dwarfsck_test, print_header_and_check_integrity) {
auto t = dwarfsck_tester::create_with_image();
EXPECT_NE(0, t.run({"image.dwarfs", "--print-header", "--check-integrity"}))
<< t.err();
EXPECT_THAT(
t.err(),
::testing::ContainsRegex(
"--print-header is mutually exclusive with.*--check-integrity"));
}
TEST(dwarfsck_test, print_header_no_header) {
auto t = dwarfsck_tester::create_with_image();
EXPECT_EQ(2, t.run({"image.dwarfs", "--print-header"})) << t.err();
EXPECT_THAT(t.err(),
::testing::HasSubstr("filesystem does not contain a header"));
}
TEST(dwarfsck_test, export_metadata) {
auto t = dwarfsck_tester::create_with_image();
ASSERT_EQ(0, t.run({"image.dwarfs", "--export-metadata=image.meta"}))
<< t.err();
auto meta = t.fa->get_file("image.meta");
ASSERT_TRUE(meta);
EXPECT_GT(meta->size(), 1000);
EXPECT_NO_THROW(folly::parseJson(meta.value()));
}
TEST(dwarfsck_test, export_metadata_open_error) {
auto t = dwarfsck_tester::create_with_image();
t.fa->set_open_error(
"image.meta", std::make_error_code(std::errc::device_or_resource_busy));
EXPECT_NE(0, t.run({"image.dwarfs", "--export-metadata=image.meta"}))
<< t.err();
EXPECT_THAT(t.err(),
::testing::HasSubstr("failed to open metadata output file"));
}
TEST(dwarfsck_test, export_metadata_close_error) {
auto t = dwarfsck_tester::create_with_image();
t.fa->set_close_error("image.meta",
std::make_error_code(std::errc::no_space_on_device));
EXPECT_NE(0, t.run({"image.dwarfs", "--export-metadata=image.meta"}))
<< t.err();
EXPECT_THAT(t.err(),
::testing::HasSubstr("failed to close metadata output file"));
}
TEST(dwarfsck_test, checksum_algorithm_not_available) {
auto t = dwarfsck_tester::create_with_image();
EXPECT_NE(0, t.run({"image.dwarfs", "--checksum=grmpf"})) << t.err();
EXPECT_THAT(t.err(),
::testing::HasSubstr("checksum algorithm not available: grmpf"));
}
TEST(dwarfsck_test, list_files) {
auto t = dwarfsck_tester::create_with_image();
EXPECT_EQ(0, t.run({"image.dwarfs", "--list"})) << t.err();
auto out = t.out();
std::set files;
folly::splitTo('\n', out, std::inserter(files, files.end()),
true);
std::set const expected{
"test.pl", "somelink", "somedir", "foo.pl",
"bar.pl", "baz.pl", "ipsum.txt", "somedir/ipsum.py",
"somedir/bad", "somedir/empty", "empty",
};
EXPECT_EQ(expected, files);
}
TEST(dwarfsck_test, list_files_verbose) {
auto t = dwarfsck_tester::create_with_image();
EXPECT_EQ(0, t.run({"image.dwarfs", "--list", "--verbose"})) << t.err();
auto out = t.out();
auto num_lines = std::count(out.begin(), out.end(), '\n');
EXPECT_EQ(12, num_lines);
std::vector expected_re{
fmt::format("drwxrwxrwx\\s+1000/100\\s+8\\s+{:%Y-%m-%d %H:%M}\\s*\n",
fmt::localtime(2)),
fmt::format(
"-rw-------\\s+1337/ 0\\s+{:L}\\s+{:%Y-%m-%d %H:%M}\\s+baz.pl\n",
23456, fmt::localtime(8002)),
fmt::format("lrwxrwxrwx\\s+1000/100\\s+16\\s+{:%Y-%m-%d "
"%H:%M}\\s+somelink -> somedir/ipsum.py\n",
fmt::localtime(2002)),
};
for (auto const& str : expected_re) {
std::regex re{str};
EXPECT_TRUE(std::regex_search(out, re)) << "[" << str << "]\n" << out;
}
}
TEST(dwarfsck_test, checksum_files) {
auto t = dwarfsck_tester::create_with_image();
EXPECT_EQ(0, t.run({"image.dwarfs", "--checksum=md5"})) << t.err();
auto out = t.out();
auto num_lines = std::count(out.begin(), out.end(), '\n');
EXPECT_EQ(8, num_lines);
std::map actual;
std::vector lines;
folly::split('\n', out, lines);
for (auto const& line : lines) {
if (line.empty()) {
continue;
}
std::string file, hash;
folly::split(" ", line, hash, file);
EXPECT_TRUE(actual.emplace(file, hash).second);
}
std::map const expected{
{"empty", "d41d8cd98f00b204e9800998ecf8427e"},
{"somedir/empty", "d41d8cd98f00b204e9800998ecf8427e"},
{"test.pl", "d41d8cd98f00b204e9800998ecf8427e"},
{"baz.pl", "e2bd36391abfd15dcc83cbdfb60a6bc3"},
{"somedir/ipsum.py", "70fe813c36ed50ebd7f4991857683676"},
{"foo.pl", "e2bd36391abfd15dcc83cbdfb60a6bc3"},
{"bar.pl", "e2bd36391abfd15dcc83cbdfb60a6bc3"},
{"ipsum.txt", "0782b6a546cedd8be8fc86ac47dc6d96"},
};
EXPECT_EQ(expected, actual);
}
class mkdwarfs_sim_order_test : public testing::TestWithParam {};
TEST(mkdwarfs_test, max_similarity_size) {
static constexpr std::array sizes{50, 100, 200, 500, 1000, 2000, 5000, 10000};
auto make_tester = [] {
std::mt19937_64 rng{42};
auto t = mkdwarfs_tester::create_empty();
t.add_root_dir();
for (auto size : sizes) {
auto data = test::create_random_string(size, rng);
t.os->add_file("/file" + std::to_string(size), data);
}
return t;
};
auto get_sizes_in_offset_order = [](filesystem_v2 const& fs) {
std::vector> tmp;
for (auto size : sizes) {
auto path = "/file" + std::to_string(size);
auto iv = fs.find(path.c_str());
assert(iv);
auto info = fs.get_inode_info(*iv);
assert(1 == info["chunks"].size());
auto const& chunk = info["chunks"][0];
tmp.emplace_back(chunk["offset"].asInt(), chunk["size"].asInt());
}
std::sort(tmp.begin(), tmp.end(),
[](auto const& a, auto const& b) { return a.first < b.first; });
std::vector sizes;
std::transform(tmp.begin(), tmp.end(), std::back_inserter(sizes),
[](auto const& p) { return p.second; });
return sizes;
};
auto partitioned_sizes = [&](std::vector in, size_t max_size) {
auto mid = std::stable_partition(
in.begin(), in.end(), [=](auto size) { return size > max_size; });
std::sort(in.begin(), mid, std::greater());
return in;
};
std::vector sim_ordered_sizes;
std::vector nilsimsa_ordered_sizes;
{
auto t = make_tester();
ASSERT_EQ(0, t.run("-i / -o - -l0 --order=similarity")) << t.err();
auto fs = t.fs_from_stdout();
sim_ordered_sizes = get_sizes_in_offset_order(fs);
}
{
auto t = make_tester();
ASSERT_EQ(0, t.run("-i / -o - -l0 --order=nilsimsa")) << t.err();
auto fs = t.fs_from_stdout();
nilsimsa_ordered_sizes = get_sizes_in_offset_order(fs);
}
EXPECT_FALSE(
std::is_sorted(sim_ordered_sizes.begin(), sim_ordered_sizes.end()));
static constexpr std::array max_sim_sizes{0, 1, 200, 999,
1000, 1001, 5000, 10000};
std::set nilsimsa_results;
for (auto max_sim_size : max_sim_sizes) {
{
auto t = make_tester();
EXPECT_EQ(0,
t.run(fmt::format(
"-i / -o - -l0 --order=similarity --max-similarity-size={}",
max_sim_size)))
<< t.err();
auto fs = t.fs_from_stdout();
auto ordered_sizes = get_sizes_in_offset_order(fs);
if (max_sim_size == 0) {
EXPECT_EQ(sim_ordered_sizes, ordered_sizes) << max_sim_size;
} else {
auto partitioned = partitioned_sizes(sim_ordered_sizes, max_sim_size);
EXPECT_EQ(partitioned, ordered_sizes) << max_sim_size;
}
}
{
auto t = make_tester();
EXPECT_EQ(0,
t.run(fmt::format(
"-i / -o - -l0 --order=nilsimsa --max-similarity-size={}",
max_sim_size)))
<< t.err();
auto fs = t.fs_from_stdout();
auto ordered_sizes = get_sizes_in_offset_order(fs);
nilsimsa_results.insert(folly::join(",", ordered_sizes));
if (max_sim_size == 0) {
EXPECT_EQ(nilsimsa_ordered_sizes, ordered_sizes) << max_sim_size;
} else {
std::vector expected;
std::copy_if(sizes.begin(), sizes.end(), std::back_inserter(expected),
[=](auto size) { return size > max_sim_size; });
std::sort(expected.begin(), expected.end(), std::greater());
ordered_sizes.resize(expected.size());
EXPECT_EQ(expected, ordered_sizes) << max_sim_size;
}
}
}
EXPECT_GE(nilsimsa_results.size(), 3);
}
TEST(mkdwarfs_test, low_memory_limit) {
{
mkdwarfs_tester t;
EXPECT_EQ(
0, t.run("-i / -o - -l5 --log-level=warn -S 27 --num-workers=8 -L 1g"));
EXPECT_THAT(t.err(),
::testing::Not(::testing::HasSubstr("low memory limit")));
}
{
mkdwarfs_tester t;
EXPECT_EQ(
0, t.run("-i / -o - -l5 --log-level=warn -S 28 --num-workers=8 -L 1g"));
EXPECT_THAT(t.err(), ::testing::HasSubstr("low memory limit"));
}
}
TEST(mkdwarfs_test, recoverable_errors) {
{
mkdwarfs_tester t;
t.os->set_access_fail("/somedir/ipsum.py");
EXPECT_EQ(2, t.run("-i / -o - -l4")) << t.err();
EXPECT_THAT(t.err(),
::testing::HasSubstr("filesystem created with 1 error"));
}
{
mkdwarfs_tester t;
t.os->set_access_fail("/somedir/ipsum.py");
t.os->set_access_fail("/baz.pl");
EXPECT_EQ(2, t.run("-i / -o - -l4")) << t.err();
EXPECT_THAT(t.err(),
::testing::HasSubstr("filesystem created with 2 errors"));
}
}
TEST(mkdwarfs_test, filesystem_read_error) {
mkdwarfs_tester t;
EXPECT_EQ(0, t.run("-i / -o -")) << t.err();
auto fs = t.fs_from_stdout();
auto iv = fs.find("/somedir");
ASSERT_TRUE(iv);
EXPECT_TRUE(iv->is_directory());
EXPECT_EQ(-1, fs.open(*iv));
{
char buf[1];
EXPECT_EQ(-EBADF, fs.read(iv->inode_num(), buf, sizeof(buf), 0));
}
{
iovec_read_buf buf;
EXPECT_EQ(-EBADF, fs.readv(iv->inode_num(), buf, 42, 0));
}
{
auto res = fs.readv(iv->inode_num(), 42, 0);
EXPECT_FALSE(res);
EXPECT_EQ(-EBADF, res.error());
}
}
class segmenter_repeating_sequence_test : public testing::TestWithParam {
};
TEST_P(segmenter_repeating_sequence_test, github161) {
auto byte = GetParam();
static constexpr int const final_bytes{10'000'000};
static constexpr int const repetitions{2'000};
auto match = test::create_random_string(5'000);
auto suffix = test::create_random_string(50);
auto sequence = std::string(3'000, byte);
std::string content;
content.reserve(match.size() + suffix.size() +
(sequence.size() + match.size()) * repetitions + final_bytes);
content += match + suffix;
for (int i = 0; i < repetitions; ++i) {
content += sequence + match;
}
content += std::string(final_bytes, byte);
auto t = mkdwarfs_tester::create_empty();
t.add_root_dir();
t.os->add_file("/bug", content);
ASSERT_EQ(0, t.run("-i / -o - -C lz4 -W12 --log-level=verbose --no-progress"))
<< t.err();
auto log = t.err();
{
std::regex const re{fmt::format(
"avoided \\d\\d\\d\\d+ collisions in 0x{:02x}-byte sequences", byte)};
EXPECT_TRUE(std::regex_search(log, re)) << log;
}
{
std::regex const re{"segment matches: good=(\\d+), bad=(\\d+), "
"collisions=(\\d+), total=(\\d+)"};
std::smatch m;
ASSERT_TRUE(std::regex_search(log, m, re)) << log;
auto good = std::stoi(m[1]);
auto bad = std::stoi(m[2]);
auto collisions = std::stoi(m[3]);
auto total = std::stoi(m[4]);
EXPECT_GT(good, 2000);
EXPECT_EQ(0, bad);
EXPECT_EQ(0, collisions);
EXPECT_GT(total, 2000);
}
}
INSTANTIATE_TEST_SUITE_P(dwarfs, segmenter_repeating_sequence_test,
::testing::Values('\0', 'G', '\xff'));
TEST(mkdwarfs_test, map_file_error) {
mkdwarfs_tester t;
t.os->set_map_file_error(
"/somedir/ipsum.py",
std::make_exception_ptr(std::runtime_error("map_file_error")));
EXPECT_EQ(2, t.run("-i / -o - --categorize")) << t.err();
EXPECT_THAT(t.err(),
::testing::HasSubstr("map_file_error, creating empty inode"));
EXPECT_THAT(t.err(), ::testing::HasSubstr("filesystem created with 1 error"));
}
class map_file_error_test : public testing::TestWithParam {};
TEST_P(map_file_error_test, delayed) {
std::string extra_args{GetParam()};
auto t = mkdwarfs_tester::create_empty();
t.add_root_dir();
t.os->add_local_files(audio_data_dir);
auto files = t.add_random_file_tree({.avg_size = 64.0,
.dimension = 20,
.max_name_len = 8,
.with_errors = true});
static constexpr size_t const kSizeSmall{1 << 10};
static constexpr size_t const kSizeLarge{1 << 20};
auto gen_small = [] { return test::loremipsum(kSizeLarge); };
auto gen_large = [] { return test::loremipsum(kSizeLarge); };
t.os->add("large_link1", {43, 0100755, 2, 1000, 100, kSizeLarge, 42, 0, 0, 0},
gen_large);
t.os->add("large_link2", {43, 0100755, 2, 1000, 100, kSizeLarge, 42, 0, 0, 0},
gen_large);
t.os->add("small_link1", {44, 0100755, 2, 1000, 100, kSizeSmall, 42, 0, 0, 0},
gen_small);
t.os->add("small_link2", {44, 0100755, 2, 1000, 100, kSizeSmall, 42, 0, 0, 0},
gen_small);
for (auto const& link :
{"large_link1", "large_link2", "small_link1", "small_link2"}) {
t.os->set_map_file_error(
fs::path{"/"} / link,
std::make_exception_ptr(std::runtime_error("map_file_error")), 0);
}
{
std::mt19937_64 rng{42};
for (auto const& p : fs::recursive_directory_iterator(audio_data_dir)) {
if (p.is_regular_file()) {
auto fp = fs::relative(p.path(), audio_data_dir);
std::string contents;
ASSERT_TRUE(folly::readFile(p.path().string().c_str(), contents));
files.emplace_back(fp, std::move(contents));
if (rng() % 2 == 0) {
t.os->set_map_file_error(
fs::path{"/"} / fp,
std::make_exception_ptr(std::runtime_error("map_file_error")),
rng() % 4);
}
}
}
}
t.os->setenv("DWARFS_DUMP_INODES", "inodes.dump");
// t.iol->use_real_terminal(true);
std::string args = "-i / -o test.dwarfs --no-progress --log-level=verbose";
if (!extra_args.empty()) {
args += " " + extra_args;
}
EXPECT_EQ(2, t.run(args)) << t.err();
auto fs = t.fs_from_file("test.dwarfs", {.metadata = {.enable_nlink = true}});
// fs.dump(std::cout, 2);
{
auto large_link1 = fs.find("/large_link1");
auto large_link2 = fs.find("/large_link2");
auto small_link1 = fs.find("/small_link1");
auto small_link2 = fs.find("/small_link2");
ASSERT_TRUE(large_link1);
ASSERT_TRUE(large_link2);
ASSERT_TRUE(small_link1);
ASSERT_TRUE(small_link2);
EXPECT_EQ(large_link1->inode_num(), large_link2->inode_num());
EXPECT_EQ(small_link1->inode_num(), small_link2->inode_num());
file_stat st;
ASSERT_EQ(0, fs.getattr(*large_link1, &st));
EXPECT_EQ(0, st.size);
ASSERT_EQ(0, fs.getattr(*small_link1, &st));
EXPECT_EQ(0, st.size);
}
std::unordered_map actual_files;
fs.walk([&](auto const& dev) {
auto iv = dev.inode();
if (iv.is_regular_file()) {
std::string data;
file_stat stat;
ASSERT_EQ(0, fs.getattr(iv, &stat));
data.resize(stat.size);
ASSERT_EQ(data.size(), fs.read(iv.inode_num(), data.data(), data.size()));
ASSERT_TRUE(actual_files.emplace(dev.fs_path(), std::move(data)).second);
}
});
// check that:
// - all original files are present
// - they're either empty (in case of errors) or have the original content
size_t num_non_empty = 0;
auto failed_expected = t.os->get_failed_paths();
std::set failed_actual;
for (auto const& [path, data] : files) {
auto it = actual_files.find(path);
ASSERT_NE(actual_files.end(), it);
if (!it->second.empty()) {
EXPECT_EQ(data, it->second);
++num_non_empty;
} else if (!data.empty()) {
failed_actual.insert(fs::path("/") / path);
} else {
failed_expected.erase(fs::path("/") / path);
}
}
EXPECT_LE(failed_actual.size(), failed_expected.size());
EXPECT_GT(files.size(), 8000);
EXPECT_GT(num_non_empty, 4000);
// Ensure that files which never had any errors are all present
std::set surprisingly_missing;
std::set_difference(
failed_actual.begin(), failed_actual.end(), failed_expected.begin(),
failed_expected.end(),
std::inserter(surprisingly_missing, surprisingly_missing.begin()));
std::unordered_map original_files(
files.begin(), files.end());
EXPECT_EQ(0, surprisingly_missing.size());
for (auto const& path : surprisingly_missing) {
std::cout << "surprisingly missing: " << path << "\n";
auto it = original_files.find(path.relative_path());
ASSERT_NE(original_files.end(), it);
std::cout << "--- original (" << it->second.size() << " bytes) ---\n";
std::cout << folly::hexDump(it->second.data(), it->second.size()) << "\n";
}
auto dump = t.fa->get_file("inodes.dump");
ASSERT_TRUE(dump);
if (extra_args.find("--file-hash=none") == std::string::npos) {
EXPECT_THAT(dump.value(), ::testing::HasSubstr("(invalid)"))
<< dump.value();
}
if (extra_args.find("--order=revpath") != std::string::npos) {
EXPECT_THAT(dump.value(), ::testing::HasSubstr("similarity: none"))
<< dump.value();
} else {
EXPECT_THAT(dump.value(), ::testing::HasSubstr("similarity: nilsimsa"))
<< dump.value();
}
if (extra_args.find("--categorize") != std::string::npos) {
EXPECT_THAT(dump.value(), ::testing::HasSubstr("[incompressible]"))
<< dump.value();
}
}
namespace {
std::array const map_file_error_args{
"",
"--categorize",
"--order=revpath",
"--order=revpath --categorize",
"--file-hash=none",
"--file-hash=none --categorize",
"--file-hash=none --order=revpath",
"--file-hash=none --order=revpath --categorize",
};
} // namespace
INSTANTIATE_TEST_SUITE_P(dwarfs, map_file_error_test,
::testing::ValuesIn(map_file_error_args));