/* vim:set ts=2 sw=2 sts=2 et: */ /** * \author Marcus Holland-Moritz (github@mhxnet.de) * \copyright Copyright (c) Marcus Holland-Moritz * * This file is part of dwarfs. * * dwarfs is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * dwarfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with dwarfs. If not, see <https://www.gnu.org/licenses/>. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef _WIN32 #include #endif #include #include #include #include #include #include #include #include #include #include "dwarfs/block_compressor.h" #include "dwarfs/block_compressor_parser.h" #include "dwarfs/builtin_script.h" #include "dwarfs/categorizer.h" #include "dwarfs/category_parser.h" #include "dwarfs/chmod_entry_transformer.h" #include "dwarfs/console_writer.h" #include "dwarfs/entry.h" #include "dwarfs/error.h" #include "dwarfs/file_access.h" #include "dwarfs/filesystem_block_category_resolver.h" #include "dwarfs/filesystem_v2.h" #include "dwarfs/filesystem_writer.h" #include "dwarfs/filter_debug.h" #include "dwarfs/fragment_order_parser.h" #include "dwarfs/integral_value_parser.h" #include "dwarfs/iolayer.h" #include "dwarfs/library_dependencies.h" #include "dwarfs/logger.h" #include "dwarfs/match.h" #include "dwarfs/mmap.h" #include "dwarfs/options.h" #include "dwarfs/options_interface.h" #include "dwarfs/os_access.h" #include "dwarfs/program_options_helpers.h" #include "dwarfs/progress.h" #include "dwarfs/scanner.h" #include
"dwarfs/script.h"
#include "dwarfs/segmenter_factory.h"
#include "dwarfs/terminal.h"
#include "dwarfs/tool.h"
#include "dwarfs/util.h"
#include "dwarfs_tool_main.h"

namespace po = boost::program_options;

namespace dwarfs {

namespace {

// NOTE(review): several declarations below (`std::map`, `std::array`,
// `std::unordered_map>`) carry no template argument list in this copy of the
// file; they look truncated -- confirm against the upstream source.

// Names accepted by --progress, mapped to the corresponding console_writer
// value. mkdwarfs_main uses the keys both for the option's help text and to
// validate the value given on the command line.
const std::map progress_modes{
    {"none", console_writer::NONE},
    {"simple", console_writer::SIMPLE},
    {"ascii", console_writer::ASCII},
    {"unicode", console_writer::UNICODE},
};

// Default value of the --progress option.
const std::string default_progress_mode = "unicode";

// Names accepted by --debug-filter, mapped to the corresponding
// debug_filter_mode value.
const std::map debug_filter_modes{
    {"included", debug_filter_mode::INCLUDED},
    {"included-files", debug_filter_mode::INCLUDED_FILES},
    {"excluded", debug_filter_mode::EXCLUDED},
    {"excluded-files", debug_filter_mode::EXCLUDED_FILES},
    {"files", debug_filter_mode::FILES},
    {"all", debug_filter_mode::ALL},
};

// Symbolic names for --time-resolution and the number of seconds each one
// stands for (the option also advertises plain seconds in its help text).
const std::map time_resolutions{
    {"sec", 1},
    {"min", 60},
    {"hour", 3600},
    {"day", 86400},
};

// Inclusive bounds for the block size bits; mkdwarfs_main reports an error
// when the effective value falls outside [min, max].
constexpr size_t min_block_size_bits{10};
constexpr size_t max_block_size_bits{30};

// Defaults that depend on the compression level (one instance per row of
// `levels`). Each field is the level-dependent default of the option named
// in its trailing comment.
struct level_defaults {
  unsigned block_size_bits;                    // --block-size-bits
  std::string_view data_compression;           // --compression
  std::string_view schema_history_compression; // --schema-compression and
                                               // --history-compression
  std::string_view metadata_compression;       // --metadata-compression
  unsigned window_size;                        // --window-size
  unsigned window_step;                        // --window-step
  std::string_view order;                      // --order
};

// The ALG_* macros below pick a compression specification string depending
// on which compression libraries are compiled in. Every ladder ends in
// "null" when none of the libraries is available.

// Block data, levels 1-3: LZ4 is preferred, then zstd, then LZMA.
#if defined(DWARFS_HAVE_LIBLZ4)
#define ALG_DATA_1 "lz4"
#define ALG_DATA_2 "lz4hc:level=9"
#define ALG_DATA_3 "lz4hc:level=9"
#elif defined(DWARFS_HAVE_LIBZSTD)
#define ALG_DATA_1 "zstd:level=1"
#define ALG_DATA_2 "zstd:level=4"
#define ALG_DATA_3 "zstd:level=7"
#elif defined(DWARFS_HAVE_LIBLZMA)
#define ALG_DATA_1 "lzma:level=1"
#define ALG_DATA_2 "lzma:level=2"
#define ALG_DATA_3 "lzma:level=3"
#else
#define ALG_DATA_1 "null"
#define ALG_DATA_2 "null"
#define ALG_DATA_3 "null"
#endif

// Block data, levels 4-7: zstd is preferred, then LZMA, then LZ4.
#if defined(DWARFS_HAVE_LIBZSTD)
#define ALG_DATA_4 "zstd:level=11"
#define ALG_DATA_5 "zstd:level=19"
#define ALG_DATA_6 "zstd:level=22"
#define ALG_DATA_7 "zstd:level=22"
#elif defined(DWARFS_HAVE_LIBLZMA)
#define ALG_DATA_4 "lzma:level=3"
#define ALG_DATA_5 "lzma:level=4"
#define ALG_DATA_6 "lzma:level=5"
#define ALG_DATA_7 "lzma:level=8"
#elif defined(DWARFS_HAVE_LIBLZ4)
#define ALG_DATA_4 "lz4hc:level=9"
#define ALG_DATA_5 "lz4hc:level=9"
#define ALG_DATA_6 "lz4hc:level=9"
#define ALG_DATA_7 "lz4hc:level=9"
#else
#define ALG_DATA_4 "null"
#define ALG_DATA_5 "null"
#define ALG_DATA_6 "null"
#define ALG_DATA_7 "null"
#endif

// Block data, levels 8-9: LZMA is preferred, then zstd, then LZ4.
#if defined(DWARFS_HAVE_LIBLZMA)
#define ALG_DATA_8 "lzma:level=9"
#define ALG_DATA_9 "lzma:level=9"
#elif defined(DWARFS_HAVE_LIBZSTD)
#define ALG_DATA_8 "zstd:level=22"
#define ALG_DATA_9 "zstd:level=22"
#elif defined(DWARFS_HAVE_LIBLZ4)
#define ALG_DATA_8 "lz4hc:level=9"
#define ALG_DATA_9 "lz4hc:level=9"
#else
#define ALG_DATA_8 "null"
#define ALG_DATA_9 "null"
#endif

// Schema/history sections (used by levels 1-9): zstd, then LZMA, then LZ4.
#if defined(DWARFS_HAVE_LIBZSTD)
#define ALG_SCHEMA "zstd:level=16"
#elif defined(DWARFS_HAVE_LIBLZMA)
#define ALG_SCHEMA "lzma:level=4"
#elif defined(DWARFS_HAVE_LIBLZ4)
#define ALG_SCHEMA "lz4hc:level=9"
#else
#define ALG_SCHEMA "null"
#endif

// Metadata at level 7: zstd, then LZMA, then LZ4.
#if defined(DWARFS_HAVE_LIBZSTD)
#define ALG_METADATA_7 "zstd:level=22"
#elif defined(DWARFS_HAVE_LIBLZMA)
#define ALG_METADATA_7 "lzma:level=9"
#elif defined(DWARFS_HAVE_LIBLZ4)
#define ALG_METADATA_7 "lz4hc:level=9"
#else
#define ALG_METADATA_7 "null"
#endif

// Metadata at levels 8 and 9 (both rows use ALG_METADATA_9): LZMA, then
// zstd, then LZ4.
#if defined(DWARFS_HAVE_LIBLZMA)
#define ALG_METADATA_9 "lzma:level=9"
#elif defined(DWARFS_HAVE_LIBZSTD)
#define ALG_METADATA_9 "zstd:level=22"
#elif defined(DWARFS_HAVE_LIBLZ4)
#define ALG_METADATA_9 "lz4hc:level=9"
#else
#define ALG_METADATA_9 "null"
#endif

// Defaults per compression level; the array index is the level (0-9) and
// the columns follow the member order of level_defaults:
// block size bits, data, schema/history, metadata, window size, window
// step, order. Levels 0-6 leave the metadata uncompressed ("null").
constexpr std::array levels{{
    // clang-format off
    /* 0 */ {20, "null", "null" , "null", 0, 0, "none"},
    /* 1 */ {20, ALG_DATA_1, ALG_SCHEMA, "null", 0, 0, "path"},
    /* 2 */ {20, ALG_DATA_2, ALG_SCHEMA, "null", 0, 0, "path"},
    /* 3 */ {21, ALG_DATA_3, ALG_SCHEMA, "null", 12, 1, "similarity"},
    /* 4 */ {22, ALG_DATA_4, ALG_SCHEMA, "null", 12, 2, "similarity"},
    /* 5 */ {23, ALG_DATA_5, ALG_SCHEMA, "null", 12, 2, "similarity"},
    /* 6 */ {24, ALG_DATA_6, ALG_SCHEMA, "null", 12, 3, "nilsimsa"},
    /* 7 */ {24, ALG_DATA_7, ALG_SCHEMA, ALG_METADATA_7, 12, 3, "nilsimsa"},
    /* 8 */ {24, ALG_DATA_8, ALG_SCHEMA, ALG_METADATA_9, 12, 4, "nilsimsa"},
    /* 9 */ {26, ALG_DATA_9, ALG_SCHEMA, ALG_METADATA_9, 12, 4, "nilsimsa"},
    // clang-format on
}};

// The categorize_defaults_* tables hold per-category fallback values, keyed
// by the long option name they apply to. Each value appears to have the form
// "<category>::<option value>". mkdwarfs_main registers them through
// categorize_optval::add_defaults().

// Fallbacks registered for every compression level.
const std::unordered_map> categorize_defaults_common{
    // clang-format off
    {"--compression", {"incompressible::null"}},
    // clang-format on
};

// Fallbacks for the low compression levels (see categorize_defaults_level).
// NOTE(review): the non-FLAC / non-RICEPP branches name zstd unconditionally,
// unlike the ALG_* ladders above -- confirm zstd is always available in such
// builds.
const std::unordered_map> categorize_defaults_fast{
    // clang-format off
    {"--order", {"pcmaudio/waveform::revpath", "fits/image::revpath"}},
    {"--window-size", {"pcmaudio/waveform::0", "fits/image::0"}},
    {"--compression", {
#ifdef DWARFS_HAVE_FLAC
      "pcmaudio/waveform::flac:level=3",
#else
      "pcmaudio/waveform::zstd:level=3",
#endif
#ifdef DWARFS_HAVE_RICEPP
      "fits/image::ricepp",
#else
      "fits/image::zstd:level=3",
#endif
    }},
    // clang-format on
};

// Fallbacks for the medium compression levels.
const std::unordered_map> categorize_defaults_medium{
    // clang-format off
    {"--order", {"pcmaudio/waveform::revpath", "fits/image::revpath"}},
    {"--window-size", {"pcmaudio/waveform::20", "fits/image::0"}},
    {"--compression", {
#ifdef DWARFS_HAVE_FLAC
      "pcmaudio/waveform::flac:level=5",
#else
      "pcmaudio/waveform::zstd:level=5",
#endif
#ifdef DWARFS_HAVE_RICEPP
      "fits/image::ricepp",
#else
      "fits/image::zstd:level=5",
#endif
    }},
    // clang-format on
};

// Fallbacks for the high compression levels. Unlike the fast and medium
// tables, "--order" has no pcmaudio/waveform entry here.
const std::unordered_map> categorize_defaults_slow{
    // clang-format off
    {"--order", {"fits/image::revpath"}},
    {"--window-size", {"pcmaudio/waveform::16", "fits/image::0"}},
    {"--compression", {
#ifdef DWARFS_HAVE_FLAC
      "pcmaudio/waveform::flac:level=8",
#else
      "pcmaudio/waveform::zstd:level=8",
#endif
#ifdef DWARFS_HAVE_RICEPP
      "fits/image::ricepp",
#else
      "fits/image::zstd:level=8",
#endif
    }},
    // clang-format on
};

// Selects one of the tables above for each compression level; the array
// index is the level: 0-4 use "fast", 5-7 "medium", 8-9 "slow".
constexpr std::array< std::unordered_map> const*, 10>
    categorize_defaults_level{{
        // clang-format off
        /* 0 */ &categorize_defaults_fast,
        /* 1 */ &categorize_defaults_fast,
        /* 2 */ &categorize_defaults_fast,
        /* 3 */ &categorize_defaults_fast,
        /* 4 */ &categorize_defaults_fast,
        /* 5 */ &categorize_defaults_medium,
        /* 6 */
&categorize_defaults_medium, /* 7 */ &categorize_defaults_medium, /* 8 */ &categorize_defaults_slow, /* 9 */ &categorize_defaults_slow, // clang-format on }}; constexpr unsigned default_level = 7; class categorize_optval { public: std::string value; bool is_explicit{false}; categorize_optval() = default; explicit categorize_optval(std::string const& val, bool expl = false) : value{val} , is_explicit{expl} {} bool is_implicit_default() const { return !value.empty() && !is_explicit; } template void add_implicit_defaults(T& cop) const { if (is_implicit_default()) { if (auto it = defaults_.find(cop.name()); it != defaults_.end()) { for (auto const& value : it->second) { cop.parse_fallback(value); } } } } void add_defaults(std::unordered_map> const& defaults) { for (auto const& [key, values] : defaults) { auto& vs = defaults_[key]; vs.insert(vs.end(), values.begin(), values.end()); } } private: std::unordered_map> defaults_; }; std::ostream& operator<<(std::ostream& os, categorize_optval const& optval) { return os << optval.value << (optval.is_explicit ? 
" (explicit)" : ""); } void validate(boost::any& v, std::vector const& values, categorize_optval*, int) { po::validators::check_first_occurrence(v); v = categorize_optval{po::validators::get_single_string(values), true}; } } // namespace int mkdwarfs_main(int argc, sys_char** argv, iolayer const& iol) { using namespace folly::gen; const size_t num_cpu = std::max(folly::hardware_concurrency(), 1u); static constexpr size_t const kDefaultMaxActiveBlocks{1}; static constexpr size_t const kDefaultBloomFilterSize{4}; segmenter_factory::config sf_config; sys_string path_str, input_list_str, output_str, header_str; std::string memory_limit, script_arg, schema_compression, metadata_compression, timestamp, time_resolution, progress_mode, recompress_opts, pack_metadata, file_hash_algo, debug_filter, max_similarity_size, chmod_str, history_compression, recompress_categories; std::vector filter; std::vector order, max_lookback_blocks, window_size, window_step, bloom_filter_size, compression; size_t num_workers, num_scanner_workers, num_segmenter_workers; bool no_progress = false, remove_header = false, no_section_index = false, force_overwrite = false, no_history = false, no_history_timestamps = false, no_history_command_line = false; unsigned level; int compress_niceness; uint16_t uid, gid; categorize_optval categorizer_list; integral_value_parser max_lookback_parser; integral_value_parser window_size_parser(0, 24); integral_value_parser window_step_parser(0, 8); integral_value_parser bloom_filter_size_parser(0, 10); fragment_order_parser order_parser; block_compressor_parser compressor_parser; scanner_options options; logger_options logopts; auto order_desc = "inode fragments order (" + order_parser.choices() + ")"; auto progress_desc = "progress mode (" + (from(progress_modes) | get<0>() | unsplit(", ")) + ")"; auto debug_filter_desc = "show effect of filter rules without producing an image (" + (from(debug_filter_modes) | get<0>() | unsplit(", ")) + ")"; auto 
resolution_desc = "time resolution in seconds or (" + (from(time_resolutions) | get<0>() | unsplit(", ")) + ")"; auto hash_list = checksum::available_algorithms(); auto file_hash_desc = "choice of file hashing function (none, " + (from(hash_list) | unsplit(", ")) + ")"; auto& catreg = categorizer_registry::instance(); auto categorize_desc = "enable categorizers in the given order (" + (from(catreg.categorizer_names()) | unsplit(", ")) + ")"; auto lvl_def_val = [](auto opt) { return fmt::format("arg (={})", levels[default_level].*opt); }; auto dep_def_val = [](auto dep) { return fmt::format("arg (={})", dep); }; auto cat_def_val = [](auto def) { return fmt::format("[cat::]arg (={})", def); }; auto lvl_cat_def_val = [](auto opt) { return fmt::format("[cat::]arg (={})", levels[default_level].*opt); }; // clang-format off po::options_description basic_opts("Options"); basic_opts.add_options() ("input,i", po_sys_value(&path_str), "path to root directory or source filesystem") ("input-list", po_sys_value(&input_list_str), "file containing list of file paths relative to root directory " "or - for stdin") ("output,o", po_sys_value(&output_str), "filesystem output name or - for stdout") ("force,f", po::value(&force_overwrite)->zero_tokens(), "force overwrite of existing output image") ("compress-level,l", po::value(&level)->default_value(default_level), "compression level (0=fast, 9=best, please see man page for details)") ; add_common_options(basic_opts, logopts); basic_opts.add_options() ("long-help,H", "output full help message and exit") ; po::options_description advanced_opts("Advanced options"); advanced_opts.add_options() ("block-size-bits,S", po::value(&sf_config.block_size_bits) ->value_name(lvl_def_val(&level_defaults::block_size_bits)), "block size bits (size = 2^arg bits)") ("num-workers,N", po::value(&num_workers)->default_value(num_cpu), "number of writer (compression) worker threads") ("compress-niceness", po::value(&compress_niceness)->default_value(5), 
"compression worker threads niceness") ("num-scanner-workers", po::value(&num_scanner_workers) ->value_name(dep_def_val("num-workers")), "number of scanner (hasher/categorizer) worker threads") ("num-segmenter-workers", po::value(&num_segmenter_workers) ->value_name(dep_def_val("num-workers")), "number of segmenter worker threads") ("memory-limit,L", po::value(&memory_limit)->default_value("1g"), "block manager memory limit") ("recompress", po::value(&recompress_opts)->implicit_value("all"), "recompress an existing filesystem (none, block, metadata, all)") ("recompress-categories", po::value(&recompress_categories), "only recompress blocks of these categories") ("categorize", po::value(&categorizer_list) ->implicit_value(categorize_optval("fits,pcmaudio,incompressible")), categorize_desc.c_str()) ("order", po::value>(&order) ->value_name(lvl_cat_def_val(&level_defaults::order)) ->multitoken()->composing(), order_desc.c_str()) ("max-similarity-size", po::value(&max_similarity_size), "maximum file size to compute similarity") ("file-hash", po::value(&file_hash_algo)->default_value("xxh3-128"), file_hash_desc.c_str()) ("progress", po::value(&progress_mode)->default_value(default_progress_mode), progress_desc.c_str()) ("no-progress", po::value(&no_progress)->zero_tokens(), "don't show progress") ; po::options_description filesystem_opts("File system options"); filesystem_opts.add_options() ("with-devices", po::value(&options.with_devices)->zero_tokens(), "include block and character devices") ("with-specials", po::value(&options.with_specials)->zero_tokens(), "include named fifo and sockets") ("header", po_sys_value(&header_str), "prepend output filesystem with contents of this file") ("remove-header", po::value(&remove_header)->zero_tokens(), "remove any header present before filesystem data" " (use with --recompress)") ("no-section-index", po::value(&no_section_index)->zero_tokens(), "don't add section index to file system") ("no-history", 
po::value(&no_history)->zero_tokens(), "don't add history to file system") ("no-history-timestamps", po::value(&no_history_timestamps)->zero_tokens(), "don't add timestamps to file system history") ("no-history-command-line", po::value(&no_history_command_line)->zero_tokens(), "don't add command line to file system history") ; po::options_description segmenter_opts("Segmenter options"); segmenter_opts.add_options() ("max-lookback-blocks,B", po::value>(&max_lookback_blocks) ->value_name(cat_def_val(kDefaultMaxActiveBlocks)) ->multitoken()->composing(), "how many blocks to scan for segments") ("window-size,W", po::value>(&window_size) ->value_name(lvl_cat_def_val(&level_defaults::window_size)) ->multitoken()->composing(), "window sizes for block hashing") ("window-step,w", po::value>(&window_step) ->value_name(lvl_cat_def_val(&level_defaults::window_step)) ->multitoken()->composing(), "window step (as right shift of size)") ("bloom-filter-size", po::value>(&bloom_filter_size) ->value_name(cat_def_val(kDefaultBloomFilterSize)) ->multitoken()->composing(), "bloom filter size (2^N*values bits)") ; po::options_description compressor_opts("Compressor options"); compressor_opts.add_options() ("compression,C", po::value>(&compression) ->value_name(lvl_cat_def_val(&level_defaults::data_compression)) ->multitoken()->composing(), "block compression algorithm") ("schema-compression", po::value(&schema_compression) ->value_name(lvl_def_val(&level_defaults::schema_history_compression)), "metadata schema compression algorithm") ("metadata-compression", po::value(&metadata_compression) ->value_name(lvl_def_val(&level_defaults::metadata_compression)), "metadata compression algorithm") ("history-compression", po::value(&history_compression) ->value_name(lvl_def_val(&level_defaults::schema_history_compression)), "history compression algorithm") ; po::options_description filter_opts("Filter options"); filter_opts.add_options() ("filter,F", po_sys_value>(&filter) 
->multitoken()->composing(), "add filter rule") ("debug-filter", po::value(&debug_filter)->implicit_value("all"), debug_filter_desc.c_str()) ("remove-empty-dirs", po::value(&options.remove_empty_dirs)->zero_tokens(), "remove empty directories in file system") ; po::options_description metadata_opts("Metadata options"); metadata_opts.add_options() ("set-owner", po::value(&uid), "set owner (uid) for whole file system") ("set-group", po::value(&gid), "set group (gid) for whole file system") ("chmod", po::value(&chmod_str), "recursively apply permission changes") ("no-create-timestamp", po::value(&options.no_create_timestamp)->zero_tokens(), "don't add create timestamp to file system") ("set-time", po::value(×tamp), "set timestamp for whole file system (unixtime or 'now')") ("keep-all-times", po::value(&options.keep_all_times)->zero_tokens(), "save atime and ctime in addition to mtime") ("time-resolution", po::value(&time_resolution)->default_value("sec"), resolution_desc.c_str()) ("pack-metadata,P", po::value(&pack_metadata)->default_value("auto"), "pack certain metadata elements (auto, all, none, chunk_table, " "directories, shared_files, names, names_index, symlinks, " "symlinks_index, force, plain)") ; // clang-format on po::options_description opts; opts.add(basic_opts) .add(advanced_opts) .add(filter_opts) .add(segmenter_opts) .add(compressor_opts) .add(filesystem_opts) .add(metadata_opts); catreg.add_options(opts); po::variables_map vm; std::vector command_line; command_line.reserve(argc); for (int i = 0; i < argc; ++i) { command_line.emplace_back(sys_string_to_string(argv[i])); } try { auto parsed = po::parse_command_line(argc, argv, opts); po::store(parsed, vm); po::notify(vm); auto unrecognized = po::collect_unrecognized(parsed.options, po::include_positional); if (!unrecognized.empty()) { iol.err << "error: unrecognized argument(s) '" << sys_string_to_string(boost::join(unrecognized, " ")) << "'\n"; return 1; } } catch (po::error const& e) { iol.err << 
"error: " << e.what() << "\n"; return 1; } #ifdef DWARFS_BUILTIN_MANPAGE if (vm.count("man")) { show_manpage(manpage::get_mkdwarfs_manpage(), iol); return 0; } #endif auto constexpr usage = "Usage: mkdwarfs [OPTIONS...]\n"; if (vm.count("long-help")) { std::string_view constexpr block_data_hdr{"Block Data"}; std::string_view constexpr schema_history_hdr{"Schema/History"}; std::string_view constexpr metadata_hdr{"Metadata"}; size_t l_dc{block_data_hdr.size()}, l_sc{schema_history_hdr.size()}, l_mc{metadata_hdr.size()}, l_or{0}; for (auto const& l : levels) { l_dc = std::max(l_dc, l.data_compression.size()); l_sc = std::max(l_sc, l.schema_history_compression.size()); l_mc = std::max(l_mc, l.metadata_compression.size()); l_or = std::max(l_or, l.order.size()); } std::string sep(30 + l_dc + l_sc + l_mc + l_or, '-'); iol.out << tool_header("mkdwarfs") << library_dependencies::common_as_string() << "\n\n" << usage << opts << "\n" << "Compression level defaults:\n" << " " << sep << "\n" << fmt::format(" Level Block {:{}s} {:s} Inode\n", "Compression Algorithm", 4 + l_dc + l_sc + l_mc, "Window") << fmt::format(" Size {:{}s} {:{}s} {:{}s} {:6s}\n", block_data_hdr, l_dc, schema_history_hdr, l_sc, metadata_hdr, l_mc, "Size/Step Order") << " " << sep << "\n"; int level = 0; for (auto const& l : levels) { iol.out << fmt::format(" {:1d} {:2d} {:{}s} {:{}s} {:{}s}" " {:2d} / {:1d} {:{}s}", level, l.block_size_bits, l.data_compression, l_dc, l.schema_history_compression, l_sc, l.metadata_compression, l_mc, l.window_size, l.window_step, l.order, l_or) << "\n"; ++level; } iol.out << " " << sep << "\n"; iol.out << "\nCompression algorithms:\n"; compression_registry::instance().for_each_algorithm( [&iol](compression_type, compression_info const& info) { iol.out << fmt::format(" {:9}{}\n", info.name(), info.description()); for (auto const& opt : info.options()) { iol.out << fmt::format(" {}\n", opt); } }); iol.out << "\nCategories:\n"; for (auto const& name : catreg.categorizer_names()) 
{ stream_logger lgr(iol.term, iol.err); auto categorizer = catreg.create(lgr, name, vm); iol.out << " [" << name << "]\n"; for (auto cat : categorizer->categories()) { iol.out << " " << cat << "\n"; } } iol.out << "\n"; return 0; } if (vm.count("help") or !(vm.count("input") or vm.count("input-list")) or (!vm.count("output") and !vm.count("debug-filter"))) { iol.out << tool_header("mkdwarfs") << library_dependencies::common_as_string() << "\n\n" << usage << "\n" << basic_opts << "\n"; return 0; } if (level >= levels.size()) { iol.err << "error: invalid compression level\n"; return 1; } auto const& defaults = levels[level]; categorizer_list.add_defaults(categorize_defaults_common); categorizer_list.add_defaults(*categorize_defaults_level[level]); if (!vm.count("block-size-bits")) { sf_config.block_size_bits = defaults.block_size_bits; } if (!vm.count("schema-compression")) { schema_compression = defaults.schema_history_compression; } if (!vm.count("history-compression")) { history_compression = defaults.schema_history_compression; } if (!vm.count("metadata-compression")) { metadata_compression = defaults.metadata_compression; } if (sf_config.block_size_bits < min_block_size_bits || sf_config.block_size_bits > max_block_size_bits) { iol.err << "error: block size must be between " << min_block_size_bits << " and " << max_block_size_bits << "\n"; return 1; } std::filesystem::path path(path_str); std::optional> input_list; if (vm.count("input-list")) { if (vm.count("filter")) { iol.err << "error: cannot combine --input-list and --filter\n"; return 1; } // implicitly turn on options.with_devices = true; options.with_specials = true; if (!vm.count("input")) { path = iol.os->current_path(); } std::filesystem::path input_list_path(input_list_str); std::unique_ptr ifs; std::istream* is; if (input_list_path == "-") { is = &iol.in; } else { std::error_code ec; ifs = iol.file->open_input(input_list_path, ec); if (ec) { iol.err << "cannot open input list file '" << 
input_list_path << "': " << ec.message() << "\n"; return 1; } is = &ifs->is(); } std::string line; input_list.emplace(); while (std::getline(*is, line)) { input_list->emplace_back(line); } } path = iol.os->canonical(path); bool recompress = vm.count("recompress"); rewrite_options rw_opts; if (recompress) { std::unordered_map const modes{ {"all", 3}, {"metadata", 2}, {"block", 1}, {"none", 0}, }; if (auto it = modes.find(recompress_opts); it != modes.end()) { rw_opts.recompress_block = it->second & 1; rw_opts.recompress_metadata = it->second & 2; } else { iol.err << "invalid recompress mode: " << recompress_opts << "\n"; return 1; } if (!recompress_categories.empty()) { std::string_view input = recompress_categories; if (input.front() == '!') { rw_opts.recompress_categories_exclude = true; input.remove_prefix(1); } folly::splitTo( ',', input, std::inserter(rw_opts.recompress_categories, rw_opts.recompress_categories.end())); } } if (file_hash_algo == "none") { options.file_hash_algorithm.reset(); } else if (checksum::is_available(file_hash_algo)) { options.file_hash_algorithm = file_hash_algo; } else { iol.err << "error: unknown file hash function '" << file_hash_algo << "'\n"; return 1; } if (vm.count("max-similarity-size")) { auto size = parse_size_with_unit(max_similarity_size); if (size > 0) { options.inode.max_similarity_scan_size = size; } } size_t mem_limit = parse_size_with_unit(memory_limit); if (!vm.count("num-scanner-workers")) { num_scanner_workers = num_workers; } if (!vm.count("num-segmenter-workers")) { num_segmenter_workers = num_workers; } options.num_segmenter_workers = num_segmenter_workers; if (vm.count("debug-filter")) { if (auto it = debug_filter_modes.find(debug_filter); it != debug_filter_modes.end()) { options.debug_filter_function = [&iol, mode = it->second](bool exclude, entry const* pe) { debug_filter_output(iol.out, exclude, pe, mode); }; no_progress = true; } else { iol.err << "error: invalid filter debug mode '" << debug_filter << 
"'\n"; return 1; } } if (!progress_modes.count(progress_mode)) { iol.err << "error: invalid progress mode '" << progress_mode << "'\n"; return 1; } if (no_progress) { progress_mode = "none"; } if (progress_mode != "none" && !iol.term->is_tty(iol.err)) { progress_mode = "simple"; } auto pg_mode = DWARFS_NOTHROW(progress_modes.at(progress_mode)); console_writer lgr( iol.term, iol.err, pg_mode, recompress ? console_writer::REWRITE : console_writer::NORMAL, logopts); std::shared_ptr