/* vim:set ts=2 sw=2 sts=2 et: */ /** * \author Marcus Holland-Moritz (github@mhxnet.de) * \copyright Copyright (c) Marcus Holland-Moritz * * This file is part of dwarfs. * * dwarfs is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * dwarfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with dwarfs. If not, see . */ #include #include #include #include #include #include #include #include #include namespace dwarfs { namespace { std::unordered_map const lzma_error_desc{ {LZMA_NO_CHECK, "input stream has no integrity check"}, {LZMA_UNSUPPORTED_CHECK, "cannot calculate the integrity check"}, {LZMA_GET_CHECK, "integrity check type is now available"}, {LZMA_MEM_ERROR, "cannot allocate memory"}, {LZMA_MEMLIMIT_ERROR, "memory usage limit was reached"}, {LZMA_FORMAT_ERROR, "file format not recognized"}, {LZMA_OPTIONS_ERROR, "invalid or unsupported options"}, {LZMA_DATA_ERROR, "data is corrupt"}, {LZMA_BUF_ERROR, "no progress is possible"}, {LZMA_PROG_ERROR, "programming error"}, // TODO: re-add when this has arrived in the mainstream... // {LZMA_SEEK_NEEDED, "request to change the input file position"}, }; std::string lzma_error_string(lzma_ret err) { if (auto it = lzma_error_desc.find(err); it != lzma_error_desc.end()) { return it->second; } return fmt::format("unknown error {}", static_cast(err)); } class lzma_block_compressor final : public block_compressor::impl { public: lzma_block_compressor(unsigned level, bool extreme, const std::string& binary_mode, unsigned dict_size); lzma_block_compressor(const lzma_block_compressor& rhs) = default; std::unique_ptr clone() const override { return std::make_unique(*this); } std::vector compress(const std::vector& data, std::string const* metadata) const override; std::vector compress(std::vector&& data, std::string const* metadata) const override { return compress(data, metadata); } compression_type type() const override { return compression_type::LZMA; } std::string describe() const override { return description_; } std::string metadata_requirements() const override { return std::string(); } compression_constraints get_compression_constraints(std::string const&) const override { return compression_constraints(); } private: std::vector compress(const std::vector& data, const lzma_filter* filters) const; static uint32_t get_preset(unsigned level, bool extreme) { uint32_t preset = level; if (extreme) { preset |= LZMA_PRESET_EXTREME; } return preset; } static lzma_vli get_vli(const std::string& binary) { if (binary.empty()) { return LZMA_VLI_UNKNOWN; } std::unordered_map vm{ {"x86", LZMA_FILTER_X86}, {"powerpc", LZMA_FILTER_POWERPC}, {"ia64", LZMA_FILTER_IA64}, {"arm", LZMA_FILTER_ARM}, {"armthumb", LZMA_FILTER_ARMTHUMB}, {"sparc", LZMA_FILTER_SPARC}, }; auto i = vm.find(binary); if (i == vm.end()) { DWARFS_THROW(runtime_error, "unsupported binary mode"); } return i->second; } lzma_options_lzma opt_lzma_; lzma_vli binary_vli_; std::string description_; }; lzma_block_compressor::lzma_block_compressor(unsigned level, bool extreme, const std::string& binary_mode, unsigned dict_size) : binary_vli_{get_vli(binary_mode)} , description_{ fmt::format("lzma [level={}, dict_size={}{}{}]", level, dict_size, extreme ? ", extreme" : "", binary_mode.empty() ? "" : ", binary=" + binary_mode)} { if (lzma_lzma_preset(&opt_lzma_, get_preset(level, extreme))) { DWARFS_THROW(runtime_error, "unsupported preset, possibly a bug"); } if (dict_size > 0) { opt_lzma_.dict_size = 1 << dict_size; } } std::vector lzma_block_compressor::compress(const std::vector& data, const lzma_filter* filters) const { lzma_stream s = LZMA_STREAM_INIT; if (auto ret = lzma_stream_encoder(&s, filters, LZMA_CHECK_CRC64); ret != LZMA_OK) { DWARFS_THROW(runtime_error, fmt::format("lzma_stream_encoder: {}", lzma_error_string(ret))); } lzma_action action = LZMA_FINISH; std::vector compressed(data.size() - 1); s.next_in = data.data(); s.avail_in = data.size(); s.next_out = compressed.data(); s.avail_out = compressed.size(); lzma_ret ret = lzma_code(&s, action); compressed.resize(compressed.size() - s.avail_out); lzma_end(&s); if (ret == 0) { throw bad_compression_ratio_error(); } if (ret == LZMA_STREAM_END) { compressed.shrink_to_fit(); } else { DWARFS_THROW(runtime_error, fmt::format("LZMA compression failed: {}", lzma_error_string(ret))); } return compressed; } std::vector lzma_block_compressor::compress(const std::vector& data, std::string const* /*metadata*/) const { auto lzma_opts = opt_lzma_; std::array filters{{{binary_vli_, NULL}, {LZMA_FILTER_LZMA2, &lzma_opts}, {LZMA_VLI_UNKNOWN, NULL}}}; std::vector best = compress(data, &filters[1]); if (filters[0].id != LZMA_VLI_UNKNOWN) { std::vector compressed = compress(data, &filters[0]); if (compressed.size() < best.size()) { best.swap(compressed); } } return best; } class lzma_block_decompressor final : public block_decompressor::impl { public: lzma_block_decompressor(const uint8_t* data, size_t size, std::vector& target) : stream_(LZMA_STREAM_INIT) , decompressed_(target) , uncompressed_size_(get_uncompressed_size(data, size)) { stream_.next_in = data; stream_.avail_in = size; if (auto ret = lzma_stream_decoder(&stream_, UINT64_MAX, LZMA_CONCATENATED); ret != LZMA_OK) { DWARFS_THROW(runtime_error, fmt::format("lzma_stream_decoder: {}", lzma_error_string(ret))); } try { decompressed_.reserve(uncompressed_size_); } catch (std::bad_alloc const&) { DWARFS_THROW( runtime_error, fmt::format("could not reserve {} bytes for decompressed block", uncompressed_size_)); } } ~lzma_block_decompressor() override { lzma_end(&stream_); } compression_type type() const override { return compression_type::LZMA; } std::optional metadata() const override { return std::nullopt; } bool decompress_frame(size_t frame_size) override { if (!error_.empty()) { DWARFS_THROW(runtime_error, error_); } lzma_action action = LZMA_RUN; if (decompressed_.size() + frame_size > uncompressed_size_) { frame_size = uncompressed_size_ - decompressed_.size(); action = LZMA_FINISH; } assert(frame_size > 0); size_t offset = decompressed_.size(); decompressed_.resize(offset + frame_size); stream_.next_out = decompressed_.data() + offset; stream_.avail_out = frame_size; lzma_ret ret = lzma_code(&stream_, action); if (ret == LZMA_STREAM_END) { lzma_end(&stream_); } if (ret != (action == LZMA_RUN ? LZMA_OK : LZMA_STREAM_END) || stream_.avail_out != 0) { decompressed_.clear(); error_ = fmt::format("LZMA decompression failed: {}", lzma_error_string(ret)); DWARFS_THROW(runtime_error, error_); } return ret == LZMA_STREAM_END; } size_t uncompressed_size() const override { return uncompressed_size_; } private: static size_t get_uncompressed_size(const uint8_t* data, size_t size); lzma_stream stream_; std::vector& decompressed_; const size_t uncompressed_size_; std::string error_; }; size_t lzma_block_decompressor::get_uncompressed_size(const uint8_t* data, size_t size) { if (size < 2 * LZMA_STREAM_HEADER_SIZE) { DWARFS_THROW(runtime_error, "lzma compressed block is too small"); } lzma_stream s = LZMA_STREAM_INIT; file_off_t pos = size - LZMA_STREAM_HEADER_SIZE; const uint32_t* ptr = reinterpret_cast(data + size) - 1; while (*ptr == 0) { pos -= 4; --ptr; if (pos < 2 * LZMA_STREAM_HEADER_SIZE) { DWARFS_THROW(runtime_error, "data error (stream padding)"); } } lzma_stream_flags footer_flags; if (auto ret = lzma_stream_footer_decode(&footer_flags, data + pos); ret != LZMA_OK) { DWARFS_THROW(runtime_error, fmt::format("lzma_stream_footer_decode: {}", lzma_error_string(ret))); } lzma_vli index_size = footer_flags.backward_size; if (static_cast(pos) < index_size + LZMA_STREAM_HEADER_SIZE) { DWARFS_THROW(runtime_error, "data error (index size)"); } pos -= index_size; lzma_index* index = NULL; if (auto ret = lzma_index_decoder(&s, &index, UINT64_MAX); ret != LZMA_OK) { DWARFS_THROW(runtime_error, fmt::format("lzma_index_decoder: {}", lzma_error_string(ret))); } s.avail_in = index_size; s.next_in = data + pos; lzma_ret ret = lzma_code(&s, LZMA_RUN); if (ret != LZMA_STREAM_END || s.avail_in != 0) { DWARFS_THROW(runtime_error, fmt::format("lzma_code(): {} (avail_in={})", lzma_error_string(ret), s.avail_in)); } pos -= LZMA_STREAM_HEADER_SIZE; if (static_cast(pos) < lzma_index_total_size(index)) { DWARFS_THROW(runtime_error, "data error (index total size)"); } size_t usize = lzma_index_uncompressed_size(index); // TODO: wrap this in some RAII container, as error handling is horrible... lzma_end(&s); lzma_index_end(index, NULL); return usize; } class lzma_compression_factory : public compression_factory { public: static constexpr compression_type type{compression_type::LZMA}; std::string_view name() const override { return "lzma"; } std::string_view description() const override { static std::string const s_desc{ fmt::format("LZMA compression (liblzma {})", ::lzma_version_string())}; return s_desc; } std::vector const& options() const override { return options_; } std::set library_dependencies() const override { return {fmt::format("liblzma-{}", ::lzma_version_string())}; } std::unique_ptr make_compressor(option_map& om) const override { return std::make_unique( om.get("level", 9u), om.get("extreme", false), om.get("binary"), om.get("dict_size", 0u)); } std::unique_ptr make_decompressor(std::span data, std::vector& target) const override { return std::make_unique(data.data(), data.size(), target); } private: std::vector const options_{ "level=[0..9]", "dict_size=[12..30]", "extreme", "binary={x86,powerpc,ia64,arm,armthumb,sparc}", }; }; } // namespace REGISTER_COMPRESSION_FACTORY(lzma_compression_factory) } // namespace dwarfs