/* vim:set ts=2 sw=2 sts=2 et: */ /** * \author Marcus Holland-Moritz (github@mhxnet.de) * \copyright Copyright (c) Marcus Holland-Moritz * * This file is part of dwarfs. * * dwarfs is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * dwarfs is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with dwarfs. If not, see . */ #include #include #include #include #include #include #include #include #include #include namespace dwarfs { namespace { constexpr int RICEPP_VERSION{1}; class ricepp_block_compressor final : public block_compressor::impl { public: ricepp_block_compressor(size_t block_size) : block_size_{block_size} {} ricepp_block_compressor(const ricepp_block_compressor& rhs) = default; std::unique_ptr clone() const override { return std::make_unique(*this); } std::vector compress(const std::vector& data, std::string const* metadata) const override { if (!metadata) { DWARFS_THROW(runtime_error, "internal error: ricepp compression requires metadata"); } auto meta = nlohmann::json::parse(*metadata); auto endianness = meta["endianness"].get(); auto component_count = meta["component_count"].get(); auto unused_lsb_count = meta["unused_lsb_count"].get(); auto bytes_per_sample = meta["bytes_per_sample"].get(); assert(2 <= bytes_per_sample && bytes_per_sample <= 2); assert(0 <= unused_lsb_count && unused_lsb_count <= 8); assert(1 <= component_count && component_count <= 2); if (data.size() % (component_count * bytes_per_sample)) { DWARFS_THROW(runtime_error, fmt::format("unexpected data configuration: {} bytes to " "compress, {} components, 
{} bytes per sample", data.size(), component_count, bytes_per_sample)); } using pixel_type = uint16_t; auto byteorder = endianness == "big" ? std::endian::big : std::endian::little; auto codec = ricepp::create_codec({ .block_size = block_size_, .component_stream_count = static_cast(component_count), .byteorder = byteorder, .unused_lsb_count = static_cast(unused_lsb_count), }); std::vector compressed; { using namespace ::apache::thrift; compressed.resize(folly::kMaxVarintLength64); size_t pos = 0; pos += folly::encodeVarint(data.size(), compressed.data() + pos); compressed.resize(pos); thrift::compression::ricepp_block_header hdr; hdr.block_size() = block_size_; hdr.component_count() = component_count; hdr.bytes_per_sample() = bytes_per_sample; hdr.unused_lsb_count() = unused_lsb_count; hdr.big_endian() = byteorder == std::endian::big; hdr.ricepp_version() = RICEPP_VERSION; std::string hdrbuf; CompactSerializer::serialize(hdr, &hdrbuf); compressed.resize(pos + hdrbuf.size()); ::memcpy(&compressed[pos], hdrbuf.data(), hdrbuf.size()); } std::span input{ reinterpret_cast(data.data()), data.size() / bytes_per_sample}; size_t header_size = compressed.size(); compressed.resize(header_size + codec->worst_case_encoded_bytes(input)); std::span buffer(compressed); auto output = codec->encode(buffer.subspan(header_size), input); compressed.resize(header_size + output.size()); compressed.shrink_to_fit(); return compressed; } std::vector compress(std::vector&& data, std::string const* metadata) const override { return compress(data, metadata); } compression_type type() const override { return compression_type::RICEPP; } std::string describe() const override { return fmt::format("ricepp [block_size={}]", block_size_); } std::string metadata_requirements() const override { using nlj = nlohmann::json; nlohmann::json req{ {"endianness", nlj::array({"set", nlj::array({"big", "little"})})}, {"bytes_per_sample", nlj::array({"set", nlj::array({2})})}, {"component_count", 
nlj::array({"range", 1, 2})}, {"unused_lsb_count", nlj::array({"range", 0, 8})}, }; return req.dump(); } compression_constraints get_compression_constraints(std::string const& metadata) const override { auto meta = nlohmann::json::parse(metadata); auto component_count = meta["component_count"].get(); auto bytes_per_sample = meta["bytes_per_sample"].get(); compression_constraints cc; cc.granularity = component_count * bytes_per_sample; return cc; } private: size_t const block_size_; }; class ricepp_block_decompressor final : public block_decompressor::impl { public: ricepp_block_decompressor(const uint8_t* data, size_t size, std::vector& target) : ricepp_block_decompressor(folly::Range(data, size), target) {} ricepp_block_decompressor(folly::Range data, std::vector& target) : decompressed_{target} , uncompressed_size_{folly::decodeVarint(data)} , header_{decode_header(data)} , data_{data.data(), data.size()} , codec_{ricepp::create_codec( {.block_size = header_.block_size().value(), .component_stream_count = header_.component_count().value(), .byteorder = header_.big_endian().value() ? std::endian::big : std::endian::little, .unused_lsb_count = header_.unused_lsb_count().value()})} { if (header_.bytes_per_sample().value() != 2) { DWARFS_THROW(runtime_error, fmt::format("[RICEPP] unsupported bytes per sample: {}", header_.bytes_per_sample().value())); } try { decompressed_.reserve(uncompressed_size_); } catch (std::bad_alloc const&) { DWARFS_THROW( runtime_error, fmt::format( "[RICEPP] could not reserve {} bytes for decompressed block", uncompressed_size_)); } } compression_type type() const override { return compression_type::RICEPP; } std::optional metadata() const override { nlohmann::json meta{ {"endianness", header_.big_endian().value() ? 
"big" : "little"}, {"bytes_per_sample", header_.bytes_per_sample().value()}, {"unused_lsb_count", header_.unused_lsb_count().value()}, {"component_count", header_.component_count().value()}, }; return meta.dump(); } bool decompress_frame(size_t) override { if (!codec_) { return false; } decompressed_.resize(uncompressed_size_); std::span output{ reinterpret_cast(decompressed_.data()), decompressed_.size() / 2}; codec_->decode(output, data_); codec_.reset(); return true; } size_t uncompressed_size() const override { return uncompressed_size_; } private: static thrift::compression::ricepp_block_header decode_header(folly::Range& range) { using namespace ::apache::thrift; thrift::compression::ricepp_block_header hdr; auto size = CompactSerializer::deserialize(range, hdr); range.advance(size); if (hdr.ricepp_version().value() > RICEPP_VERSION) { DWARFS_THROW(runtime_error, fmt::format("[RICEPP] unsupported version: {}", hdr.ricepp_version().value())); } return hdr; } std::vector& decompressed_; size_t const uncompressed_size_; thrift::compression::ricepp_block_header const header_; std::span data_; std::unique_ptr> codec_; }; class ricepp_compression_factory : public compression_factory { public: static constexpr compression_type type{compression_type::RICEPP}; ricepp_compression_factory() : options_{ fmt::format("block_size=[{}..{}]", 16, 512), } {} std::string_view name() const override { return "ricepp"; } std::string_view description() const override { static std::string const s_desc{"RICEPP compression"}; return s_desc; } std::vector const& options() const override { return options_; } std::set library_dependencies() const override { return {}; } std::unique_ptr make_compressor(option_map& om) const override { return std::make_unique( om.get("block_size", 128)); } std::unique_ptr make_decompressor(std::span data, std::vector& target) const override { return std::make_unique(data.data(), data.size(), target); } private: std::vector const options_; }; } // namespace 
// Presumably registers the factory with a global compression registry so
// that "ricepp" is selectable by name — macro expansion is defined
// elsewhere; verify against the framework header.
REGISTER_COMPRESSION_FACTORY(ricepp_compression_factory)

} // namespace dwarfs