/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see .
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
namespace dwarfs {
namespace {
constexpr uint8_t const kFlagBigEndian{0x80};
constexpr uint8_t const kFlagSigned{0x40};
constexpr uint8_t const kFlagLsbPadding{0x20};
constexpr uint8_t const kBytesPerSampleMask{0x03};
constexpr size_t const kBlockSize{65536};
class dwarfs_flac_stream_encoder final : public FLAC::Encoder::Stream {
public:
explicit dwarfs_flac_stream_encoder(std::vector& data)
: data_{data}
, pos_{data_.size()} {}
::FLAC__StreamEncoderReadStatus
read_callback(FLAC__byte buffer[], size_t* bytes) override {
::memcpy(buffer, data_.data() + pos_, *bytes);
return FLAC__STREAM_ENCODER_READ_STATUS_CONTINUE;
}
::FLAC__StreamEncoderWriteStatus
write_callback(const FLAC__byte buffer[], size_t bytes, uint32_t,
uint32_t) override {
size_t end = pos_ + bytes;
if (data_.size() < end) {
data_.resize(end);
}
::memcpy(data_.data() + pos_, buffer, bytes);
pos_ += bytes;
return FLAC__STREAM_ENCODER_WRITE_STATUS_OK;
}
::FLAC__StreamEncoderSeekStatus
seek_callback(FLAC__uint64 absolute_byte_offset) override {
pos_ = absolute_byte_offset;
return FLAC__STREAM_ENCODER_SEEK_STATUS_OK;
}
::FLAC__StreamEncoderTellStatus
tell_callback(FLAC__uint64* absolute_byte_offset) override {
*absolute_byte_offset = pos_;
return FLAC__STREAM_ENCODER_TELL_STATUS_OK;
}
private:
std::vector& data_;
size_t pos_;
};
class dwarfs_flac_stream_decoder final : public FLAC::Decoder::Stream {
public:
dwarfs_flac_stream_decoder(
std::vector& target, std::span data,
thrift::compression::flac_block_header const& header)
: target_{target}
, data_{data}
, header_{header}
, bytes_per_sample_{(header_.flags().value() & kBytesPerSampleMask) + 1}
, xfm_{header_.flags().value() & kFlagBigEndian
? pcm_sample_endianness::Big
: pcm_sample_endianness::Little,
header_.flags().value() & kFlagSigned
? pcm_sample_signedness::Signed
: pcm_sample_signedness::Unsigned,
header_.flags().value() & kFlagLsbPadding
? pcm_sample_padding::Lsb
: pcm_sample_padding::Msb,
bytes_per_sample_, header_.bits_per_sample().value()} {}
::FLAC__StreamDecoderReadStatus
read_callback(FLAC__byte buffer[], size_t* bytes) override {
if (pos_ >= data_.size()) {
return FLAC__STREAM_DECODER_READ_STATUS_END_OF_STREAM;
}
if (pos_ + *bytes > data_.size()) {
*bytes = data_.size() - pos_;
}
if (*bytes > 0) {
::memcpy(buffer, data_.data() + pos_, *bytes);
}
pos_ += *bytes;
return FLAC__STREAM_DECODER_READ_STATUS_CONTINUE;
}
::FLAC__StreamDecoderWriteStatus
write_callback(const ::FLAC__Frame* frame,
const FLAC__int32* const buffer[]) override {
auto samples = frame->header.blocksize;
auto channels = frame->header.channels;
tmp_.resize(channels * samples);
for (uint_fast32_t i = 0; i < samples; ++i) {
for (uint_fast32_t c = 0; c < channels; ++c) {
tmp_[i * channels + c] = buffer[c][i];
}
}
auto pos = target_.size();
size_t size = channels * samples * bytes_per_sample_;
target_.resize(pos + size);
xfm_.pack(std::span(&target_[pos], size), tmp_);
return FLAC__STREAM_DECODER_WRITE_STATUS_CONTINUE;
}
void error_callback(::FLAC__StreamDecoderErrorStatus status) override {
DWARFS_THROW(runtime_error,
fmt::format("[FLAC] decoder error: {}",
FLAC__StreamDecoderErrorStatusString[status]));
}
::FLAC__StreamDecoderSeekStatus
seek_callback(FLAC__uint64 absolute_byte_offset) override {
if (absolute_byte_offset > data_.size()) {
return ::FLAC__STREAM_DECODER_SEEK_STATUS_ERROR;
}
pos_ = absolute_byte_offset;
return ::FLAC__STREAM_DECODER_SEEK_STATUS_OK;
}
::FLAC__StreamDecoderTellStatus
tell_callback(FLAC__uint64* absolute_byte_offset) override {
*absolute_byte_offset = pos_;
return ::FLAC__STREAM_DECODER_TELL_STATUS_OK;
}
::FLAC__StreamDecoderLengthStatus
length_callback(FLAC__uint64* stream_length) override {
*stream_length = data_.size();
return ::FLAC__STREAM_DECODER_LENGTH_STATUS_OK;
}
bool eof_callback() override { return pos_ >= data_.size(); }
private:
std::vector& target_;
std::vector tmp_;
std::span data_;
thrift::compression::flac_block_header const& header_;
int const bytes_per_sample_;
pcm_sample_transformer xfm_;
size_t pos_{0};
};
class flac_block_compressor final : public block_compressor::impl {
public:
flac_block_compressor(uint32_t level, bool exhaustive)
: level_{level}
, exhaustive_{exhaustive} {}
flac_block_compressor(const flac_block_compressor& rhs) = default;
std::unique_ptr clone() const override {
return std::make_unique(*this);
}
std::vector compress(const std::vector& data,
std::string const* metadata) const override {
if (!metadata) {
DWARFS_THROW(runtime_error,
"internal error: flac compression requires metadata");
}
auto meta = nlohmann::json::parse(*metadata);
auto endianness = meta["endianness"].get();
auto signedness = meta["signedness"].get();
auto padding = meta["padding"].get();
auto num_channels = meta["number_of_channels"].get();
auto bits_per_sample = meta["bits_per_sample"].get();
auto bytes_per_sample = meta["bytes_per_sample"].get();
assert(1 <= bytes_per_sample && bytes_per_sample <= 4);
assert(8 <= bits_per_sample && bits_per_sample <= 32);
assert(1 <= num_channels);
if (data.size() % (num_channels * bytes_per_sample)) {
DWARFS_THROW(
runtime_error,
fmt::format("unexpected PCM waveform configuration: {} bytes to "
"compress, {} channels, {} bytes per sample",
data.size(), num_channels, bytes_per_sample));
}
size_t num_samples = data.size() / (num_channels * bytes_per_sample);
pcm_sample_endianness pcm_end;
pcm_sample_signedness pcm_sig;
pcm_sample_padding pcm_pad;
uint8_t flags = bytes_per_sample - 1;
if (endianness == "big") {
flags |= kFlagBigEndian;
pcm_end = pcm_sample_endianness::Big;
} else {
pcm_end = pcm_sample_endianness::Little;
}
if (signedness == "signed") {
flags |= kFlagSigned;
pcm_sig = pcm_sample_signedness::Signed;
} else {
pcm_sig = pcm_sample_signedness::Unsigned;
}
if (padding == "lsb") {
flags |= kFlagLsbPadding;
pcm_pad = pcm_sample_padding::Lsb;
} else {
pcm_pad = pcm_sample_padding::Msb;
}
std::vector compressed;
{
using namespace ::apache::thrift;
compressed.reserve(5 * data.size() / 8); // optimistic guess
compressed.resize(folly::kMaxVarintLength64);
size_t pos = 0;
pos += folly::encodeVarint(data.size(), compressed.data() + pos);
compressed.resize(pos);
thrift::compression::flac_block_header hdr;
hdr.num_channels() = num_channels;
hdr.bits_per_sample() = bits_per_sample;
hdr.flags() = flags;
std::string hdrbuf;
CompactSerializer::serialize(hdr, &hdrbuf);
compressed.resize(pos + hdrbuf.size());
::memcpy(&compressed[pos], hdrbuf.data(), hdrbuf.size());
}
dwarfs_flac_stream_encoder encoder(compressed);
encoder.set_streamable_subset(false);
encoder.set_channels(num_channels);
encoder.set_bits_per_sample(bits_per_sample);
encoder.set_sample_rate(48000); // TODO: see if a fixed rate makes sense
encoder.set_compression_level(level_);
encoder.set_do_exhaustive_model_search(exhaustive_);
encoder.set_total_samples_estimate(num_samples);
if (encoder.init() != FLAC__STREAM_ENCODER_INIT_STATUS_OK) {
DWARFS_THROW(
runtime_error,
fmt::format("[FLAC] init: {}", encoder.get_state().as_cstring()));
}
pcm_sample_transformer xfm(pcm_end, pcm_sig, pcm_pad,
bytes_per_sample, bits_per_sample);
const auto samples_per_call = kBlockSize / num_channels;
std::vector buffer;
size_t input_pos = 0;
while (num_samples > 0) {
auto n = std::min(num_samples, samples_per_call);
buffer.resize(n * num_channels);
xfm.unpack(buffer,
std::span(data.data() + input_pos,
buffer.size() * bytes_per_sample));
if (!encoder.process_interleaved(buffer.data(), n)) {
DWARFS_THROW(
runtime_error,
fmt::format("[FLAC] failed to process interleaved samples: {}",
encoder.get_state().as_cstring()));
}
input_pos += buffer.size() * bytes_per_sample;
num_samples -= n;
}
if (!encoder.finish()) {
DWARFS_THROW(runtime_error, "[FLAC] failed to finish encoder");
}
// XXX: don't throw this as we're losing metadata
// if (compressed.size() >= data.size()) {
// throw bad_compression_ratio_error();
// }
compressed.shrink_to_fit();
return compressed;
}
std::vector compress(std::vector&& data,
std::string const* metadata) const override {
return compress(data, metadata);
}
compression_type type() const override { return compression_type::FLAC; }
std::string describe() const override {
return fmt::format("flac [level={}{}]", level_,
exhaustive_ ? ", exhaustive" : "");
}
std::string metadata_requirements() const override {
using nlj = nlohmann::json;
nlohmann::json req = {
{"endianness", nlj::array({"set", nlj::array({"big", "little"})})},
{"signedness", nlj::array({"set", nlj::array({"signed", "unsigned"})})},
{"padding", nlj::array({"set", nlj::array({"msb", "lsb"})})},
{"bytes_per_sample", nlj::array({"range", 1, 4})},
{"bits_per_sample", nlj::array({"range", 8, 32})},
{"number_of_channels", nlj::array({"range", 1, 8})},
};
return req.dump();
}
compression_constraints
get_compression_constraints(std::string const& metadata) const override {
auto meta = nlohmann::json::parse(metadata);
auto num_channels = meta["number_of_channels"].get();
auto bytes_per_sample = meta["bytes_per_sample"].get();
compression_constraints cc;
cc.granularity = num_channels * bytes_per_sample;
return cc;
}
private:
uint32_t const level_;
bool const exhaustive_;
};
class flac_block_decompressor final : public block_decompressor::impl {
public:
flac_block_decompressor(const uint8_t* data, size_t size,
std::vector& target)
: flac_block_decompressor(folly::Range(data, size),
target) {}
flac_block_decompressor(folly::Range data,
std::vector& target)
: decompressed_{target}
, uncompressed_size_{folly::decodeVarint(data)}
, header_{decode_header(data)}
, decoder_{std::make_unique(
decompressed_, std::span(data.data(), data.size()),
header_)} {
decoder_->set_md5_checking(false);
decoder_->set_metadata_ignore_all();
if (auto status = decoder_->init();
status != FLAC__STREAM_DECODER_INIT_STATUS_OK) {
DWARFS_THROW(runtime_error,
fmt::format("[FLAC] could not initialize decoder: {}",
FLAC__StreamDecoderInitStatusString[status]));
}
try {
decompressed_.reserve(uncompressed_size_);
} catch (std::bad_alloc const&) {
DWARFS_THROW(
runtime_error,
fmt::format(
"[FLAC] could not reserve {} bytes for decompressed block",
uncompressed_size_));
}
}
compression_type type() const override { return compression_type::FLAC; }
std::optional metadata() const override {
auto const flags = header_.flags().value();
nlohmann::json meta{
{"endianness", flags & kFlagBigEndian ? "big" : "little"},
{"signedness", flags & kFlagSigned ? "signed" : "unsigned"},
{"padding", flags & kFlagLsbPadding ? "lsb" : "msb"},
{"bytes_per_sample", (flags & kBytesPerSampleMask) + 1},
{"bits_per_sample", header_.bits_per_sample().value()},
{"number_of_channels", header_.num_channels().value()},
};
return meta.dump();
}
bool decompress_frame(size_t frame_size) override {
size_t pos = decompressed_.size();
if (pos + frame_size > uncompressed_size_) {
assert(uncompressed_size_ >= pos);
frame_size = uncompressed_size_ - pos;
}
size_t wanted = pos + frame_size;
assert(wanted <= uncompressed_size_);
assert(frame_size > 0);
while (decompressed_.size() < wanted) {
if (!decoder_->process_single()) {
DWARFS_THROW(runtime_error,
fmt::format("[FLAC] failed to process frame: {}",
decoder_->get_state().as_cstring()));
}
}
if (decompressed_.size() == uncompressed_size_) {
decoder_.reset();
return true;
}
return false;
}
size_t uncompressed_size() const override { return uncompressed_size_; }
private:
static thrift::compression::flac_block_header
decode_header(folly::Range& range) {
using namespace ::apache::thrift;
thrift::compression::flac_block_header hdr;
auto size = CompactSerializer::deserialize(range, hdr);
range.advance(size);
return hdr;
}
std::vector& decompressed_;
size_t const uncompressed_size_;
thrift::compression::flac_block_header const header_;
std::unique_ptr decoder_;
};
class flac_compression_factory : public compression_factory {
public:
static constexpr compression_type type{compression_type::FLAC};
flac_compression_factory()
: options_{
fmt::format("level=[0..8]"),
fmt::format("exhaustive"),
} {}
std::string_view name() const override { return "flac"; }
std::string_view description() const override {
static std::string const s_desc{
fmt::format("FLAC compression (libFLAC++ {})", ::FLAC__VERSION_STRING)};
return s_desc;
}
std::vector const& options() const override { return options_; }
std::set library_dependencies() const override {
return {fmt::format("libFLAC++-{}", ::FLAC__VERSION_STRING)};
}
std::unique_ptr
make_compressor(option_map& om) const override {
return std::make_unique(
om.get("level", 5), om.get("exhaustive", false));
}
std::unique_ptr
make_decompressor(std::span data,
std::vector& target) const override {
return std::make_unique(data.data(), data.size(),
target);
}
private:
std::vector const options_;
};
} // namespace
REGISTER_COMPRESSION_FACTORY(flac_compression_factory)
} // namespace dwarfs