diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index 2f34fd038..7386a1269 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -64,6 +64,8 @@ set(ICEBERG_SOURCES partition_spec.cc partition_summary.cc puffin/file_metadata.cc + puffin/puffin_format.cc + puffin/puffin_json_internal.cc row/arrow_array_wrapper.cc row/manifest_wrapper.cc row/partition_values.cc diff --git a/src/iceberg/deletes/roaring_position_bitmap.cc b/src/iceberg/deletes/roaring_position_bitmap.cc index fec037887..3a9fd3e57 100644 --- a/src/iceberg/deletes/roaring_position_bitmap.cc +++ b/src/iceberg/deletes/roaring_position_bitmap.cc @@ -49,28 +49,6 @@ int64_t ToPosition(int32_t key, uint32_t pos32) { return (int64_t{key} << 32) | int64_t{pos32}; } -void WriteLE64(char* buf, int64_t value) { - auto le = ToLittleEndian(static_cast(value)); - std::memcpy(buf, &le, sizeof(le)); -} - -void WriteLE32(char* buf, int32_t value) { - auto le = ToLittleEndian(static_cast(value)); - std::memcpy(buf, &le, sizeof(le)); -} - -int64_t ReadLE64(const char* buf) { - uint64_t v; - std::memcpy(&v, buf, sizeof(v)); - return static_cast(FromLittleEndian(v)); -} - -int32_t ReadLE32(const char* buf) { - uint32_t v; - std::memcpy(&v, buf, sizeof(v)); - return static_cast(FromLittleEndian(v)); -} - Status ValidatePosition(int64_t pos) { if (pos < 0 || pos > RoaringPositionBitmap::kMaxPosition) { return InvalidArgument("Bitmap supports positions that are >= 0 and <= {}: {}", @@ -189,12 +167,12 @@ Result RoaringPositionBitmap::Serialize() const { char* buf = result.data(); // Write bitmap count (array length including empties) - WriteLE64(buf, static_cast(impl_->bitmaps.size())); + WriteLittleEndian(static_cast(impl_->bitmaps.size()), buf); buf += kBitmapCountSizeBytes; // Write each bitmap with its key for (int32_t key = 0; std::cmp_less(key, impl_->bitmaps.size()); ++key) { - WriteLE32(buf, key); + WriteLittleEndian(key, buf); buf += kBitmapKeySizeBytes; size_t written = impl_->bitmaps[key].write(buf, /*portable=*/true); buf += written; @@ -210,7 +188,7 @@ Result RoaringPositionBitmap::Deserialize(std::string_vie ICEBERG_PRECHECK(remaining >= kBitmapCountSizeBytes, "Buffer too small for bitmap count: {} bytes", remaining); - int64_t bitmap_count = ReadLE64(buf); + auto bitmap_count = ReadLittleEndian(buf); buf += kBitmapCountSizeBytes; remaining -= kBitmapCountSizeBytes; @@ -226,7 +204,7 @@ Result RoaringPositionBitmap::Deserialize(std::string_vie ICEBERG_PRECHECK(remaining >= kBitmapKeySizeBytes, "Buffer too small for bitmap key: {} bytes", remaining); - int32_t key = ReadLE32(buf); + auto key = ReadLittleEndian(buf); buf += kBitmapKeySizeBytes; remaining -= kBitmapKeySizeBytes; diff --git a/src/iceberg/meson.build b/src/iceberg/meson.build index 42f46d5a9..6dc8b576a 100644 --- a/src/iceberg/meson.build +++ b/src/iceberg/meson.build @@ -82,6 +82,8 @@ iceberg_sources = files( 'partition_spec.cc', 'partition_summary.cc', 'puffin/file_metadata.cc', + 'puffin/puffin_format.cc', + 'puffin/puffin_json_internal.cc', 'row/arrow_array_wrapper.cc', 'row/manifest_wrapper.cc', 'row/partition_values.cc', diff --git a/src/iceberg/puffin/meson.build b/src/iceberg/puffin/meson.build index 0655156eb..d855544bf 100644 --- a/src/iceberg/puffin/meson.build +++ b/src/iceberg/puffin/meson.build @@ -15,4 +15,4 @@ # specific language governing permissions and limitations # under the License. -install_headers(['file_metadata.h'], subdir: 'iceberg/puffin') +install_headers(['file_metadata.h', 'puffin_format.h'], subdir: 'iceberg/puffin') diff --git a/src/iceberg/puffin/puffin_format.cc b/src/iceberg/puffin/puffin_format.cc new file mode 100644 index 000000000..81b1f5111 --- /dev/null +++ b/src/iceberg/puffin/puffin_format.cc @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/puffin/puffin_format.h" + +#include + +#include "iceberg/util/endian.h" +#include "iceberg/util/macros.h" + +namespace iceberg::puffin { + +namespace { + +constexpr std::pair GetFlagPosition(PuffinFlag flag) { + switch (flag) { + case PuffinFlag::kFooterPayloadCompressed: + return {0, 0}; + } + std::unreachable(); +} + +} // namespace + +bool IsFlagSet(std::span flags, PuffinFlag flag) { + auto [byte_num, bit_num] = GetFlagPosition(flag); + return (flags[byte_num] & (1 << bit_num)) != 0; +} + +void SetFlag(std::span flags, PuffinFlag flag) { + auto [byte_num, bit_num] = GetFlagPosition(flag); + flags[byte_num] |= (1 << bit_num); +} + +void WriteInt32LittleEndian(int32_t value, std::span output) { + WriteLittleEndian(value, output.data()); +} + +int32_t ReadInt32LittleEndian(std::span input) { + return ReadLittleEndian(input.data()); +} + +int32_t ReadInt32LittleEndian(std::span data, int32_t offset) { + ICEBERG_DCHECK(offset >= 0, "Offset must be non-negative"); + ICEBERG_DCHECK(static_cast(offset) + 4 <= data.size(), "Offset out of bounds"); + return ReadInt32LittleEndian(std::span(data.data() + offset, 4)); +} + +Result> Compress(PuffinCompressionCodec codec, + std::span input) { + switch (codec) { + case PuffinCompressionCodec::kNone: + return std::vector(input.begin(), input.end()); + case PuffinCompressionCodec::kLz4: + return NotSupported("LZ4 compression is not yet supported"); + case PuffinCompressionCodec::kZstd: + return NotSupported("Zstd compression is not yet supported"); + } + std::unreachable(); +} + +Result> Decompress(PuffinCompressionCodec codec, + std::span input) { + switch (codec) { + case PuffinCompressionCodec::kNone: + return std::vector(input.begin(), input.end()); + case PuffinCompressionCodec::kLz4: + return NotSupported("LZ4 decompression is not yet supported"); + case PuffinCompressionCodec::kZstd: + return NotSupported("Zstd decompression is not yet supported"); + } + std::unreachable(); +} + +} // namespace iceberg::puffin diff --git a/src/iceberg/puffin/puffin_format.h b/src/iceberg/puffin/puffin_format.h new file mode 100644 index 000000000..fc023f266 --- /dev/null +++ b/src/iceberg/puffin/puffin_format.h @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/puffin/puffin_format.h +/// Puffin file format constants and utilities. + +#include +#include +#include +#include + +#include "iceberg/iceberg_export.h" +#include "iceberg/puffin/file_metadata.h" +#include "iceberg/result.h" + +namespace iceberg::puffin { + +/// \brief Puffin file format constants. +struct ICEBERG_EXPORT PuffinFormat { + /// Magic bytes: "PFA1" (Puffin Fratercula arctica, version 1) + static constexpr std::array kMagic = {0x50, 0x46, 0x41, 0x31}; + + static constexpr int32_t kMagicLength = 4; + static constexpr int32_t kFooterStartMagicOffset = 0; + static constexpr int32_t kFooterStartMagicLength = kMagicLength; + static constexpr int32_t kFooterStructPayloadSizeOffset = 0; + static constexpr int32_t kFooterStructFlagsOffset = kFooterStructPayloadSizeOffset + 4; + static constexpr int32_t kFooterStructFlagsLength = 4; + static constexpr int32_t kFooterStructMagicOffset = + kFooterStructFlagsOffset + kFooterStructFlagsLength; + + /// Total length of the footer struct: payload_size(4) + flags(4) + magic(4) + static constexpr int32_t kFooterStructLength = kFooterStructMagicOffset + kMagicLength; + + /// Default compression codec for footer payload. + static constexpr PuffinCompressionCodec kFooterCompressionCodec = + PuffinCompressionCodec::kLz4; +}; + +/// \brief Footer flags for Puffin files. +enum class PuffinFlag : uint8_t { + /// Whether the footer payload is compressed. + kFooterPayloadCompressed = 0, +}; + +/// \brief Check if a flag is set in the flags bytes. +ICEBERG_EXPORT bool IsFlagSet(std::span flags, PuffinFlag flag); + +/// \brief Set a flag in the flags bytes. +ICEBERG_EXPORT void SetFlag(std::span flags, PuffinFlag flag); + +/// \brief Write a 32-bit integer in little-endian format. +ICEBERG_EXPORT void WriteInt32LittleEndian(int32_t value, std::span output); + +/// \brief Read a 32-bit integer from a fixed-size span in little-endian format. +ICEBERG_EXPORT int32_t ReadInt32LittleEndian(std::span input); + +/// \brief Read a 32-bit integer from a buffer at the given offset in little-endian +/// format. +ICEBERG_EXPORT int32_t ReadInt32LittleEndian(std::span data, + int32_t offset); + +/// \brief Compress data using the specified codec. +ICEBERG_EXPORT Result> Compress(PuffinCompressionCodec codec, + std::span input); + +/// \brief Decompress data using the specified codec. +ICEBERG_EXPORT Result> Decompress(PuffinCompressionCodec codec, + std::span input); + +} // namespace iceberg::puffin diff --git a/src/iceberg/puffin/puffin_json_internal.cc b/src/iceberg/puffin/puffin_json_internal.cc new file mode 100644 index 000000000..2b4b41633 --- /dev/null +++ b/src/iceberg/puffin/puffin_json_internal.cc @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/puffin/puffin_json_internal.h" + +#include + +#include "iceberg/util/json_util_internal.h" +#include "iceberg/util/macros.h" + +namespace iceberg::puffin { + +namespace { +constexpr std::string_view kBlobs = "blobs"; +constexpr std::string_view kProperties = "properties"; +constexpr std::string_view kType = "type"; +constexpr std::string_view kFields = "fields"; +constexpr std::string_view kSnapshotId = "snapshot-id"; +constexpr std::string_view kSequenceNumber = "sequence-number"; +constexpr std::string_view kOffset = "offset"; +constexpr std::string_view kLength = "length"; +constexpr std::string_view kCompressionCodec = "compression-codec"; +} // namespace + +nlohmann::json ToJson(const BlobMetadata& blob_metadata) { + nlohmann::json json; + json[kType] = blob_metadata.type; + json[kFields] = blob_metadata.input_fields; + json[kSnapshotId] = blob_metadata.snapshot_id; + json[kSequenceNumber] = blob_metadata.sequence_number; + json[kOffset] = blob_metadata.offset; + json[kLength] = blob_metadata.length; + + SetOptionalStringField(json, kCompressionCodec, blob_metadata.compression_codec); + SetContainerField(json, kProperties, blob_metadata.properties); + + return json; +} + +Result BlobMetadataFromJson(const nlohmann::json& json) { + BlobMetadata blob_metadata; + + ICEBERG_ASSIGN_OR_RAISE(blob_metadata.type, GetJsonValue(json, kType)); + ICEBERG_ASSIGN_OR_RAISE(blob_metadata.input_fields, + GetJsonValue>(json, kFields)); + ICEBERG_ASSIGN_OR_RAISE(blob_metadata.snapshot_id, + GetJsonValue(json, kSnapshotId)); + ICEBERG_ASSIGN_OR_RAISE(blob_metadata.sequence_number, + GetJsonValue(json, kSequenceNumber)); + ICEBERG_ASSIGN_OR_RAISE(blob_metadata.offset, GetJsonValue(json, kOffset)); + ICEBERG_ASSIGN_OR_RAISE(blob_metadata.length, GetJsonValue(json, kLength)); + ICEBERG_ASSIGN_OR_RAISE(blob_metadata.compression_codec, + GetJsonValueOrDefault(json, kCompressionCodec)); + ICEBERG_ASSIGN_OR_RAISE(blob_metadata.properties, + FromJsonMap(json, kProperties)); + + return blob_metadata; +} + +nlohmann::json ToJson(const FileMetadata& file_metadata) { + nlohmann::json json; + + nlohmann::json blobs_json = nlohmann::json::array(); + for (const auto& blob : file_metadata.blobs) { + blobs_json.push_back(ToJson(blob)); + } + json[kBlobs] = std::move(blobs_json); + + SetContainerField(json, kProperties, file_metadata.properties); + + return json; +} + +Result FileMetadataFromJson(const nlohmann::json& json) { + FileMetadata file_metadata; + + ICEBERG_ASSIGN_OR_RAISE(auto blobs_json, GetJsonValue(json, kBlobs)); + if (!blobs_json.is_array()) { + return JsonParseError("Cannot parse blobs from non-array: {}", + SafeDumpJson(blobs_json)); + } + + for (const auto& blob_json : blobs_json) { + ICEBERG_ASSIGN_OR_RAISE(auto blob, BlobMetadataFromJson(blob_json)); + file_metadata.blobs.push_back(std::move(blob)); + } + + ICEBERG_ASSIGN_OR_RAISE(file_metadata.properties, + FromJsonMap(json, kProperties)); + + return file_metadata; +} + +std::string ToJsonString(const FileMetadata& file_metadata, bool pretty) { + auto json = ToJson(file_metadata); + return pretty ? json.dump(2) : json.dump(); +} + +Result FileMetadataFromJsonString(std::string_view json_string) { + if (json_string.empty()) { + return JsonParseError("Cannot parse empty JSON string"); + } + try { + auto json = nlohmann::json::parse(json_string); + return FileMetadataFromJson(json); + } catch (const nlohmann::json::parse_error& e) { + return JsonParseError("Failed to parse JSON: {}", e.what()); + } +} + +} // namespace iceberg::puffin diff --git a/src/iceberg/puffin/puffin_json_internal.h b/src/iceberg/puffin/puffin_json_internal.h new file mode 100644 index 000000000..4ff8232ac --- /dev/null +++ b/src/iceberg/puffin/puffin_json_internal.h @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/puffin/puffin_json_internal.h +/// JSON serialization/deserialization for Puffin file metadata. + +#include +#include + +#include + +#include "iceberg/iceberg_export.h" +#include "iceberg/puffin/file_metadata.h" +#include "iceberg/result.h" + +namespace iceberg::puffin { + +/// \brief Serialize a BlobMetadata to JSON. +ICEBERG_EXPORT nlohmann::json ToJson(const BlobMetadata& blob_metadata); + +/// \brief Deserialize a BlobMetadata from JSON. +ICEBERG_EXPORT Result BlobMetadataFromJson(const nlohmann::json& json); + +/// \brief Serialize a FileMetadata to JSON. +ICEBERG_EXPORT nlohmann::json ToJson(const FileMetadata& file_metadata); + +/// \brief Deserialize a FileMetadata from JSON. +ICEBERG_EXPORT Result FileMetadataFromJson(const nlohmann::json& json); + +/// \brief Serialize a FileMetadata to a JSON string. +ICEBERG_EXPORT std::string ToJsonString(const FileMetadata& file_metadata, + bool pretty = false); + +/// \brief Deserialize a FileMetadata from a JSON string. +ICEBERG_EXPORT Result FileMetadataFromJsonString( + std::string_view json_string); + +} // namespace iceberg::puffin diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index c802df8a5..9ddb1aea1 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -125,6 +125,10 @@ add_iceberg_test(util_test add_iceberg_test(roaring_test SOURCES roaring_test.cc) +add_iceberg_test(puffin_format_test SOURCES puffin_format_test.cc) + +add_iceberg_test(puffin_json_test SOURCES puffin_json_test.cc) + if(ICEBERG_BUILD_BUNDLE) add_iceberg_test(avro_test USE_BUNDLE diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build index 1eafc5fe1..eec8f9e91 100644 --- a/src/iceberg/test/meson.build +++ b/src/iceberg/test/meson.build @@ -100,6 +100,8 @@ iceberg_tests = { ), }, 'roaring_test': {'sources': files('roaring_test.cc')}, + 'puffin_format_test': {'sources': files('puffin_format_test.cc')}, + 'puffin_json_test': {'sources': files('puffin_json_test.cc')}, } if get_option('rest').enabled() diff --git a/src/iceberg/test/puffin_format_test.cc b/src/iceberg/test/puffin_format_test.cc new file mode 100644 index 000000000..0208d9945 --- /dev/null +++ b/src/iceberg/test/puffin_format_test.cc @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/puffin/puffin_format.h" + +#include +#include +#include + +#include + +namespace iceberg::puffin { + +TEST(PuffinFormatTest, ByteOrderRoundTrip) { + std::array buf{}; + WriteInt32LittleEndian(0x12345678, buf); + EXPECT_EQ(ReadInt32LittleEndian(buf), 0x12345678); + + WriteInt32LittleEndian(0, buf); + EXPECT_EQ(ReadInt32LittleEndian(buf), 0); + + WriteInt32LittleEndian(-1, buf); + EXPECT_EQ(ReadInt32LittleEndian(buf), -1); +} + +TEST(PuffinFormatTest, FlagSetAndCheck) { + std::array flags{}; + EXPECT_FALSE(IsFlagSet(flags, PuffinFlag::kFooterPayloadCompressed)); + + SetFlag(flags, PuffinFlag::kFooterPayloadCompressed); + EXPECT_TRUE(IsFlagSet(flags, PuffinFlag::kFooterPayloadCompressed)); +} + +TEST(PuffinFormatTest, CompressDecompressNone) { + std::vector input = {1, 2, 3, 4, 5}; + auto compressed = Compress(PuffinCompressionCodec::kNone, input); + ASSERT_TRUE(compressed.has_value()); + EXPECT_EQ(compressed.value(), input); + + auto decompressed = Decompress(PuffinCompressionCodec::kNone, input); + ASSERT_TRUE(decompressed.has_value()); + EXPECT_EQ(decompressed.value(), input); +} + +TEST(PuffinFormatTest, CompressUnsupportedCodec) { + std::vector input = {1, 2, 3}; + EXPECT_FALSE(Compress(PuffinCompressionCodec::kLz4, input).has_value()); + EXPECT_FALSE(Compress(PuffinCompressionCodec::kZstd, input).has_value()); + EXPECT_FALSE(Decompress(PuffinCompressionCodec::kLz4, input).has_value()); + EXPECT_FALSE(Decompress(PuffinCompressionCodec::kZstd, input).has_value()); +} + +} // namespace iceberg::puffin diff --git a/src/iceberg/test/puffin_json_test.cc b/src/iceberg/test/puffin_json_test.cc new file mode 100644 index 000000000..78c34aaf6 --- /dev/null +++ b/src/iceberg/test/puffin_json_test.cc @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include + +#include +#include + +#include "iceberg/puffin/file_metadata.h" +#include "iceberg/puffin/puffin_json_internal.h" + +namespace iceberg::puffin { + +TEST(PuffinJsonTest, BlobMetadataRoundTrip) { + BlobMetadata blob; + blob.type = "apache-datasketches-theta-v1"; + blob.input_fields = {1, 2}; + blob.snapshot_id = 12345; + blob.sequence_number = 67; + blob.offset = 100; + blob.length = 200; + blob.compression_codec = "zstd"; + blob.properties = {{"key", "value"}}; + + nlohmann::json expected_json = R"({ + "type": "apache-datasketches-theta-v1", + "fields": [1, 2], + "snapshot-id": 12345, + "sequence-number": 67, + "offset": 100, + "length": 200, + "compression-codec": "zstd", + "properties": {"key": "value"} + })"_json; + + EXPECT_EQ(ToJson(blob), expected_json); + + auto result = BlobMetadataFromJson(expected_json); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), blob); +} + +TEST(PuffinJsonTest, BlobMetadataMinimalFields) { + BlobMetadata blob; + blob.type = "test-type"; + blob.input_fields = {1}; + blob.snapshot_id = 100; + blob.sequence_number = 1; + blob.offset = 0; + blob.length = 50; + + nlohmann::json expected_json = R"({ + "type": "test-type", + "fields": [1], + "snapshot-id": 100, + "sequence-number": 1, + "offset": 0, + "length": 50 + })"_json; + + auto json = ToJson(blob); + EXPECT_EQ(json, expected_json); + EXPECT_FALSE(json.contains("compression-codec")); + EXPECT_FALSE(json.contains("properties")); + + auto result = BlobMetadataFromJson(expected_json); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), blob); +} + +TEST(PuffinJsonTest, BlobMetadataMissingRequiredField) { + auto json = R"({"type": "test"})"_json; + EXPECT_FALSE(BlobMetadataFromJson(json).has_value()); +} + +TEST(PuffinJsonTest, FileMetadataRoundTrip) { + BlobMetadata blob1; + blob1.type = "type-a"; + blob1.input_fields = {1}; + blob1.snapshot_id = 100; + blob1.sequence_number = 1; + blob1.offset = 4; + blob1.length = 50; + blob1.compression_codec = "lz4"; + + BlobMetadata blob2; + blob2.type = "type-b"; + blob2.input_fields = {2, 3}; + blob2.snapshot_id = 200; + blob2.sequence_number = 2; + blob2.offset = 54; + blob2.length = 100; + + FileMetadata metadata; + metadata.blobs = {blob1, blob2}; + metadata.properties = {{"created-by", "iceberg-cpp-test"}}; + + nlohmann::json expected_json = R"({ + "blobs": [ + { + "type": "type-a", + "fields": [1], + "snapshot-id": 100, + "sequence-number": 1, + "offset": 4, + "length": 50, + "compression-codec": "lz4" + }, + { + "type": "type-b", + "fields": [2, 3], + "snapshot-id": 200, + "sequence-number": 2, + "offset": 54, + "length": 100 + } + ], + "properties": {"created-by": "iceberg-cpp-test"} + })"_json; + + EXPECT_EQ(ToJson(metadata), expected_json); + + auto result = FileMetadataFromJson(expected_json); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), metadata); +} + +TEST(PuffinJsonTest, FileMetadataStringRoundTrip) { + BlobMetadata blob; + blob.type = "test"; + blob.input_fields = {1}; + blob.snapshot_id = 1; + blob.sequence_number = 1; + blob.offset = 0; + blob.length = 10; + + FileMetadata metadata; + metadata.blobs = {blob}; + + auto json_str = ToJsonString(metadata); + auto result = FileMetadataFromJsonString(json_str); + ASSERT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), metadata); +} + +TEST(PuffinJsonTest, FileMetadataFromInvalidString) { + EXPECT_FALSE(FileMetadataFromJsonString("").has_value()); + EXPECT_FALSE(FileMetadataFromJsonString("{invalid}").has_value()); +} + +} // namespace iceberg::puffin diff --git a/src/iceberg/util/endian.h b/src/iceberg/util/endian.h index 52fa93e67..b4e7c178b 100644 --- a/src/iceberg/util/endian.h +++ b/src/iceberg/util/endian.h @@ -22,6 +22,7 @@ #include #include #include +#include /// \file iceberg/util/endian.h /// \brief Endianness conversion utilities @@ -94,4 +95,19 @@ constexpr T FromBigEndian(T value) { } } +/// \brief Write a value in little-endian format to a buffer. +template +void WriteLittleEndian(T value, void* output) { + auto le = ToLittleEndian(value); + std::memcpy(output, &le, sizeof(le)); +} + +/// \brief Read a value in little-endian format from a buffer. +template +T ReadLittleEndian(const void* input) { + T value; + std::memcpy(&value, input, sizeof(value)); + return FromLittleEndian(value); +} + } // namespace iceberg