From f2118655a604d8903b40cb88b0c45f4c6678a1e3 Mon Sep 17 00:00:00 2001 From: Dmitry Razdoburdin Date: Mon, 16 Feb 2026 03:54:59 -0800 Subject: [PATCH 1/8] initial --- include/svs/core/data/io.h | 35 +++++++++ include/svs/core/data/simple.h | 99 +++++++++++++++++++++++--- include/svs/core/io/native.h | 81 +++++++++++++-------- include/svs/index/flat/flat.h | 2 + include/svs/lib/archiver.h | 88 +++++++++++++++++++++++ include/svs/lib/file.h | 63 ++-------------- include/svs/lib/saveload/load.h | 40 +++++++++++ include/svs/lib/saveload/save.h | 22 ++++++ include/svs/lib/stream.h | 54 ++++++++++++++ include/svs/orchestrators/exhaustive.h | 10 +-- 10 files changed, 393 insertions(+), 101 deletions(-) create mode 100644 include/svs/lib/archiver.h create mode 100644 include/svs/lib/stream.h diff --git a/include/svs/core/data/io.h b/include/svs/core/data/io.h index 6bb856fed..8b072d8cd 100644 --- a/include/svs/core/data/io.h +++ b/include/svs/core/data/io.h @@ -79,6 +79,22 @@ void populate_impl( } } +template void populate(std::istream& is, Data& data) { + auto accessor = DefaultWriteAccessor(); + + size_t num_vectors = data.size(); + size_t dims = data.dimensions(); + + auto max_lines = Dynamic; + auto nvectors = std::min(num_vectors, max_lines); + + auto reader = lib::VectorReader(dims); + for (size_t i = 0; i < nvectors; ++i) { + reader.read(is); + accessor.set(data, i, reader.data()); + } +} + // Intercept the native file to perform dispatch on the actual file type. 
template void populate_impl( @@ -120,6 +136,17 @@ void save(const Dataset& data, const File& file, const lib::UUID& uuid = lib::Ze return save(data, accessor, file, uuid); } +template +void save(const Dataset& data, std::ostream& os) { + auto accessor = DefaultReadAccessor(); + auto writer = svs::io::v1::StreamWriter( + os, data.size(), accessor.serialized_dimensions(data) + ); + for (size_t i = 0; i < data.size(); ++i) { + writer << accessor.get(data, i); + } +} + /// /// @brief Save the dataset as a "*vecs" file. /// @@ -169,6 +196,14 @@ lib::lazy_result_t load_dataset(const File& file, const F& la return load_impl(detail::to_native(file), default_accessor, lazy); } +template F> +lib::lazy_result_t +load_dataset(std::istream& is, const F& lazy, size_t num_vectors, size_t dims) { + auto data = lazy(num_vectors, dims); + populate(is, data); + return data; +} + // Return whether or not a file is directly loadable via file-extension. inline bool special_by_file_extension(std::string_view path) { return (path.ends_with("svs") || path.ends_with("vecs") || path.ends_with("bin")); diff --git a/include/svs/core/data/simple.h b/include/svs/core/data/simple.h index 0fcb31bbb..3ef364ce1 100644 --- a/include/svs/core/data/simple.h +++ b/include/svs/core/data/simple.h @@ -75,24 +75,50 @@ class GenericSerializer { } template - static lib::SaveTable save(const Data& data, const lib::SaveContext& ctx) { + static size_t serialized_size(const Data& data) { using T = typename Data::element_type; - // UUID used to identify the file. 
- auto uuid = lib::UUID{}; - auto filename = ctx.generate_name("data"); - io::save(data, io::NativeFile(filename), uuid); - return lib::SaveTable( + constexpr size_t header_size = svs::io::v1::header_size; + size_t data_size = sizeof(T) * data.size() * data.dimensions(); + return header_size + data_size; + } + + template + static lib::SaveTable save_table(const Data& data) { + using T = typename Data::element_type; + auto table = lib::SaveTable( serialization_schema, save_version, { {"name", "uncompressed"}, - {"binary_file", lib::save(filename.filename())}, {"dims", lib::save(data.dimensions())}, {"num_vectors", lib::save(data.size())}, - {"uuid", uuid.str()}, {"eltype", lib::save(datatype_v)}, } ); + return table; + } + + template + static lib::SaveTable + save_table(const Data& data, const FileName_t& filename, const lib::UUID& uuid) { + auto table = save_table(data); + table.insert("binary_file", filename); + table.insert("uuid", uuid.str()); + return table; + } + + template + static lib::SaveTable save(const Data& data, const lib::SaveContext& ctx) { + // UUID used to identify the file. 
+ auto uuid = lib::UUID{}; + auto filename = ctx.generate_name("data"); + io::save(data, io::NativeFile(filename), uuid); + return save_table(data, lib::save(filename.filename()), uuid); + } + + template + static void save(const Data& data, std::ostream& os) { + io::save(data, os); } template F> @@ -116,6 +142,25 @@ class GenericSerializer { } return io::load_dataset(binaryfile.value(), lazy); } + + template F> + static lib::lazy_result_t + load(const lib::ContextFreeLoadTable& table, std::istream& is, const F& lazy) { + auto datatype = lib::load_at(table, "eltype"); + if (datatype != datatype_v) { + throw ANNEXCEPTION( + "Trying to load an uncompressed dataset with element types {} to a dataset " + "with element types {}.", + name(datatype), + name>() + ); + } + + int64_t num_vectors = lib::load_at(table, "num_vectors"); + int64_t dims = lib::load_at(table, "dims"); + + return io::load_dataset(is, lazy, num_vectors, dims); + } }; struct Matcher { @@ -405,6 +450,12 @@ class SimpleData { return GenericSerializer::save(*this, ctx); } + void save(std::ostream& os) const { return GenericSerializer::save(*this, os); } + + lib::SaveTable save_table() const { return GenericSerializer::save_table(*this); } + + size_t serialized_size() const { return GenericSerializer::serialized_size(*this); } + static bool check_load_compatibility(std::string_view schema, lib::Version version) { return GenericSerializer::check_compatibility(schema, version); } @@ -431,6 +482,20 @@ class SimpleData { ); } + static SimpleData load( + const lib::ContextFreeLoadTable& table, + std::istream& is, + const allocator_type& allocator = {} + ) + requires(!is_view) + { + return GenericSerializer::load( + table, is, lib::Lazy([&](size_t n_elements, size_t n_dimensions) { + return SimpleData(n_elements, n_dimensions, allocator); + }) + ); + } + /// /// @brief Try to automatically load the dataset. 
/// @@ -805,6 +870,12 @@ class SimpleData> { return GenericSerializer::save(*this, ctx); } + void save(std::ostream& os) const { return GenericSerializer::save(*this, os); } + + lib::SaveTable save_table() const { return GenericSerializer::save_table(*this); } + + size_t serialized_size() const { return GenericSerializer::serialized_size(*this); } + static bool check_load_compatibility(std::string_view schema, lib::Version version) { return GenericSerializer::check_compatibility(schema, version); } @@ -818,6 +889,18 @@ class SimpleData> { ); } + static SimpleData load( + const lib::ContextFreeLoadTable& table, + std::istream& is, + const Blocked& allocator = {} + ) { + return GenericSerializer::load( + table, is, lib::Lazy([&allocator](size_t n_elements, size_t n_dimensions) { + return SimpleData(n_elements, n_dimensions, allocator); + }) + ); + } + static SimpleData load(const std::filesystem::path& path, const Blocked& allocator = {}) { if (detail::is_likely_reload(path)) { diff --git a/include/svs/core/io/native.h b/include/svs/core/io/native.h index 0039128d3..6fe290cf9 100644 --- a/include/svs/core/io/native.h +++ b/include/svs/core/io/native.h @@ -344,28 +344,16 @@ struct Header { static_assert(sizeof(Header) == header_size, "Mismatch in Native io::v1 header sizes!"); static_assert(std::is_trivially_copyable_v
, "Header must be trivially copyable!"); -template class Writer { +// CRTP +template class Writer { public: - Writer( - const std::string& path, - size_t dimension, - lib::UUID uuid = lib::UUID(lib::ZeroInitializer()) - ) - : dimension_{dimension} - , uuid_{uuid} - , stream_{lib::open_write(path, std::ofstream::out | std::ofstream::binary)} { - // Write a temporary header. - stream_.seekp(0, std::ofstream::beg); - lib::write_binary(stream_, Header()); - } - - size_t dimensions() const { return dimension_; } void overwrite_num_vectors(size_t num_vectors) { vectors_written_ = num_vectors; } // TODO: Error checking to make sure the length is correct. template Writer& append(U&& v) { + std::ostream& os = static_cast(this)->stream(); for (const auto& i : v) { - lib::write_binary(stream_, lib::io_convert(i)); + lib::write_binary(os, lib::io_convert(i)); } ++vectors_written_; return *this; @@ -374,13 +362,37 @@ template class Writer { template requires std::is_same_v Writer& append(std::tuple&& v) { - lib::foreach (v, [&](const auto& x) { lib::write_binary(stream_, x); }); + std::ostream& os = static_cast(this)->stream(); + lib::foreach (v, [&](const auto& x) { lib::write_binary(os, x); }); ++vectors_written_; return *this; } template Writer& operator<<(U&& v) { return append(std::forward(v)); } + protected: + size_t vectors_written_ = 0; +}; + +template class FileWriter : public Writer> { + public: + FileWriter( + const std::string& path, + size_t dimension, + lib::UUID uuid = lib::UUID(lib::ZeroInitializer()) + ) + : dimension_{dimension} + , uuid_{uuid} + , stream_{lib::open_write(path, std::ofstream::out | std::ofstream::binary)} { + // Write a temporary header. 
+ stream_.seekp(0, std::ofstream::beg); + lib::write_binary(stream_, Header()); + } + + std::ostream& stream() { return stream_; } + + size_t dimensions() const { return dimension_; } + void flush() { stream_.flush(); } void writeheader(bool resume = true) { @@ -388,7 +400,7 @@ template class Writer { // Write to the header the number of vectors actually written. stream_.seekp(0); assert(stream_.good()); - lib::write_binary(stream_, Header(vectors_written_, dimension_, uuid_)); + lib::write_binary(stream_, Header(this->vectors_written_, dimension_, uuid_)); if (resume) { stream_.seekp(position, std::ofstream::beg); } @@ -402,20 +414,33 @@ template class Writer { // // We delete the copy constructor and copy assignment operators because // `std::ofstream` isn't copyable anyways. - Writer(const Writer&) = delete; - Writer& operator=(const Writer&) = delete; - Writer(Writer&&) = delete; - Writer& operator=(Writer&&) = delete; + FileWriter(const FileWriter&) = delete; + FileWriter& operator=(const FileWriter&) = delete; + FileWriter(FileWriter&&) = delete; + FileWriter& operator=(FileWriter&&) = delete; // Write the header for the file. 
- ~Writer() noexcept { writeheader(); } + ~FileWriter() noexcept { writeheader(); } private: size_t dimension_; lib::UUID uuid_; std::ofstream stream_; size_t writes_this_vector_ = 0; - size_t vectors_written_ = 0; +}; + +template class StreamWriter : public Writer> { + public: + StreamWriter(std::ostream& os, size_t num_vectors, size_t dimension) + : stream_{os} { + auto header = Header(num_vectors, dimension); + lib::write_binary(stream_, header); + } + + std::ostream& stream() { return stream_; } + + private: + std::ostream& stream_; }; /// @@ -449,13 +474,13 @@ class NativeFile { } template - Writer writer( + FileWriter writer( lib::Type SVS_UNUSED(type), size_t dimension, lib::UUID uuid = lib::ZeroUUID ) const { - return Writer(path_, dimension, uuid); + return FileWriter(path_, dimension, uuid); } - Writer<> writer(size_t dimensions, lib::UUID uuid = lib::ZeroUUID) const { + FileWriter<> writer(size_t dimensions, lib::UUID uuid = lib::ZeroUUID) const { return writer(lib::Type(), dimensions, uuid); } @@ -715,7 +740,7 @@ class NativeFile { public: using compatible_file_types = lib::Types; - template using Writer = v1::Writer; + template using Writer = v1::FileWriter; explicit NativeFile(std::filesystem::path path) : path_{std::move(path)} {} diff --git a/include/svs/index/flat/flat.h b/include/svs/index/flat/flat.h index 187fc7440..d81e7cf54 100644 --- a/include/svs/index/flat/flat.h +++ b/include/svs/index/flat/flat.h @@ -522,6 +522,8 @@ class FlatIndex { void save(const std::filesystem::path& data_directory) const { lib::save_to_disk(data_, data_directory); } + + void save(std::ostream& os) const { lib::save_to_stream(data_, os); } }; /// diff --git a/include/svs/lib/archiver.h b/include/svs/lib/archiver.h new file mode 100644 index 000000000..d332e7910 --- /dev/null +++ b/include/svs/lib/archiver.h @@ -0,0 +1,88 @@ +/* + * Copyright 2026 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file 
except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +// svs +#include "svs/lib/exception.h" + +// stl + +namespace svs::lib { + +// CRTP +template struct Archiver { + using size_type = uint64_t; + + // TODO: Define CACHELINE_BYTES in a common place + // rather than duplicating it here and in prefetch.h + static constexpr auto CACHELINE_BYTES = 64; + + static size_type write_size(std::ostream& os, size_type size) { + os.write(reinterpret_cast(&size), sizeof(size)); + if (!os) { + throw ANNEXCEPTION("Error writing to stream!"); + } + return sizeof(size); + } + + static size_type read_size(std::istream& is, size_type& size) { + is.read(reinterpret_cast(&size), sizeof(size)); + if (!is) { + throw ANNEXCEPTION("Error reading from stream!"); + } + return sizeof(size); + } + + static size_type write_name(std::ostream& os, const std::string& name) { + auto bytes = write_size(os, name.size()); + os.write(name.data(), name.size()); + if (!os) { + throw ANNEXCEPTION("Error writing to stream!"); + } + return bytes + name.size(); + } + + static size_type read_name(std::istream& is, std::string& name) { + size_type size = 0; + auto bytes = read_size(is, size); + name.resize(size); + is.read(name.data(), size); + if (!is) { + throw ANNEXCEPTION("Error reading from stream!"); + } + return bytes + size; + } + + static void read_from_istream(std::istream& in, std::ostream& out, size_t data_size) { + // Copy the data in chunks. 
+ constexpr size_t buffer_size = 1 << 13; // 8KB buffer + alignas(CACHELINE_BYTES) char buffer[buffer_size]; + + size_t bytes_remaining = data_size; + while (bytes_remaining > 0) { + size_t to_read = std::min(buffer_size, bytes_remaining); + in.read(buffer, to_read); + if (!in) { + throw ANNEXCEPTION("Error reading from stream!"); + } + out.write(buffer, to_read); + bytes_remaining -= to_read; + } + } +}; + +} // namespace svs::lib diff --git a/include/svs/lib/file.h b/include/svs/lib/file.h index 937bc1502..c4df66099 100644 --- a/include/svs/lib/file.h +++ b/include/svs/lib/file.h @@ -17,6 +17,7 @@ #pragma once // svs +#include "svs/lib/archiver.h" #include "svs/lib/exception.h" #include "svs/lib/uuid.h" @@ -151,50 +152,9 @@ struct UniqueTempDirectory { // Uses a simple custom binary format. // Not meant to be super efficient, just a simple way to serialize a directory // structure to a stream. -struct DirectoryArchiver { - using size_type = uint64_t; - - // TODO: Define CACHELINE_BYTES in a common place - // rather than duplicating it here and in prefetch.h - static constexpr auto CACHELINE_BYTES = 64; +struct DirectoryArchiver : Archiver { static constexpr size_type magic_number = 0x5e2d58d9f3b4a6c1; - static size_type write_size(std::ostream& os, size_type size) { - os.write(reinterpret_cast(&size), sizeof(size)); - if (!os) { - throw ANNEXCEPTION("Error writing to stream!"); - } - return sizeof(size); - } - - static size_type read_size(std::istream& is, size_type& size) { - is.read(reinterpret_cast(&size), sizeof(size)); - if (!is) { - throw ANNEXCEPTION("Error reading from stream!"); - } - return sizeof(size); - } - - static size_type write_name(std::ostream& os, const std::string& name) { - auto bytes = write_size(os, name.size()); - os.write(name.data(), name.size()); - if (!os) { - throw ANNEXCEPTION("Error writing to stream!"); - } - return bytes + name.size(); - } - - static size_type read_name(std::istream& is, std::string& name) { - size_type size = 0; 
- auto bytes = read_size(is, size); - name.resize(size); - is.read(name.data(), size); - if (!is) { - throw ANNEXCEPTION("Error reading from stream!"); - } - return bytes + size; - } - static size_type write_file( std::ostream& stream, const std::filesystem::path& path, @@ -262,22 +222,9 @@ struct DirectoryArchiver { throw ANNEXCEPTION("Error opening file {} for writing!", path); } - // Copy the data in chunks. - constexpr size_t buffer_size = 1 << 13; // 8KB buffer - alignas(CACHELINE_BYTES) char buffer[buffer_size]; - - size_t bytes_remaining = filesize; - while (bytes_remaining > 0) { - size_t to_read = std::min(buffer_size, bytes_remaining); - stream.read(buffer, to_read); - if (!stream) { - throw ANNEXCEPTION("Error reading from stream!"); - } - out.write(buffer, to_read); - if (!out) { - throw ANNEXCEPTION("Error writing to file {}!", path); - } - bytes_remaining -= to_read; + read_from_istream(stream, out, filesize); + if (!out) { + throw ANNEXCEPTION("Error writing to file {}!", path); } return header_bytes + filesize; diff --git a/include/svs/lib/saveload/load.h b/include/svs/lib/saveload/load.h index 767e02afa..3763c6328 100644 --- a/include/svs/lib/saveload/load.h +++ b/include/svs/lib/saveload/load.h @@ -22,6 +22,9 @@ // stl #include +#include "svs/lib/file.h" +#include "svs/lib/stream.h" + namespace svs::lib { /// @@ -828,6 +831,28 @@ inline SerializedObject begin_deserialization(const std::filesystem::path& fullp std::move(table), lib::LoadContext{fullpath.parent_path(), version}}; } +inline ContextFreeSerializedObject begin_deserialization(std::istream& stream) { + lib::StreamArchiver::size_type magic = 0; + lib::StreamArchiver::read_size(stream, magic); + if (magic == lib::DirectoryArchiver::magic_number) { + // Backward compatibility mode for older versions + lib::StreamArchiver::size_type num_files = 0; + lib::StreamArchiver::read_size(stream, num_files); + + std::string file_name; + lib::StreamArchiver::read_name(stream, file_name); + } else if 
(magic != lib::StreamArchiver::magic_number) { + throw ANNEXCEPTION("Invalid magic number in stream deserialization!"); + } + + if (!stream) { + throw ANNEXCEPTION("Error reading from stream!"); + } + + auto table = lib::StreamArchiver::read_table(stream); + return ContextFreeSerializedObject{std::move(table)}; +} + } // namespace detail inline SerializedObject begin_deserialization(const std::filesystem::path& path) { @@ -877,6 +902,21 @@ T load_from_disk(const std::filesystem::path& path, Args&&... args) { return lib::load_from_disk(Loader(), path, SVS_FWD(args)...); } +///// load_from_stream +template +T load_from_stream(const Loader& loader, std::istream& stream, Args&&... args) { + // At this point, we will try the saving/loading framework to load the object. + // Here we go! + return lib::load( + loader, detail::begin_deserialization(stream), stream, SVS_FWD(args)... + ); +} + +template +T load_from_stream(std::istream& stream, Args&&... args) { + return lib::load_from_stream(Loader(), stream, SVS_FWD(args)...); +} + ///// load_from_file template diff --git a/include/svs/lib/saveload/save.h b/include/svs/lib/saveload/save.h index 60f556e77..f3fdc432e 100644 --- a/include/svs/lib/saveload/save.h +++ b/include/svs/lib/saveload/save.h @@ -319,6 +319,17 @@ void save_node_to_file( auto file = svs::lib::open_write(path, std::ios_base::out); file << top_table << "\n"; } + +template +void save_node_to_stream( + Nodelike&& node, std::ostream& os, const lib::Version& version = CURRENT_SAVE_VERSION +) { + auto top_table = toml::table( + {{config_version_key, version.str()}, {config_object_key, SVS_FWD(node)}} + ); + + StreamArchiver::write_table(os, top_table); +} } // namespace detail /// @@ -365,4 +376,15 @@ template void save_to_file(const T& x, const std::filesystem::path& detail::save_node_to_file(lib::save(x), path); } +template void save_to_stream(const T& x, std::ostream& os) { + lib::StreamArchiver::write_size(os, lib::StreamArchiver::magic_number); + + auto 
save_table = x.save_table(); + detail::save_node_to_stream(detail::exit_hook(save_table), os); + + lib::StreamArchiver::size_type serialized_size = x.serialized_size(); + lib::StreamArchiver::write_size(os, serialized_size); + x.save(os); +} + } // namespace svs::lib diff --git a/include/svs/lib/stream.h b/include/svs/lib/stream.h new file mode 100644 index 000000000..b69be2fb8 --- /dev/null +++ b/include/svs/lib/stream.h @@ -0,0 +1,54 @@ +/* + * Copyright 2026 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +// saveload +#include "svs/lib/saveload/core.h" + +// svs +#include "svs/lib/archiver.h" +#include "svs/lib/exception.h" + +// stl + +namespace svs::lib { + +struct StreamArchiver : Archiver { + // SVS_STRM + static constexpr size_type magic_number = 0x5356535f5354524d; + + static auto read_table(std::istream& is) { + std::uint64_t tablesize = 0; + read_size(is, tablesize); + + std::stringstream ss; + read_from_istream(is, ss, tablesize); + + return toml::parse(ss); + } + + static void write_table(std::ostream& os, const toml::table& table) { + std::stringstream ss; + ss << table << "\n"; + + lib::StreamArchiver::size_type tablesize = ss.rdbuf()->view().size(); + lib::StreamArchiver::write_size(os, tablesize); + os << ss.rdbuf(); + } +}; + +} // namespace svs::lib diff --git a/include/svs/orchestrators/exhaustive.h b/include/svs/orchestrators/exhaustive.h index b33b6cc4a..7fa969ead 100644 --- a/include/svs/orchestrators/exhaustive.h +++ b/include/svs/orchestrators/exhaustive.h @@ -24,6 +24,7 @@ #include "svs/core/distance.h" #include "svs/core/graph.h" #include "svs/lib/preprocessor.h" +#include "svs/lib/stream.h" #include "svs/lib/threads.h" #include "svs/orchestrators/manager.h" @@ -87,9 +88,7 @@ class FlatImpl : public manager::ManagerImpl { void save(std::ostream& stream) const override { if constexpr (Impl::supports_saving) { - lib::UniqueTempDirectory tempdir{"svs_flat_save"}; - save(tempdir); - lib::DirectoryArchiver::pack(tempdir, stream); + impl().save(stream); } else { throw ANNEXCEPTION("The current Vamana backend doesn't support saving!"); } @@ -196,11 +195,8 @@ class Flat : public manager::IndexManager { ThreadPoolProto threadpool_proto, DataLoaderArgs&&... 
data_args ) { - namespace fs = std::filesystem; - lib::UniqueTempDirectory tempdir{"svs_flat_load"}; - lib::DirectoryArchiver::unpack(stream, tempdir); return assemble( - lib::load_from_disk(tempdir, SVS_FWD(data_args)...), + lib::load_from_stream(stream, SVS_FWD(data_args)...), distance, threads::as_threadpool(std::move(threadpool_proto)) ); From 77ce2518b515ebeb69531bd04609e5b4cb93568d Mon Sep 17 00:00:00 2001 From: Dmitry Razdoburdin Date: Mon, 16 Feb 2026 05:44:49 -0800 Subject: [PATCH 2/8] fixes --- include/svs/core/data/io.h | 4 +--- include/svs/core/data/simple.h | 12 ------------ include/svs/core/io/native.h | 7 ++----- include/svs/lib/saveload/save.h | 2 -- include/svs/lib/stream.h | 5 ++++- 5 files changed, 7 insertions(+), 23 deletions(-) diff --git a/include/svs/core/data/io.h b/include/svs/core/data/io.h index 8b072d8cd..572ffa63a 100644 --- a/include/svs/core/data/io.h +++ b/include/svs/core/data/io.h @@ -139,9 +139,7 @@ void save(const Dataset& data, const File& file, const lib::UUID& uuid = lib::Ze template void save(const Dataset& data, std::ostream& os) { auto accessor = DefaultReadAccessor(); - auto writer = svs::io::v1::StreamWriter( - os, data.size(), accessor.serialized_dimensions(data) - ); + auto writer = svs::io::v1::StreamWriter(os); for (size_t i = 0; i < data.size(); ++i) { writer << accessor.get(data, i); } diff --git a/include/svs/core/data/simple.h b/include/svs/core/data/simple.h index 3ef364ce1..114491f46 100644 --- a/include/svs/core/data/simple.h +++ b/include/svs/core/data/simple.h @@ -74,14 +74,6 @@ class GenericSerializer { return schema == serialization_schema && version == save_version; } - template - static size_t serialized_size(const Data& data) { - using T = typename Data::element_type; - constexpr size_t header_size = svs::io::v1::header_size; - size_t data_size = sizeof(T) * data.size() * data.dimensions(); - return header_size + data_size; - } - template static lib::SaveTable save_table(const Data& data) { using T = 
typename Data::element_type; @@ -454,8 +446,6 @@ class SimpleData { lib::SaveTable save_table() const { return GenericSerializer::save_table(*this); } - size_t serialized_size() const { return GenericSerializer::serialized_size(*this); } - static bool check_load_compatibility(std::string_view schema, lib::Version version) { return GenericSerializer::check_compatibility(schema, version); } @@ -874,8 +864,6 @@ class SimpleData> { lib::SaveTable save_table() const { return GenericSerializer::save_table(*this); } - size_t serialized_size() const { return GenericSerializer::serialized_size(*this); } - static bool check_load_compatibility(std::string_view schema, lib::Version version) { return GenericSerializer::check_compatibility(schema, version); } diff --git a/include/svs/core/io/native.h b/include/svs/core/io/native.h index 6fe290cf9..0f6476686 100644 --- a/include/svs/core/io/native.h +++ b/include/svs/core/io/native.h @@ -431,11 +431,8 @@ template class FileWriter : public Writer> template class StreamWriter : public Writer> { public: - StreamWriter(std::ostream& os, size_t num_vectors, size_t dimension) - : stream_{os} { - auto header = Header(num_vectors, dimension); - lib::write_binary(stream_, header); - } + StreamWriter(std::ostream& os) + : stream_{os} {} std::ostream& stream() { return stream_; } diff --git a/include/svs/lib/saveload/save.h b/include/svs/lib/saveload/save.h index f3fdc432e..01ed89c75 100644 --- a/include/svs/lib/saveload/save.h +++ b/include/svs/lib/saveload/save.h @@ -382,8 +382,6 @@ template void save_to_stream(const T& x, std::ostream& os) { auto save_table = x.save_table(); detail::save_node_to_stream(detail::exit_hook(save_table), os); - lib::StreamArchiver::size_type serialized_size = x.serialized_size(); - lib::StreamArchiver::write_size(os, serialized_size); x.save(os); } diff --git a/include/svs/lib/stream.h b/include/svs/lib/stream.h index b69be2fb8..6256ddedf 100644 --- a/include/svs/lib/stream.h +++ b/include/svs/lib/stream.h @@ 
-45,7 +45,10 @@ struct StreamArchiver : Archiver { std::stringstream ss; ss << table << "\n"; - lib::StreamArchiver::size_type tablesize = ss.rdbuf()->view().size(); + // The best way is to use ss.rdbuf()->view().size(), + // but Apple's Clang 15 doesn't support std::stringbuf::view() + lib::StreamArchiver::size_type tablesize = ss.tellp(); + lib::StreamArchiver::write_size(os, tablesize); os << ss.rdbuf(); } From 1055a820bef248d54821366d7011a728cd479136 Mon Sep 17 00:00:00 2001 From: Dmitry Razdoburdin Date: Wed, 18 Feb 2026 12:27:37 +0100 Subject: [PATCH 3/8] Update include/svs/core/data/simple.h Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- include/svs/core/data/simple.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/svs/core/data/simple.h b/include/svs/core/data/simple.h index 114491f46..cf3c5df24 100644 --- a/include/svs/core/data/simple.h +++ b/include/svs/core/data/simple.h @@ -148,8 +148,8 @@ class GenericSerializer { ); } - int64_t num_vectors = lib::load_at(table, "num_vectors"); - int64_t dims = lib::load_at(table, "dims"); + size_t num_vectors = lib::load_at(table, "num_vectors"); + size_t dims = lib::load_at(table, "dims"); return io::load_dataset(is, lazy, num_vectors, dims); } From a186515bbf8ce73a41c555bd7532798f207fb2b2 Mon Sep 17 00:00:00 2001 From: Dmitry Razdoburdin Date: Wed, 18 Feb 2026 04:05:30 -0800 Subject: [PATCH 4/8] adress copilot comments --- include/svs/lib/archiver.h | 5 +++++ include/svs/lib/saveload/save.h | 1 + include/svs/lib/stream.h | 11 ++++++++++- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/include/svs/lib/archiver.h b/include/svs/lib/archiver.h index d332e7910..ce3ca438f 100644 --- a/include/svs/lib/archiver.h +++ b/include/svs/lib/archiver.h @@ -20,6 +20,11 @@ #include "svs/lib/exception.h" // stl +#include +#include +#include +#include +#include namespace svs::lib { diff --git a/include/svs/lib/saveload/save.h b/include/svs/lib/saveload/save.h index 
01ed89c75..fe7694c45 100644 --- a/include/svs/lib/saveload/save.h +++ b/include/svs/lib/saveload/save.h @@ -22,6 +22,7 @@ // svs #include "svs/lib/file.h" #include "svs/lib/readwrite.h" +#include "svs/lib/stream.h" #include "svs/lib/version.h" // stl diff --git a/include/svs/lib/stream.h b/include/svs/lib/stream.h index 6256ddedf..4f75c9781 100644 --- a/include/svs/lib/stream.h +++ b/include/svs/lib/stream.h @@ -24,6 +24,8 @@ #include "svs/lib/exception.h" // stl +#include +#include namespace svs::lib { @@ -47,10 +49,17 @@ struct StreamArchiver : Archiver { // The best way is to use ss.rdbuf()->view().size(), // but Apple's Clang 15 doesn't support std::stringbuf::view() - lib::StreamArchiver::size_type tablesize = ss.tellp(); + auto stream_output_position = ss.tellp(); + if (stream_output_position < 0) { + throw ANNEXCEPTION("Error writing table!"); + } + lib::StreamArchiver::size_type tablesize = stream_output_position; lib::StreamArchiver::write_size(os, tablesize); os << ss.rdbuf(); + if (!os) { + throw ANNEXCEPTION("Error writing to stream!"); + } } }; From a0988f0c072a9d9fab6a8312b593cf7a35da87f3 Mon Sep 17 00:00:00 2001 From: Dmitry Razdoburdin Date: Wed, 18 Feb 2026 06:32:57 -0800 Subject: [PATCH 5/8] avoid stringstream.tellp(); improve comments --- include/svs/lib/saveload/load.h | 4 +++- include/svs/lib/stream.h | 19 ++++++++++++++----- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/include/svs/lib/saveload/load.h b/include/svs/lib/saveload/load.h index 3763c6328..cf88c4746 100644 --- a/include/svs/lib/saveload/load.h +++ b/include/svs/lib/saveload/load.h @@ -835,7 +835,9 @@ inline ContextFreeSerializedObject begin_deserialization(std::istream& stream) { lib::StreamArchiver::size_type magic = 0; lib::StreamArchiver::read_size(stream, magic); if (magic == lib::DirectoryArchiver::magic_number) { - // Backward compatibility mode for older versions + // Backward compatibility mode for older versions: + // Previously, SVS serialized models 
using an intermediate file, + // so some dummy information was added to the stream. lib::StreamArchiver::size_type num_files = 0; lib::StreamArchiver::read_size(stream, num_files); diff --git a/include/svs/lib/stream.h b/include/svs/lib/stream.h index 4f75c9781..b885060a1 100644 --- a/include/svs/lib/stream.h +++ b/include/svs/lib/stream.h @@ -27,6 +27,11 @@ #include #include +namespace { +template +concept HasStringbufView = requires(Stream s) { s.rdbuf()->view(); }; +} + namespace svs::lib { struct StreamArchiver : Archiver { @@ -47,13 +52,17 @@ struct StreamArchiver : Archiver { std::stringstream ss; ss << table << "\n"; - // The best way is to use ss.rdbuf()->view().size(), + // The best way to get the table size is a c++20 feature: + // ss.rdbuf()->view().size(), // but Apple's Clang 15 doesn't support std::stringbuf::view() - auto stream_output_position = ss.tellp(); - if (stream_output_position < 0) { - throw ANNEXCEPTION("Error writing table!"); + lib::StreamArchiver::size_type tablesize; + if constexpr (HasStringbufView) { + tablesize = ss.rdbuf()->view().size(); + } else { + // fallback with creating a temporary copy + throw ANNEXCEPTION("Fallback!"); + tablesize = ss.str().size(); } - lib::StreamArchiver::size_type tablesize = stream_output_position; lib::StreamArchiver::write_size(os, tablesize); os << ss.rdbuf(); From f593ba6e113c66ab2fe1b5228291a51c1b873a96 Mon Sep 17 00:00:00 2001 From: Dmitry Razdoburdin Date: Wed, 18 Feb 2026 06:43:41 -0800 Subject: [PATCH 6/8] fix macos --- include/svs/lib/stream.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/include/svs/lib/stream.h b/include/svs/lib/stream.h index b885060a1..ea9f8777d 100644 --- a/include/svs/lib/stream.h +++ b/include/svs/lib/stream.h @@ -28,9 +28,14 @@ #include namespace { -template -concept HasStringbufView = requires(Stream s) { s.rdbuf()->view(); }; +template auto get_buffer_size(T& ss) { + if constexpr (requires { ss.rdbuf()->view(); }) { + return 
ss.rdbuf()->view().size(); + } else { + return ss.str().size(); + } } +} // namespace namespace svs::lib { @@ -55,14 +60,7 @@ struct StreamArchiver : Archiver { // The best way to get the table size is a c++20 feature: // ss.rdbuf()->view().size(), // but Apple's Clang 15 doesn't support std::stringbuf::view() - lib::StreamArchiver::size_type tablesize; - if constexpr (HasStringbufView) { - tablesize = ss.rdbuf()->view().size(); - } else { - // fallback with creating a temporary copy - throw ANNEXCEPTION("Fallback!"); - tablesize = ss.str().size(); - } + lib::StreamArchiver::size_type tablesize = get_buffer_size(ss); lib::StreamArchiver::write_size(os, tablesize); os << ss.rdbuf(); From 394fb3ec0e90c6b6b69dacfe7c5669a392fcd024 Mon Sep 17 00:00:00 2001 From: Dmitry Razdoburdin Date: Thu, 19 Feb 2026 06:05:51 -0800 Subject: [PATCH 7/8] fix backward compatibility; add tests --- include/svs/lib/saveload/load.h | 14 ++++++ tests/svs/index/flat/flat.cpp | 78 +++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) diff --git a/include/svs/lib/saveload/load.h b/include/svs/lib/saveload/load.h index cf88c4746..c848e80c0 100644 --- a/include/svs/lib/saveload/load.h +++ b/include/svs/lib/saveload/load.h @@ -22,7 +22,9 @@ // stl #include +#include "svs/core/io/native.h" #include "svs/lib/file.h" +#include "svs/lib/readwrite.h" #include "svs/lib/stream.h" namespace svs::lib { @@ -852,6 +854,18 @@ inline ContextFreeSerializedObject begin_deserialization(std::istream& stream) { } auto table = lib::StreamArchiver::read_table(stream); + + if (magic == lib::DirectoryArchiver::magic_number) { + // Backward compatibility mode for older versions: + // Previously, SVS serialized models using an intermediate file, + // so some dummy information was added to the stream. 
+ std::string file_name; + lib::StreamArchiver::read_name(stream, file_name); + + lib::StreamArchiver::size_type file_size = 0; + lib::StreamArchiver::read_size(stream, file_size); + lib::read_binary(stream); + } return ContextFreeSerializedObject{std::move(table)}; } diff --git a/tests/svs/index/flat/flat.cpp b/tests/svs/index/flat/flat.cpp index 57f821781..25968a420 100644 --- a/tests/svs/index/flat/flat.cpp +++ b/tests/svs/index/flat/flat.cpp @@ -16,6 +16,11 @@ #include "svs/index/flat/flat.h" #include "svs/core/logging.h" +#include "svs/lib/file.h" +#include "svs/lib/saveload/load.h" + +// tests +#include "tests/utils/test_dataset.h" // catch2 #include "catch2/catch_test_macros.hpp" @@ -66,3 +71,76 @@ CATCH_TEST_CASE("FlatIndex Logging Test", "[logging]") { CATCH_REQUIRE(captured_logs.size() == 1); CATCH_REQUIRE(captured_logs[0] == "Test FlatIndex Logging"); } + +CATCH_TEST_CASE("Flat Index Save and Load", "[flat][index][saveload]") { + using Data_t = svs::data::SimpleData; + using Distance_t = svs::distance::DistanceL2; + using Index_t = svs::index::flat::FlatIndex; + + // Load test data + auto data = Data_t::load(test_dataset::data_svs_file()); + auto queries = test_dataset::queries(); + + // Build index + Distance_t dist; + Index_t index = Index_t(std::move(data), dist, svs::threads::DefaultThreadPool(1)); + + size_t num_neighbors = 10; + auto results = svs::QueryResult(queries.size(), num_neighbors); + index.search(results.view(), queries.cview(), {}); + + CATCH_SECTION("Load Flat being serialized natively to stream") { + std::stringstream ss; + index.save(ss); + + Index_t loaded_index = Index_t( + svs::lib::load_from_stream(ss), dist, svs::threads::DefaultThreadPool(1) + ); + + CATCH_REQUIRE(loaded_index.size() == index.size()); + CATCH_REQUIRE(loaded_index.dimensions() == index.dimensions()); + + auto loaded_results = svs::QueryResult(queries.size(), num_neighbors); + loaded_index.search(loaded_results.view(), queries.cview(), {}); + + // Compare results 
- should be identical + for (size_t q = 0; q < queries.size(); ++q) { + for (size_t i = 0; i < num_neighbors; ++i) { + CATCH_REQUIRE(loaded_results.index(q, i) == results.index(q, i)); + CATCH_REQUIRE( + loaded_results.distance(q, i) == + Catch::Approx(results.distance(q, i)).epsilon(1e-5) + ); + } + } + } + + CATCH_SECTION("Load Flat being serialized with intermidiate files") { + std::stringstream ss; + + svs::lib::UniqueTempDirectory tempdir{"svs_flat_save"}; + index.save(tempdir); + svs::lib::DirectoryArchiver::pack(tempdir, ss); + + Index_t loaded_index = Index_t( + svs::lib::load_from_stream(ss), dist, svs::threads::DefaultThreadPool(1) + ); + + CATCH_REQUIRE(loaded_index.size() == index.size()); + CATCH_REQUIRE(loaded_index.dimensions() == index.dimensions()); + + auto loaded_results = svs::QueryResult(queries.size(), num_neighbors); + loaded_index.search(loaded_results.view(), queries.cview(), {}); + + // Compare results - should be identical + for (size_t q = 0; q < queries.size(); ++q) { + for (size_t i = 0; i < num_neighbors; ++i) { + CATCH_REQUIRE(loaded_results.index(q, i) == results.index(q, i)); + CATCH_REQUIRE( + loaded_results.distance(q, i) == + Catch::Approx(results.distance(q, i)).epsilon(1e-5) + ); + } + } + } +} From 0b81537faffb7effa841bf00e8b8713efb9c7121 Mon Sep 17 00:00:00 2001 From: Dmitry Razdoburdin Date: Thu, 19 Feb 2026 06:51:15 -0800 Subject: [PATCH 8/8] typos and minor fixes --- include/svs/lib/stream.h | 6 +++--- tests/svs/index/flat/flat.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/svs/lib/stream.h b/include/svs/lib/stream.h index ea9f8777d..9b9325335 100644 --- a/include/svs/lib/stream.h +++ b/include/svs/lib/stream.h @@ -27,7 +27,7 @@ #include #include -namespace { +namespace svs::lib::detail { template auto get_buffer_size(T& ss) { if constexpr (requires { ss.rdbuf()->view(); }) { return ss.rdbuf()->view().size(); @@ -35,7 +35,7 @@ template auto get_buffer_size(T& ss) { return ss.str().size(); 
} } -} // namespace +} // namespace svs::lib::detail namespace svs::lib { @@ -60,7 +60,7 @@ struct StreamArchiver : Archiver { // The best way to get the table size is a c++20 feature: // ss.rdbuf()->view().size(), // but Apple's Clang 15 doesn't support std::stringbuf::view() - lib::StreamArchiver::size_type tablesize = get_buffer_size(ss); + lib::StreamArchiver::size_type tablesize = detail::get_buffer_size(ss); lib::StreamArchiver::write_size(os, tablesize); os << ss.rdbuf(); diff --git a/tests/svs/index/flat/flat.cpp b/tests/svs/index/flat/flat.cpp index 25968a420..d09532d8b 100644 --- a/tests/svs/index/flat/flat.cpp +++ b/tests/svs/index/flat/flat.cpp @@ -115,7 +115,7 @@ CATCH_TEST_CASE("Flat Index Save and Load", "[flat][index][saveload]") { } } - CATCH_SECTION("Load Flat being serialized with intermidiate files") { + CATCH_SECTION("Load Flat being serialized with intermediate files") { std::stringstream ss; svs::lib::UniqueTempDirectory tempdir{"svs_flat_save"};