我正在将一组 `content`(存放在 boost::multi_index_container 中,按多个属性建立索引)和一个 `params` 结构序列化为二进制存档,并希望稍后再反序列化该存档。但是,用 Boost 1.74 创建的存档在用 Boost 1.83 读取时不可读(报错:无效或损坏的存档)。
我在一个 git 仓库中提供了最小可复现示例(MRE)。虽然它只是一个很小的 cpp 文件,但我还是建了一个仓库,以便连同 CMakeLists.txt 和 Dockerfile 一起分享。以下是我的 `content` 结构:
// Sample element stored in the multi-index container: a length plus an angle.
// NOTE: id() and operator< are both built from boost::hash values, so the
// ordering of elements depends on whatever boost::hash_combine returns.
struct content{
friend class boost::serialization::access;
using angle_type = std::size_t;
// Sets only the angle; _length is not initialized by this constructor.
inline content(angle_type angle): _angle(angle) {}
inline angle_type angle() const { return _angle; }
// Overwrites the angle with a value drawn uniformly from [0, 180].
// The engine is a function-local static seeded once from std::random_device.
void reset_angle_random(){
static std::random_device dev;
static std::mt19937 rng_angle(dev());
std::uniform_int_distribution<> angle_dist(0, 180);
_angle = angle_dist(rng_angle);
}
// Normalizes the element in place; in this reduced example that means
// resetting the angle to 0.
void freeze(){
// complicated deterministic business logic
_angle = 0;
}
// Returns a frozen copy and leaves *this untouched.
content frozen() const{
mre::content copy(*this);
copy.freeze();
return copy;
}
// Creates an element with a random length in [-0.5, 0.5) and a random angle.
static content generate(){
static std::random_device dev;
static std::mt19937 rng(dev());
std::uniform_real_distribution<> dist_length(-0.5f, 0.5f);
mre::content content{0};
content._length = dist_length(rng);
content.reset_angle_random();
return content;
}
// Boost.Serialization hook: archives the two members as "length" and "angle".
template<class Archive>
void serialize(Archive & ar, const unsigned int version) {
ar & boost::serialization::make_nvp("length", _length);
ar & boost::serialization::make_nvp("angle", _angle);
}
// Hash of the element: _length and _angle folded together with hash_combine.
friend std::size_t hash_value(content const& c){
std::size_t seed = 0;
boost::hash_combine(seed, c._length);
boost::hash_combine(seed, c._angle);
return seed;
}
// Hash of this element, obtained through boost::hash.
inline std::size_t hash() const { return boost::hash<mre::content>{}(*this); }
// Hash of the frozen copy of this element.
inline std::size_t frozen_id() const { return frozen().hash(); }
// Textual id in the form "<frozen_id>~<hash>-<angle>".
inline std::string id() const { return (boost::format("%1%~%2%-%3%") % frozen_id() % hash() % angle()).str(); }
// Orders elements by comparing their id() strings, i.e. by text that embeds hash values.
inline bool operator<(const content& other) const { return id() < other.id(); }
private:
double _length;
angle_type _angle;
private:
// Private default constructor; initializes neither member.
content() = default;
};
我实际处理的代码要大得多,并且并不使用这里的 `content` 结构;上面的 `content` 结构是为了构造最小可复现示例而高度简化的版本。以下是我的多索引容器设置:
// Owns the set of samples (a multi-index container of mre::content) together
// with the parameters, and serializes both.
struct package{
friend class boost::serialization::access;
// Tag types used to name the secondary indices of `container`.
struct tags{
struct id{};
struct content{};
struct angle{};
struct frozen{};
};
using container = boost::multi_index_container<
mre::content,
boost::multi_index::indexed_by<
// Index 0: unique, keyed on the element itself (uses content::operator<).
boost::multi_index::ordered_unique<boost::multi_index::identity<mre::content>>,
// Unique index on the string returned by content::id().
boost::multi_index::ordered_unique<boost::multi_index::tag<tags::id>, boost::multi_index::const_mem_fun<mre::content, std::string, &mre::content::id>>,
// Non-unique index on content::hash().
boost::multi_index::ordered_non_unique<boost::multi_index::tag<tags::content>, boost::multi_index::const_mem_fun<mre::content, std::size_t, &mre::content::hash>>,
// Non-unique index on content::angle().
boost::multi_index::ordered_non_unique<boost::multi_index::tag<tags::angle>, boost::multi_index::const_mem_fun<mre::content, mre::content::angle_type, &mre::content::angle>>,
// Non-unique index on content::frozen_id().
boost::multi_index::ordered_non_unique<boost::multi_index::tag<tags::frozen>, boost::multi_index::const_mem_fun<mre::content, std::size_t, &mre::content::frozen_id>>
>
>;
// Starts as "not loaded" with a copy of the given parameters.
inline explicit package(const mre::parameters& params): _loaded(false), _parameters(params) {}
// Starts as "not loaded"; _parameters is not given a value here.
inline explicit package(): _loaded(false) {}
void save(const std::string& filename) const;
void load(const std::string& filename);
inline std::size_t size() const { return _samples.size(); }
inline bool loaded() const { return _loaded; }
const mre::content& operator[](const std::string& id) const;
const mre::parameters& params() const { return _parameters; }
// Boost.Serialization hook: archives the container as "samples", then the
// parameters as "params".
template<class Archive>
void serialize(Archive & ar, const unsigned int version) {
ar & boost::serialization::make_nvp("samples", _samples);
ar & boost::serialization::make_nvp("params", _parameters);
}
public:
std::size_t generate(std::size_t contents, std::size_t angles);
private:
bool _loaded;
container _samples;
mre::parameters _parameters;
};
我还在序列化下面提到的一组参数。
/// Plain parameter record that is archived next to the samples.
struct parameters {
    std::size_t degree;
    std::size_t frame_size;

    /// Boost.Serialization hook: archives both fields as named values,
    /// "degree" first and "frame_size" second.
    template <class Archive>
    void serialize(Archive& archive, const unsigned int /*version*/) {
        archive & boost::serialization::make_nvp("degree", degree)
                & boost::serialization::make_nvp("frame_size", frame_size);
    }
};
保存、加载、生成如下
void mre::package::save(const std::string& filename) const {
std::ofstream stream(filename, std::ios::binary);
try{
boost::archive::binary_oarchive out(stream, boost::archive::no_tracking);
std::cout << "serialization library version: " << out.get_library_version() << std::endl;
out << *this;
} catch(const std::exception& e){
std::cout << "Error saving archive: " << e.what() << std::endl;
}
stream.close();
}
void mre::package::load(const std::string& filename){
std::ifstream stream(filename, std::ios::binary);
try{
boost::archive::binary_iarchive in(stream, boost::archive::no_tracking);
std::cout << "serialization library version: " << in.get_library_version() << std::endl;
in >> *this;
_loaded = true;
} catch(const std::exception& e){
std::cout << "Error loading archive: " << e.what() << std::endl;
}
stream.close();
}
std::size_t mre::package::generate(std::size_t contents, std::size_t angles){
std::size_t count = 0;
std::size_t v_content = 0;
while(v_content++ < contents){
mre::content x = mre::content::generate();
std::size_t v_angle = 0;
while(v_angle++ < angles){
mre::content x_angle = x;
x_angle.reset_angle_random(); // commenting out this line makes it work
if (_samples.insert(x_angle).second)
++count;
}
}
return count;
}
这看起来像是 Boost 多索引容器中的一个错误,但我不确定是否真的存在这样的已知错误。在装有最新版 Boost 的 Arch Linux 机器上编译这个 MRE,就可以重现该问题。MRE 还包含一个 docker 目标,它会在 Ubuntu 22.04 映像(默认 Boost 版本为 1.74)中编译该示例。可以使用可执行文件 `mre` 来测试该问题,如下所示。
cd build
cmake .. && make
./mre pack archive_name 10 # to serialize 10 randomly generated contents and save to file named archive_name
./mre unpack archive_name # to de-serialize
为了测试这种不兼容性,可以使用 docker 进行编译:
make docker # compiles and generates a file named arc inside build/archives directory of the host machine
./mre unpack archives/arc # which throws exception
这个问题我看了半天也没看出来。不过,在固定随机种子并确认得到的是确定性数据之后,我注意到结果虽然是“相同的”,但顺序不同。
我注意到默认索引已经多次间接依赖于哈希:
┌─────────────────────┐
│ │
┌−−−−−−−−−−−−−−−−−┐ │
╎ both ╎ │
╎ ╎ ▼
┌───────────┐ ╎ ┌─────────────┐ ╎ ┌────────┐ ┌───────────────┐ ┌───────────────────────┐
│ operator< │ ──▶ ╎ │ id() │ ╎ ──▶ │ hash() │ ──▶ │ boost::hash<> │ ──▶ │ boost::hash_combine() │
└───────────┘ ╎ └─────────────┘ ╎ └────────┘ └───────────────┘ └───────────────────────┘
╎ │ ╎
╎ │ ╎
╎ ▼ ╎
╎ ┌─────────────┐ ╎
╎ │ frozen_id() │ ╎
╎ └─────────────┘ ╎
╎ ╎
└−−−−−−−−−−−−−−−−−┘
由于第一个索引实际上也是唯一索引,而其键的组成部分只有哈希值和角度,因此在不同版本的 Boost ContainerHash 下,唯一性的判定结果可能不同。
Boost 的 `hash_combine` 不保证稳定性或可移植性。事实上,大多数常见的哈希函数都不保证这一点,例如 `std::hash`:
实际的哈希函数是依赖于实现的,并且不需要满足除上面指定的之外的任何其他质量标准
事实上,把依赖于“哈希值是确定的”这一假设的信息持久化,在任何情况下都是逻辑错误——除非您只在同一进程中重新读取这些信息,因为:
具体来说,哈希函数只需要在程序的单次执行中为相同的输入产生相同的结果;这允许加盐哈希来防止冲突拒绝服务攻击。
`hash_combine` 在 1.74 和 1.83 之间发生了很多变化。您应该重新考虑您的索引。事实上,我认为把依赖于非唯一哈希值的值用作唯一索引(`identity`)的键,本身就是一个错误。如何修复?
// Returns the raw (length, angle) pair as a tuple.
inline constexpr std::tuple<double, angle_type> key() const { return {_length, _angle}; }
// Hash of the element, computed by boost::hash_value over the key() tuple.
friend std::size_t hash_value(content const& c) { return boost::hash_value(c.key()); }
然后也许更像是:
// (length, angle) tuple that identifies an element by value.
using key_type = std::tuple<double, angle_type>;
key_type key() const { return {_length, _angle}; }
// key() of the frozen copy of this element.
key_type frozen_key() const { return frozen().key(); }
// Hash of the element, computed by boost::hash_value over the key() tuple.
friend size_t hash_value(content const& c) { return boost::hash_value(c.key()); }
size_t hash() const { return hash_value(*this); }
// Composite identity: (frozen key, key, angle), with no hashing involved.
auto idkey() const { return std::tuple(frozen_key(), key(), angle()); }
// Textual id: idkey() formatted by fmt.
std::string id() const { return fmt::format("{}", idkey()); }
// Orders elements by comparing their idkey() tuples.
bool operator<(content const& other) const { return idkey() < other.idkey(); }
我用 libfmt 代替了 Boost.Format,因为它可以直接格式化元组,省去了我自己动手的工作 :) 基本思路是:不再(通过哈希)丢弃信息——丢弃信息本来就像是一种代码异味,而且还导致索引依赖于非确定性的函数。
下面是我用于演示的完整代码清单,其中包含了可选使用固定种子的调整:
#include <boost/archive/binary_iarchive.hpp>
#include <boost/archive/binary_oarchive.hpp>
#include <boost/archive/text_iarchive.hpp>
#include <boost/archive/text_oarchive.hpp>
#include <boost/container_hash/hash.hpp>
#include <boost/core/nvp.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/multi_index/key.hpp>
#include <boost/multi_index/ordered_index.hpp>
#include <boost/multi_index_container.hpp>
#include <boost/serialization/utility.hpp>
#include <boost/serialization/vector.hpp>
#include <cstdint>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <random>
#include <string>
#include <fmt/format.h>
#include <fmt/ranges.h>
// Archive flags passed to every archive constructed in this file.
static constexpr boost::archive::archive_flags FLAGS = boost::archive::archive_flags::no_tracking; // {};
// Compile-time switch for the archive format: `#if 1` selects the binary
// archive pair, changing it to `#if 0` selects the text archive pair.
#if 1
using IA = boost::archive::binary_iarchive;
using OA = boost::archive::binary_oarchive;
#else
using IA = boost::archive::text_iarchive;
using OA = boost::archive::text_oarchive;
#endif
namespace mre {
/// One sample: a length and an angle.
///
/// Identity (`id()`) and ordering (`operator<`) are derived from the member
/// values themselves through `idkey()`, not from hashes of them.
struct content {
    friend class boost::serialization::access;
    using angle_type = std::size_t;

    /// Builds a sample with the given angle; the length starts at 0.0.
    inline content(angle_type angle) : _angle(angle) {}

    /// @return the current angle.
    inline angle_type angle() const { return _angle; }

    /// Replaces the angle with a value drawn uniformly from [0, 180].
    /// The engine is a function-local static, so `seed` only takes effect on
    /// the first call; later calls continue the same random sequence.
    void reset_angle_random(size_t seed) {
        static std::mt19937 rng_angle(seed);
        std::uniform_int_distribution<> angle_dist(0, 180);
        _angle = angle_dist(rng_angle);
    }

    /// Normalizes the sample in place (here: resets the angle to 0).
    void freeze() {
        // complicated deterministic business logic
        _angle = 0;
    }

    /// @return a frozen copy; `*this` is left untouched.
    content frozen() const {
        mre::content copy(*this);
        copy.freeze();
        return copy;
    }

    /// Generates a random sample using a seed taken from std::random_device.
    static content generate() {
        return generate(std::random_device{}());
    }

    /// Generates a sample with a random length in [-0.5, 0.5) and a random angle.
    /// The engine is a function-local static, so `seed` only takes effect on
    /// the first call.
    static content generate(size_t seed) {
        static std::mt19937 rng(seed);
        std::uniform_real_distribution<> dist_length(-0.5f, 0.5f);
        mre::content content{0};
        content._length = dist_length(rng);
        content.reset_angle_random(rng());
        return content;
    }

    /// Boost.Serialization hook: archives the members as "length" and "angle".
    template <class Archive> void serialize(Archive& ar, unsigned) {
        ar& boost::serialization::make_nvp("length", _length);
        ar& boost::serialization::make_nvp("angle", _angle);
    }

    /// (length, angle) tuple that identifies a sample by value.
    using key_type = std::tuple<double, angle_type>;
    key_type key() const { return {_length, _angle}; }
    /// key() of the frozen copy of this sample.
    key_type frozen_key() const { return frozen().key(); }

    /// Hash of the sample, computed by boost::hash_value over key().
    friend size_t hash_value(content const& c) { return boost::hash_value(c.key()); }
    size_t hash() const { return hash_value(*this); }

    /// Composite identity: (frozen key, key, angle).
    auto idkey() const { return std::tuple(frozen_key(), key(), angle()); }
    /// Textual id: idkey() formatted by fmt.
    std::string id() const { return fmt::format("{}", idkey()); }
    /// Orders samples by comparing their idkey() tuples.
    bool operator<(content const& other) const { return idkey() < other.idkey(); }

  private:
    // Default member initializers: neither content(angle_type) nor the private
    // default constructor sets _length, and key()/id()/operator< read it, so
    // it must never be left indeterminate.
    double     _length{0.0};
    angle_type _angle{0};

  private:
    /// Private default constructor; members take their default initializers.
    content() = default;
};
/// Plain parameter record that is archived next to the samples.
///
/// Both fields default to 0 so that a default-initialized instance (such as
/// the one inside a default-constructed package) never holds indeterminate
/// values that could later be printed or archived. The type remains an
/// aggregate, so brace and designated initialization keep working.
struct parameters {
    std::size_t degree{0};
    std::size_t frame_size{0};

    /// Boost.Serialization hook: archives the fields as "degree" and "frame_size".
    template <class Archive> void serialize(Archive& ar, unsigned) {
        ar& boost::serialization::make_nvp("degree", degree);
        ar& boost::serialization::make_nvp("frame_size", frame_size);
    }
};
std::ostream& operator<<(std::ostream& stream, mre::parameters const& params);
namespace bmi = boost::multi_index;
/// Owns the set of samples (a multi-index container of mre::content) together
/// with the parameters, and serializes both.
struct package {
    friend class boost::serialization::access;

    /// Tag types used to name the secondary indices of `container`.
    struct tags {
        struct id {};
        struct content {};
        struct angle {};
        struct frozen {};
    };

    using container = bmi::multi_index_container<
        mre::content,
        bmi::indexed_by<
            // Index 0: unique, keyed on the element itself (content::operator<).
            bmi::ordered_unique<bmi::identity<mre::content>>,
            // Unique index on the string returned by content::id().
            bmi::ordered_unique<bmi::tag<tags::id>, bmi::key<&mre::content::id>>,
            // Non-unique index on content::hash().
            bmi::ordered_non_unique<bmi::tag<tags::content>, bmi::key<&mre::content::hash>>,
            // Non-unique index on content::angle().
            bmi::ordered_non_unique<bmi::tag<tags::angle>, bmi::key<&mre::content::angle>>,
            // Non-unique index on content::frozen_key().
            bmi::ordered_non_unique<bmi::tag<tags::frozen>, bmi::key<&mre::content::frozen_key>>>>;

    /// Creates an empty, not-yet-loaded package holding a copy of `params`.
    inline explicit package(mre::parameters const& params) : _parameters(params) {}
    /// Creates an empty, not-yet-loaded package with value-initialized parameters.
    inline explicit package() = default;

    void save(std::string const& filename) const;
    void load(std::string const& filename);

    /// @return the number of stored samples.
    inline std::size_t size() const { return _samples.size(); }
    /// @return the current value of the loaded flag.
    inline bool loaded() const { return _loaded; }
    mre::content const& operator[](std::string const& id) const;
    mre::parameters const& params() const { return _parameters; }

    /// Boost.Serialization hook: archives the container as "samples", then the
    /// parameters as "params".
    template <class Archive> void serialize(Archive& ar, unsigned) {
        ar& boost::serialization::make_nvp("samples", _samples);
        ar& boost::serialization::make_nvp("params", _parameters);
    }

  public:
    std::size_t generate(std::size_t contents, std::size_t angles, size_t seed);
    /// Same as the three-argument overload, seeded from std::random_device.
    std::size_t generate(std::size_t contents, std::size_t angles) {
        return generate(contents, angles, std::random_device{}());
    }

  private:
    bool            _loaded{false};
    container       _samples;
    // Value-initialized so that a default-constructed package never exposes
    // indeterminate parameter values through params() or save().
    mre::parameters _parameters{};
};
} // namespace mre
// { sources
/// Streams the parameters as a small multi-line "params { ... }" block.
/// @return `stream`, to allow chaining.
std::ostream& mre::operator<<(std::ostream& stream, mre::parameters const& params) {
    return stream << "params {" << std::endl
                  << " degree: " << params.degree << std::endl
                  << " frame_size: " << params.frame_size << std::endl
                  << "}";
}
void mre::package::save(std::string const& filename) const {
std::ofstream stream(filename, std::ios::binary);
try {
OA out(stream, FLAGS);
std::cout << "serialization library version: " << out.get_library_version() << std::endl;
out << *this;
} catch (std::exception const& e) {
std::cout << "Error saving archive: " << e.what() << std::endl;
}
stream.close();
}
/// Reads this package back from `filename` using the input archive type `IA`
/// constructed with `FLAGS`.
///
/// On success `loaded()` becomes true. On failure the error is reported on
/// stdout and `loaded()` is false, even if an earlier load had succeeded.
void mre::package::load(std::string const& filename) {
    // Reset first: a failed (re)load must not keep a stale "loaded" flag from
    // a previous successful call.
    _loaded = false;
    std::ifstream stream(filename, std::ios::binary);
    try {
        IA in(stream, FLAGS);
        std::cout << "deserialization library version: " << in.get_library_version() << std::endl;
        in >> *this;
        _loaded = true;
    } catch (std::exception const& e) {
        std::cout << "Error loading archive: " << e.what() << std::endl;
    }
    // `stream` closes itself when it goes out of scope.
}
std::size_t mre::package::generate(std::size_t contents, std::size_t angles, size_t seed) {
std::size_t count = 0;
std::size_t v_content = 0;
while (v_content++ < contents) {
mre::content x = mre::content::generate(seed);
std::size_t v_angle = 0;
while (v_angle++ < angles) {
mre::content x_angle = x;
x_angle.reset_angle_random(seed); // commenting out this line makes it work
if (_samples.insert(x_angle).second)
++count;
}
}
return count;
}
int main(int argc, char** argv) {
std::cout << "Boost " << BOOST_VERSION << "\n";
if (argc < 2) {
std::cout << "Usage: " << std::endl
<< argv[0] << " pack FILENAME N" << std::endl
<< argv[0] << " unpack FILENAME" << std::endl;
return 1;
}
if (argv[1] == std::string("pack")) {
auto params = mre::parameters{.degree = 4, .frame_size = 128};
mre::package package(params);
std::size_t count = package.generate(boost::lexical_cast<std::size_t>(argv[3]), 4, 0xcafebabe);
package.save(argv[2]);
std::cout << "serialized: " << count << " contents" << std::endl;
return 0;
} else if (argv[1] == std::string("unpack")) {
mre::package package;
package.load(argv[2]);
if (package.loaded()) {
std::cout << "Package loaded: " << package.size() << std::endl << package.params() << std::endl;
package.save("roundtrip");
return 0;
}
return 1;
} else {
std::cout << "Usage: " << std::endl
<< argv[0] << " pack FILENAME N" << std::endl
<< argv[0] << " unpack FILENAME" << std::endl;
return 1;
}
}
TL;DR