Boost 1.74 与 1.83 之间使用多索引容器(multi_index_container)时的序列化不兼容问题

问题描述 投票:0回答:1

我正在将一组

content
(使用
boost::multi_index_container
由多个属性索引)和
params
结构序列化为二进制存档,并希望之后再反序列化该二进制存档。但是,用 boost 1.74 创建的存档在使用 boost 1.83 读取时不可读(无效或损坏的存档)。

我在 git repo 中包含了一个 mre。虽然它是一个小的 cpp 文件,但我创建了一个存储库来与 CMakeLists.txt 和 Dockerfile 共享它。以下是我的

content

// A sample described by a length and an angle.
//
// NOTE: hash(), frozen_id(), id() and operator< are all derived from
// boost::hash / boost::hash_combine. Boost does not promise that these produce
// the same values across releases, so any ordering or identifier built on them
// should not be assumed to be stable between Boost versions.
struct content{
    friend class boost::serialization::access;
    using angle_type = std::size_t;

    // Creates a sample with the given angle; the length starts at 0.0.
    inline content(angle_type angle): _angle(angle) {}

    // Returns the current angle.
    inline angle_type angle() const { return _angle; }

    // Replaces the angle with a random integer drawn uniformly from [0, 180].
    // The engine is function-local static, so its state is shared by all calls.
    void reset_angle_random(){
        static std::random_device dev;
        static std::mt19937 rng_angle(dev());
        std::uniform_int_distribution<> angle_dist(0, 180);
        _angle = angle_dist(rng_angle);
    }

    // Puts this sample into its "frozen" form (here: angle reset to 0).
    void freeze(){
        // complicated deterministic business logic
        _angle = 0;
    }

    // Returns a frozen copy and leaves this object untouched.
    content frozen() const{
        mre::content copy(*this);
        copy.freeze();
        return copy;
    }

    // Creates a sample with a random length in [-0.5, 0.5) and a random angle.
    static content generate(){
        static std::random_device  dev;
        static std::mt19937        rng(dev());
        std::uniform_real_distribution<> dist_length(-0.5f, 0.5f);

        mre::content content{0};
        content._length = dist_length(rng);
        content.reset_angle_random();
        return content;
    }

    // Boost.Serialization hook: stores/loads length, then angle.
    // The class version is not used.
    template<class Archive>
    void serialize(Archive & ar, const unsigned int /*version*/) {
        ar & boost::serialization::make_nvp("length",  _length);
        ar & boost::serialization::make_nvp("angle", _angle);
    }

    // Hash hook found by boost::hash through ADL; combines length and angle.
    friend std::size_t hash_value(content const& c){
        std::size_t seed = 0;
        boost::hash_combine(seed, c._length);
        boost::hash_combine(seed, c._angle);
        return seed;
    }

    // Hash of this sample.
    inline std::size_t hash() const { return boost::hash<mre::content>{}(*this); }
    // Hash of the frozen form of this sample.
    inline std::size_t frozen_id() const { return frozen().hash(); }
    // Textual identifier "<frozen hash>~<hash>-<angle>".
    inline std::string id() const { return (boost::format("%1%~%2%-%3%") % frozen_id() % hash() % angle()).str(); }
    // Orders samples by lexicographic comparison of their id() strings.
    inline bool operator<(const content& other) const { return id() < other.id(); }
    private:
        // Default member initialisers guarantee that no constructor leaves a
        // member indeterminate (the angle constructor used to leave _length
        // unset, and the defaulted constructor left both unset).
        double _length = 0.0;
        angle_type _angle = 0;

    private:
        // Reachable only through friends such as boost::serialization::access.
        content() = default;
};

我实际处理的代码要大得多,并且不使用此处给出的 content 结构。上面的 content 结构是一个高度简化的版本,用于构造最小可复现示例。以下是我的多索引容器设置。

struct package{
    friend class boost::serialization::access;

    struct tags{
        struct id{};
        struct content{};
        struct angle{};
        struct frozen{};
    };

    using container = boost::multi_index_container<
        mre::content,
        boost::multi_index::indexed_by<
            boost::multi_index::ordered_unique<boost::multi_index::identity<mre::content>>,
            boost::multi_index::ordered_unique<boost::multi_index::tag<tags::id>, boost::multi_index::const_mem_fun<mre::content, std::string, &mre::content::id>>,
            boost::multi_index::ordered_non_unique<boost::multi_index::tag<tags::content>, boost::multi_index::const_mem_fun<mre::content, std::size_t, &mre::content::hash>>,
            boost::multi_index::ordered_non_unique<boost::multi_index::tag<tags::angle>, boost::multi_index::const_mem_fun<mre::content, mre::content::angle_type, &mre::content::angle>>,
            boost::multi_index::ordered_non_unique<boost::multi_index::tag<tags::frozen>, boost::multi_index::const_mem_fun<mre::content, std::size_t, &mre::content::frozen_id>>
        >
    >;

    inline explicit package(const mre::parameters& params): _loaded(false), _parameters(params) {}
    inline explicit package(): _loaded(false) {}
    void save(const std::string& filename) const;
    void load(const std::string& filename);
    inline std::size_t size() const { return _samples.size(); }
    inline bool loaded() const { return _loaded; }
    const mre::content& operator[](const std::string& id) const;
    const mre::parameters& params() const { return _parameters; }
    template<class Archive>
    void serialize(Archive & ar, const unsigned int version) {
        ar & boost::serialization::make_nvp("samples", _samples);
        ar & boost::serialization::make_nvp("params",  _parameters);
    }

    public:
        std::size_t generate(std::size_t contents, std::size_t angles);
    private:
        bool _loaded;
        container  _samples;
        mre::parameters _parameters;
};

我还在序列化下面提到的一组参数。

// Plain settings record that is serialized next to the samples.
struct parameters{
    std::size_t degree;
    std::size_t frame_size;

    // Boost.Serialization hook: archives `degree` followed by `frame_size`.
    // The class version argument is ignored.
    template<class Archive>
    void serialize(Archive & ar, const unsigned int) {
        ar & boost::serialization::make_nvp("degree",     degree)
           & boost::serialization::make_nvp("frame_size", frame_size);
    }
};

保存、加载、生成如下

void mre::package::save(const std::string& filename) const {
    std::ofstream stream(filename, std::ios::binary);
    try{
        boost::archive::binary_oarchive out(stream, boost::archive::no_tracking);
        std::cout << "serialization library version: " << out.get_library_version() << std::endl;
        out << *this;
    } catch(const std::exception& e){
        std::cout << "Error saving archive: " << e.what() << std::endl;
    }
    stream.close();
}

// Reads this package from the Boost binary archive stored in `filename`.
//
// On success loaded() becomes true. If reading throws an exception derived
// from std::exception, the error is printed on stdout and loaded() is false,
// even if an earlier load on the same object had succeeded.
void mre::package::load(const std::string& filename){
    // Reset first: a failed load must not keep reporting the previous success.
    _loaded = false;
    std::ifstream stream(filename, std::ios::binary);
    try{
        boost::archive::binary_iarchive in(stream, boost::archive::no_tracking);
        std::cout << "serialization library version: " << in.get_library_version() << std::endl;
        in >> *this;
        _loaded = true;
    } catch(const std::exception& e){
        std::cout << "Error loading archive: " << e.what() << std::endl;
    }
    stream.close();
}

std::size_t mre::package::generate(std::size_t contents, std::size_t angles){
    std::size_t count = 0;
    std::size_t v_content = 0;
    while(v_content++ < contents){
        mre::content x = mre::content::generate();
        std::size_t v_angle = 0;
        while(v_angle++ < angles){
            mre::content x_angle = x;
            x_angle.reset_angle_random(); // commenting out this line makes it work
            if (_samples.insert(x_angle).second)
                ++count;
        }
    }
    return count;
}

它看起来像是 boost 多索引容器中的一个错误。但我不知道是否存在任何此类错误。我可以通过在具有最新版本的 boost 库的 Arch Linux 机器中编译 mre 来重现该问题。 mre 还包含一个 docker 目标,它将其编译为 ubuntu 22.04 映像,其中默认 boost 版本为 1.74。可以使用可执行文件

mre
来测试该问题,如下所示。

# Build the mre executable inside the build directory, then write an archive and read it back.
cd build
cmake .. && make
./mre pack archive_name 10 # to serialize 10 randomly generated contents and save to file named archive_name 
./mre unpack archive_name # to de-serialize

为了测试这种不兼容性,可以使用 docker 进行编译。

# Produce an archive through the docker target, then try to read it with the locally built binary.
make docker # compiles and generates a file named arc inside build/archives directory of the host machine
./mre unpack archives/arc # which throws exception
c++ boost boost-serialization boost-multi-index
1个回答
0
投票

这个看了半天,没看出来。然而,通过修复种子并验证我们是否获得确定性数据,我注意到结果是“相同的”,但顺序不同。

我注意到默认索引已经多次间接依赖于哈希:

                      ┌─────────────────────┐
                      │                     │
                  ┌−−−−−−−−−−−−−−−−−┐       │
                  ╎      both       ╎       │
                  ╎                 ╎       ▼
┌───────────┐     ╎ ┌─────────────┐ ╎     ┌────────┐     ┌───────────────┐     ┌───────────────────────┐
│ operator< │ ──▶ ╎ │    id()     │ ╎ ──▶ │ hash() │ ──▶ │ boost::hash<> │ ──▶ │ boost::hash_combine() │
└───────────┘     ╎ └─────────────┘ ╎     └────────┘     └───────────────┘     └───────────────────────┘
                  ╎   │             ╎
                  ╎   │             ╎
                  ╎   ▼             ╎
                  ╎ ┌─────────────┐ ╎
                  ╎ │ frozen_id() │ ╎
                  ╎ └─────────────┘ ╎
                  ╎                 ╎
                  └−−−−−−−−−−−−−−−−−┘

由于第一个索引实际上也是唯一的,并且唯一的组成部分是哈希值和角度,这可能会导致不同版本的 Boost ContainerHash 具有不同的唯一性。

Boost

hash_combine
不保证稳定性或可移植性。事实上,大多数常见的哈希函数都没有,例如
std::hash

实际的哈希函数是依赖于实现的,并且不需要满足除上面指定的之外的任何其他质量标准

事实上,持久化任何依赖于哈希确定性的信息都是逻辑错误,除非您只是在同一进程中重新读取相同的信息,因为

哈希函数只需要在程序的单次执行中为相同的输入产生相同的结果;这允许加盐哈希来防止冲突拒绝服务攻击。

具体来说,

hash_combine

在1.74和1.83之间发生了很多变化。您应该重新考虑您的索引。事实上,我认为依赖于非唯一哈希值的哈希值被用作唯一索引的键 (
identity
)。

修复?

为了避免违反索引期望的总排序契约(基本上就像您通过“编辑”哈希函数来编辑关键字段),我希望哈希值类似于

// Value-based key: the identifying fields as a tuple instead of a hash.
constexpr std::tuple<double, angle_type> key() const {
    return {_length, _angle};
}

// Hash hook found by boost::hash through ADL; hashes the key tuple.
friend std::size_t hash_value(content const& c) {
    return boost::hash_value(c.key());
}
然后也许更像是:

using key_type = std::tuple<double, angle_type>; key_type key() const { return {_length, _angle}; } key_type frozen_key() const { return frozen().key(); } friend size_t hash_value(content const& c) { return boost::hash_value(c.key()); } size_t hash() const { return hash_value(*this); } auto idkey() const { return std::tuple(frozen_key(), key(), angle()); } std::string id() const { return fmt::format("{}", idkey()); } bool operator<(content const& other) const { return idkey() < other.idkey(); }
我用 libfmt 代替 Boost Format,因为它可以直接格式化元组,而无需我做工作:)

基本上,我不会丢弃这些信息:丢弃信息本身就像是一种代码异味,而且还会导致索引依赖于非确定性函数。

这是我的激励代码清单,包括可选择使用固定种子的调整:

在 Coliru 上在线运行(Live On Coliru)

// Self-contained reproduction program: packs randomly generated samples into
// a Boost archive ("pack") and reads such an archive back ("unpack").
// Ordering and identifiers are built from value tuples instead of hash values.
#include <boost/archive/binary_iarchive.hpp>
#include <boost/archive/binary_oarchive.hpp>
#include <boost/archive/text_iarchive.hpp>
#include <boost/archive/text_oarchive.hpp>
#include <boost/container_hash/hash.hpp>
#include <boost/core/nvp.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/multi_index/key.hpp>
#include <boost/multi_index/ordered_index.hpp>
#include <boost/multi_index_container.hpp>
#include <boost/serialization/utility.hpp>
#include <boost/serialization/vector.hpp>
#include <cstddef>
#include <cstdint>
#include <exception>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <random>
#include <string>
#include <tuple>

#include <fmt/format.h>
#include <fmt/ranges.h>

// Flags passed to every archive constructed below.
static constexpr boost::archive::archive_flags FLAGS = boost::archive::archive_flags::no_tracking; // {};

// Flip the condition to switch between binary and text archives.
#if 1
using IA = boost::archive::binary_iarchive;
using OA = boost::archive::binary_oarchive;
#else
using IA = boost::archive::text_iarchive;
using OA = boost::archive::text_oarchive;
#endif

namespace mre {

    // A sample described by a length and an angle.
    struct content {
        friend class boost::serialization::access;
        using angle_type = std::size_t;

        // Creates a sample with the given angle; the length starts at 0.0.
        inline content(angle_type angle) : _angle(angle) {}

        // Returns the current angle.
        inline angle_type angle() const { return _angle; }

        // Replaces the angle with a random integer drawn uniformly from [0, 180].
        // The engine is function-local static: `seed` is only used by the first
        // call, later calls continue the same sequence.
        void reset_angle_random(size_t seed) {
            static std::mt19937 rng_angle(seed);
            std::uniform_int_distribution<> angle_dist(0, 180);
            _angle = angle_dist(rng_angle);
        }

        // Puts this sample into its "frozen" form (here: angle reset to 0).
        void freeze() {
            // complicated deterministic business logic
            _angle = 0;
        }

        // Returns a frozen copy and leaves this object untouched.
        content frozen() const {
            mre::content copy(*this);
            copy.freeze();
            return copy;
        }

        // Generates a sample using a seed taken from std::random_device.
        static content generate() { return generate(std::random_device{}()); }

        // Generates a sample with a random length in [-0.5, 0.5) and a random
        // angle. The engine is function-local static: `seed` is only used by the
        // first call.
        static content generate(size_t seed) {
            static std::mt19937 rng(seed);
            std::uniform_real_distribution<> dist_length(-0.5f, 0.5f);

            mre::content content{0};
            content._length = dist_length(rng);
            content.reset_angle_random(rng());
            return content;
        }

        // Boost.Serialization hook: stores/loads length, then angle.
        template <class Archive> void serialize(Archive& ar, unsigned) {
            ar& boost::serialization::make_nvp("length", _length);
            ar& boost::serialization::make_nvp("angle", _angle);
        }

        // Identifying fields of a sample, kept as values instead of a hash.
        using key_type = std::tuple<double, angle_type>;

        // Key of this sample: (length, angle).
        key_type key() const { return {_length, _angle}; }
        // Key of the frozen form of this sample.
        key_type frozen_key() const { return frozen().key(); }

        // Hash hook found by boost::hash through ADL; hashes the key tuple.
        friend size_t hash_value(content const& c) { return boost::hash_value(c.key()); }
        // Hash of this sample.
        size_t hash() const { return hash_value(*this); }

        // Composite ordering key: (frozen key, key, angle).
        auto idkey() const { return std::tuple(frozen_key(), key(), angle()); }
        // Textual identifier: the composite key formatted by libfmt.
        std::string id() const { return fmt::format("{}", idkey()); }
        // Orders samples by their composite key.
        bool operator<(content const& other) const { return idkey() < other.idkey(); }

      private:
        // Default member initialisers guarantee that no constructor leaves a
        // member indeterminate.
        double     _length = 0.0;
        angle_type _angle  = 0;

      private:
        // Reachable only through friends such as boost::serialization::access.
        content() = default;
    };

    // Plain settings record that is serialized next to the samples.
    struct parameters {
        std::size_t degree;
        std::size_t frame_size;

        // Boost.Serialization hook: stores/loads degree, then frame_size.
        template <class Archive> void serialize(Archive& ar, unsigned) {
            ar& boost::serialization::make_nvp("degree", degree);
            ar& boost::serialization::make_nvp("frame_size", frame_size);
        }
    };

    // Prints the parameters as a small multi-line block.
    std::ostream& operator<<(std::ostream& stream, mre::parameters const& params);

    namespace bmi = boost::multi_index;

    // A set of samples, indexed several ways, plus the accompanying parameters.
    struct package {
        friend class boost::serialization::access;

        // Tag types used to name the secondary indices of `container`.
        struct tags {
            struct id {};
            struct content {};
            struct angle {};
            struct frozen {};
        };

        // Index 0: unique, ordered by content::operator<.
        // tags::id: unique, ordered by content::id().
        // tags::content / tags::angle / tags::frozen: non-unique, ordered by
        // content::hash(), content::angle() and content::frozen_key().
        using container = bmi::multi_index_container<
            mre::content,
            bmi::indexed_by<
                bmi::ordered_unique<bmi::identity<mre::content>>,
                bmi::ordered_unique<bmi::tag<tags::id>, bmi::key<&mre::content::id>>,
                bmi::ordered_non_unique<bmi::tag<tags::content>, bmi::key<&mre::content::hash>>,
                bmi::ordered_non_unique<bmi::tag<tags::angle>, bmi::key<&mre::content::angle>>,
                bmi::ordered_non_unique<bmi::tag<tags::frozen>, bmi::key<&mre::content::frozen_key>>>>;

        // Creates an empty, not-yet-loaded package carrying `params`.
        inline explicit package(mre::parameters const& params) : _loaded(false), _parameters(params) {}
        // Creates an empty, not-yet-loaded package with zeroed parameters.
        inline explicit package() : _loaded(false) {}

        // Writes the package to `filename`.
        void save(std::string const& filename) const;
        // Reads the package from `filename`.
        void load(std::string const& filename);

        // Number of stored samples.
        inline std::size_t size() const { return _samples.size(); }
        // Value of the loaded flag.
        inline bool loaded() const { return _loaded; }
        // Looks a sample up by its id string (not defined in this listing).
        mre::content const& operator[](std::string const& id) const;
        // Parameters stored alongside the samples.
        mre::parameters const& params() const { return _parameters; }

        // Boost.Serialization hook: samples first, then parameters.
        template <class Archive> void serialize(Archive& ar, unsigned) {
            ar& boost::serialization::make_nvp("samples", _samples);
            ar& boost::serialization::make_nvp("params", _parameters);
        }

      public:
        // Fills the container with generated samples using `seed`; returns how
        // many samples were inserted.
        std::size_t generate(std::size_t contents, std::size_t angles, size_t seed);
        // Same, with a seed taken from std::random_device.
        std::size_t generate(std::size_t contents, std::size_t angles) {
            return generate(contents, angles, std::random_device{}());
        }

      private:
        bool            _loaded;
        container       _samples;
        // Value-initialised so the default constructor never leaves it indeterminate.
        mre::parameters _parameters{};
    };

} // namespace mre

// { sources
std::ostream& mre::operator<<(std::ostream& stream, mre::parameters const& params) {
    stream << "params {" << std::endl;
    stream << " degree: " << params.degree << std::endl;
    stream << " frame_size: " << params.frame_size << std::endl;
    stream << "}";
    return stream;
}

// Writes this package to `filename`; exceptions derived from std::exception
// are caught and reported on stdout.
void mre::package::save(std::string const& filename) const {
    std::ofstream stream(filename, std::ios::binary);
    try {
        OA out(stream, FLAGS);
        std::cout << "serialization library version: " << out.get_library_version() << std::endl;
        out << *this;
    } catch (std::exception const& e) {
        std::cout << "Error saving archive: " << e.what() << std::endl;
    }
    stream.close();
}

// Reads this package from `filename`; sets the loaded flag on success.
// Exceptions derived from std::exception are caught and reported on stdout.
void mre::package::load(std::string const& filename) {
    std::ifstream stream(filename, std::ios::binary);
    try {
        IA in(stream, FLAGS);
        std::cout << "deserialization library version: " << in.get_library_version() << std::endl;
        in >> *this;
        _loaded = true;
    } catch (std::exception const& e) {
        std::cout << "Error loading archive: " << e.what() << std::endl;
    }
    stream.close();
}

// Generates `contents` base samples and, for each, `angles` copies with a
// re-randomised angle; returns the number of copies actually inserted.
std::size_t mre::package::generate(std::size_t contents, std::size_t angles, size_t seed) {
    std::size_t count     = 0;
    std::size_t v_content = 0;
    while (v_content++ < contents) {
        mre::content x       = mre::content::generate(seed);
        std::size_t  v_angle = 0;
        while (v_angle++ < angles) {
            mre::content x_angle = x;
            x_angle.reset_angle_random(seed); // commenting out this line makes it work
            if (_samples.insert(x_angle).second)
                ++count;
        }
    }
    return count;
}

namespace {
    // Prints the command-line synopsis and returns the exit code for usage errors.
    int usage(char const* program) {
        std::cout << "Usage: " << std::endl
                  << program << " pack FILENAME N" << std::endl
                  << program << " unpack FILENAME" << std::endl;
        return 1;
    }
} // namespace

// Entry point.
//   pack FILENAME N   generate N base samples (4 angles each) and save them
//   unpack FILENAME   load an archive, print a summary, re-save as "roundtrip"
// Returns 0 on success and 1 on a usage error or a failed load.
int main(int argc, char** argv) {
    std::cout << "Boost " << BOOST_VERSION << "\n";
    if (argc < 2)
        return usage(argv[0]);

    std::string const command = argv[1];

    if (command == "pack") {
        // Both FILENAME and N are required; reading argv[2]/argv[3] without
        // this check would dereference null or out-of-range pointers.
        if (argc < 4)
            return usage(argv[0]);

        std::size_t requested = 0;
        try {
            requested = boost::lexical_cast<std::size_t>(argv[3]);
        } catch (boost::bad_lexical_cast const&) {
            // N was not a valid unsigned number.
            return usage(argv[0]);
        }

        auto         params = mre::parameters{.degree = 4, .frame_size = 128};
        mre::package package(params);
        std::size_t  count = package.generate(requested, 4, 0xcafebabe);
        package.save(argv[2]);
        std::cout << "serialized: " << count << " contents" << std::endl;
        return 0;
    } else if (command == "unpack") {
        // FILENAME is required.
        if (argc < 3)
            return usage(argv[0]);

        mre::package package;
        package.load(argv[2]);
        if (package.loaded()) {
            std::cout << "Package loaded: " << package.size() << std::endl << package.params() << std::endl;
            package.save("roundtrip");
            return 0;
        }
        return 1;
    } else {
        return usage(argv[0]);
    }
}
TL;DR

基本上,永远不要使用哈希值作为键。此外,不要依赖算法的确定性,除非该算法有公开发布的规范(例如加密摘要)。

© www.soinside.com 2019 - 2024. All rights reserved.