From 9a34655ce6ea123149e7b60debdae385a1a35893 Mon Sep 17 00:00:00 2001 From: Manlio Morini Date: Tue, 30 Apr 2024 17:38:45 +0200 Subject: [PATCH] feat: add support for team-based oracles --- src/kernel/gp/src/oracle.h | 67 +++++++++++- src/kernel/gp/src/oracle.tcc | 192 +++++++++++++++++++++++++++++++---- src/test/oracle.cc | 85 +++++++++++++++- src/test/team.cc | 4 +- 4 files changed, 320 insertions(+), 28 deletions(-) diff --git a/src/kernel/gp/src/oracle.h b/src/kernel/gp/src/oracle.h index f8a2647..92c1dc1 100644 --- a/src/kernel/gp/src/oracle.h +++ b/src/kernel/gp/src/oracle.h @@ -213,17 +213,18 @@ class basic_class_oracle : public core_class_oracle, /// /// Oracle for the Gaussian Distribution Classification. /// +/// \tparam I individual /// \tparam S stores the individual inside vs keep a reference only /// \tparam N stores the name of the classes vs doesn't store the names /// /// \see /// ultra::src::::gaussian_evaluator for further details. /// -template +template class basic_gaussian_oracle : public basic_class_oracle { public: - basic_gaussian_oracle(const P &, dataframe &); + basic_gaussian_oracle(const I &, dataframe &); basic_gaussian_oracle(std::istream &, const symbol_set &); [[nodiscard]] classification_result tag( @@ -244,13 +245,69 @@ class basic_gaussian_oracle : public basic_class_oracle [[nodiscard]] std::string serialize_id() const final { return SERIALIZE_ID; } // *** Private data members *** - basic_reg_oracle oracle_; + basic_reg_oracle oracle_; // `gauss_dist[i]` contains the gaussian distribution of the i-th class of // the classification problem. std::vector> gauss_dist_ {}; }; +// *********************************************************************** +// * Extensions to support teams * +// *********************************************************************** + +/// +/// An helper class for extending classification schemes to teams. +/// +/// \tparam I type of individual +/// \tparam S stores the individual inside vs keep a reference only +/// \tparam N stores the name of the classes vs doesn't store the names +/// \tparam L the basic classificator that must be extended +/// \tparam C composition method for team's member responses +/// +template class L, + team_composition C = team_composition::standard> +class team_class_oracle : public basic_class_oracle +{ +public: + template team_class_oracle(const team &, dataframe &, + Args &&...); + team_class_oracle(std::istream &, const symbol_set &); + + [[nodiscard]] classification_result tag( + const std::vector &) const final; + + [[nodiscard]] bool is_valid() const final; + + static const std::string SERIALIZE_ID; + +private: + bool save(std::ostream &) const final; + [[nodiscard]] std::string serialize_id() const final; + + // The components of the team never store the names of the classes. If we + // need the names, the master class will memorize them. + std::vector> team_; + + src::class_t classes_ {}; +}; + +/// +/// Gaussian Distribution Classification specialization for teams. +/// +/// \tparam I type of individual +/// \tparam S stores the individual inside vs keep a reference only +/// \tparam N stores the name of the classes vs doesn't store the names +/// +template +class basic_gaussian_oracle, S, N> + : public team_class_oracle +{ +public: + using basic_gaussian_oracle::team_class_oracle::team_class_oracle; +}; + // *********************************************************************** // * Template aliases to simplify the syntax and help the user * // *********************************************************************** @@ -268,7 +325,9 @@ class gaussian_oracle : public basic_gaussian_oracle public: using gaussian_oracle::basic_gaussian_oracle::basic_gaussian_oracle; }; -template gaussian_oracle(const P &) -> gaussian_oracle

; +template gaussian_oracle(const P &, dataframe &) -> gaussian_oracle

; + +template gaussian_oracle(const P &, dataframe &) -> gaussian_oracle

; #include "kernel/gp/src/oracle.tcc" } // namespace src diff --git a/src/kernel/gp/src/oracle.tcc b/src/kernel/gp/src/oracle.tcc index 5ffddbf..941ef2b 100644 --- a/src/kernel/gp/src/oracle.tcc +++ b/src/kernel/gp/src/oracle.tcc @@ -21,10 +21,16 @@ template const std::string basic_reg_oracle::SERIALIZE_ID( Team

? "TEAM_REG_ORACLE" : "REG_ORACLE"); -template -const std::string basic_gaussian_oracle::SERIALIZE_ID( +template +const std::string basic_gaussian_oracle::SERIALIZE_ID( "GAUSSIAN_ORACLE"); +template class L, + team_composition C> +const std::string team_class_oracle::SERIALIZE_ID( + "TEAM_" + L::SERIALIZE_ID); + /// /// \param[in] prg the program (individual/team) to be lambdified /// @@ -32,6 +38,8 @@ template basic_reg_oracle::basic_reg_oracle(const P &prg) : internal::reg_oracle_storage(prg) { + Expects(!prg.empty()); + Ensures(is_valid()); } @@ -179,14 +187,17 @@ std::string basic_class_oracle::name(const value_t &a) const } /// -/// \param[in] prg program "to be transformed" into an oracle +/// \param[in] ind program "to be transformed" into an oracle /// \param[in] d the training set /// -template -basic_gaussian_oracle::basic_gaussian_oracle(const P &prg, +template +basic_gaussian_oracle::basic_gaussian_oracle(const I &ind, dataframe &d) - : basic_class_oracle(d), oracle_(prg), gauss_dist_(d.classes()) + : basic_class_oracle(d), oracle_(ind), gauss_dist_(d.classes()) { + static_assert(Individual); + + Expects(!ind.empty()); Expects(d.classes() > 1); fill_vector(d); @@ -200,13 +211,13 @@ basic_gaussian_oracle::basic_gaussian_oracle(const P &prg, /// \param[in] in input stream /// \param[in] ss active symbol set /// -template -basic_gaussian_oracle::basic_gaussian_oracle(std::istream &in, +template +basic_gaussian_oracle::basic_gaussian_oracle(std::istream &in, const symbol_set &ss) : basic_class_oracle(), oracle_(in, ss) { static_assert( - S, "gaussian_lambda_f requires storage space for de-serialization"); + S, "gaussian_oracle requires storage space for de-serialization"); std::size_t n; if (!(in >> n)) @@ -234,8 +245,8 @@ basic_gaussian_oracle::basic_gaussian_oracle(std::istream &in, /// /// \param[in] d the training set /// -template -void basic_gaussian_oracle::fill_vector(dataframe &d) +template +void basic_gaussian_oracle::fill_vector(dataframe &d) { Expects(d.classes() > 1); @@ -265,8 +276,8 @@ void basic_gaussian_oracle::fill_vector(dataframe &d) /// value is in the `[0,1]` interval and the sum of all the /// confidence levels of each class equals `1`) /// -template -classification_result basic_gaussian_oracle::tag( +template +classification_result basic_gaussian_oracle::tag( const std::vector &ex) const { const auto res(oracle_(ex)); @@ -316,8 +327,8 @@ classification_result basic_gaussian_oracle::tag( /// \param[out] out output stream /// \return `true` on success /// -template -bool basic_gaussian_oracle::save(std::ostream &out) const +template +bool basic_gaussian_oracle::save(std::ostream &out) const { if (!oracle_.save(out)) return false; @@ -335,12 +346,157 @@ bool basic_gaussian_oracle::save(std::ostream &out) const /// /// \return `true` if the object passes the internal consistency check /// -template -bool basic_gaussian_oracle::is_valid() const +template +bool basic_gaussian_oracle::is_valid() const { return true; } +/// +/// \param[in] t team "to be transformed" into an oracle +/// \param[in] d the training set +/// \param[in] args auxiliary parameters for the specific oracle +/// +template class L, team_composition C> +template +team_class_oracle::team_class_oracle(const team &t, + dataframe &d, + Args&&... args) + : basic_class_oracle(d), classes_(d.classes()) +{ + team_.reserve(t.size()); + for (const auto &ind : t) + team_.emplace_back(ind, d, std::forward(args)...); +} + +/// +/// Constructs the object reading data from an input stream. +/// +/// \param[in] in input stream +/// \param[in] ss active symbol set +/// +template class L, team_composition C> +team_class_oracle::team_class_oracle(std::istream &in, + const symbol_set &ss) + : basic_class_oracle() +{ + static_assert( + S, "team_class_oracle requires storage space for de-serialization"); + + if (!(in >> classes_)) + throw exception::data_format("Cannot read number of classes"); + + std::size_t s; + if (!(in >> s)) + throw exception::data_format("Cannot read team size"); + + team_.reserve(s); + for (std::size_t i(0); i < s; ++i) + team_.emplace_back(in, ss); + + if (!internal::class_names::load(in)) + throw exception::data_format("Cannot read class_names"); +} + +/// +/// Specialized method for teams. +/// +/// \param[in] instance data to be classified +/// \return the class of `instance` (numerical id) and the +/// confidence level (in the `[0,1]` interval) +/// +/// * `team_composition::mv` the class which most of the individuals predict +/// for a given example is selected as team output. +/// * `team_composition::wta` the winner is the individual with the highest +/// confidence in its decision. Specialization may emerge if different +/// members of the team win this contest for different fitness cases (of +/// course, it isn't a feasible alternative to select the member with the +/// best fitness. Then a decision on unknown data is only possible if the +/// right outputs are known in advance and is not made by the team itself). +/// +template class L, team_composition C> +classification_result team_class_oracle::tag( + const std::vector &instance) const +{ + if constexpr (C == team_composition::wta) + { + const auto size(team_.size()); + auto best(team_[0].tag(instance)); + + for (std::size_t i(1); i < size; ++i) + { + const auto res(team_[i].tag(instance)); + + if (res.sureness > best.sureness) + best = res; + } + + return best; + } + else if constexpr (C == team_composition::mv) + { + std::vector votes(classes_); + + for (const auto &oracle : team_) + ++votes[oracle.tag(instance).label]; + + src::class_t max(0); + for (auto i(max + 1); i < classes_; ++i) + if (votes[i] > votes[max]) + max = i; + + return {max, static_cast(votes[max]) + / static_cast(team_.size())}; + } +} + +/// +/// Saves the oracle team on persistent storage. +/// +/// \param[out] out output stream +/// \return `true` on success +/// +template class L, team_composition C> +bool team_class_oracle::save(std::ostream &out) const +{ + if (!(out << classes_ << '\n')) + return false; + + if (!(out << team_.size() << '\n')) + return false; + + for (const auto &i : team_) + if (!i.save(out)) + return false; + + return internal::class_names::save(out); +} + +/// +/// \return class ID used for serialization +/// +template class L, team_composition C> +std::string team_class_oracle::serialize_id() const +{ + Expects(team_.size()); + return "TEAM_" + L::SERIALIZE_ID; +} + +/// +/// \return `true` if the object passes the internal consistency check +/// +template class L, team_composition C> +bool team_class_oracle::is_valid() const +{ + return classes_ > 1; +} + namespace serialize::oracle { @@ -376,7 +532,7 @@ std::unique_ptr load(std::istream &in, const symbol_set &ss) { insert>(reg_oracle::SERIALIZE_ID); //insert>(dyn_slot_lambda_f::SERIALIZE_ID); - //insert>(gaussian_lambda_f::SERIALIZE_ID); + insert>(gaussian_oracle::SERIALIZE_ID); //insert>(binary_lambda_f::SERIALIZE_ID); } diff --git a/src/test/oracle.cc b/src/test/oracle.cc index d328f33..982000b 100644 --- a/src/test/oracle.cc +++ b/src/test/oracle.cc @@ -265,8 +265,8 @@ void test_team_of_one(ultra::src::problem &pr) if (has_value(out_i)) { - const auto v1(std::get(out_i)); - const auto v2(std::get(out_t)); + const auto v1(lexical_cast(out_i)); + const auto v2(lexical_cast(out_t)); CHECK(almost_equal(v1, v2)); } @@ -276,6 +276,81 @@ void test_team_of_one(ultra::src::problem &pr) } } +template class L, unsigned P = 0> +void test_team(ultra::src::problem &pr) +{ + using namespace ultra; + + for (unsigned cycles(1000); cycles; --cycles) + { + const gp::individual ind1(pr); + const gp::individual ind2(pr); + const gp::individual ind3(pr); + + const auto oracle1(build()(ind1, pr.data())); + const auto oracle2(build()(ind2, pr.data())); + const auto oracle3(build()(ind3, pr.data())); + + const team t{{ind1, ind2, ind3}}; + const auto ts(t.size()); + const auto oracle_t(build, P>()(t, pr.data())); + + for (const auto &example : pr.data()) + { + const std::vector out = + { + oracle1(example.input), oracle2(example.input), oracle3(example.input) + }; + const std::vector names = + { + oracle1.name(out[0]), oracle2.name(out[1]), oracle3.name(out[2]) + }; + const std::vector tags = + { + oracle1.tag(example.input), oracle2.tag(example.input), + oracle3.tag(example.input) + }; + + for (std::size_t j(0); j < ts; ++j) + CHECK(std::get(out[j]) == tags[j].label); + + std::string s_best(names[0]); + +#if defined(TEST_MV) + std::map votes; + + for (std::size_t j(0); j < ts; ++j) + { + if (votes.find(names[j]) == votes.end()) + votes[names[j]] = 1; + else + ++votes[names[j]]; + } + + unsigned v_best(0); + + for (auto &v : votes) + if (v.second > v_best) + { + s_best = v.first; + v_best = v.second; + } +#elif defined(TEST_WTA) + src::class_t c_best(0); + + for (std::size_t j(1); j < ts; ++j) + if (tags[j].sureness > tags[c_best].sureness) + { + s_best = names[j]; + c_best = j; + } +#endif + + CHECK(s_best == oracle_t.name(oracle_t(example.input))); + } + } +} + struct fixture { fixture() { pr.params.init(); } @@ -289,6 +364,7 @@ TEST_SUITE("ORACLE") TEST_CASE_FIXTURE(fixture, "reg_oracle") { using namespace ultra; + log::reporting_level = log::lWARNING; CHECK(pr.data().read_csv(sr) == SR_COUNT); pr.setup_symbols(); @@ -423,15 +499,16 @@ TEST_CASE_FIXTURE(fixture, "reg_oracle serialization") TEST_CASE_FIXTURE(fixture, "gaussian_oracle") { using namespace ultra; + log::reporting_level = log::lWARNING; CHECK(pr.data().read_csv(iris) == IRIS_COUNT); pr.setup_symbols(); // GAUSSIAN ORACLE TEAM OF ONE INDIVIDUAL. - //test_team_of_one(pr); + test_team_of_one(pr); // GAUSSIAN ORACLE TEAM OF RANDOM INDIVIDUALS. - //test_team(pr); + test_team(pr); } } // TEST_SUITE("ORACLE") diff --git a/src/test/team.cc b/src/test/team.cc index edf500f..361c517 100644 --- a/src/test/team.cc +++ b/src/test/team.cc @@ -28,8 +28,8 @@ TEST_CASE("Concept") { using namespace ultra; - static_assert(Team>); - static_assert(!Team); + REQUIRE(Team>); + REQUIRE(!Team); } TEST_CASE_FIXTURE(fixture1, "Random creation")