Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Text embeddings master sync #14159

Closed
wants to merge 14 commits into from
23 changes: 23 additions & 0 deletions vendor/bat-native-ads/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,7 @@ source_set("ads") {
"src/bat/ads/internal/ads/serving/targeting/models/contextual/text_classification/text_classification_aliases.h",
"src/bat/ads/internal/ads/serving/targeting/models/contextual/text_classification/text_classification_model.cc",
"src/bat/ads/internal/ads/serving/targeting/models/contextual/text_classification/text_classification_model.h",
"src/bat/ads/internal/ads/serving/targeting/models/contextual/text_embedding/text_embedding_aliases.h",
"src/bat/ads/internal/ads/serving/targeting/models/model_interface.h",
"src/bat/ads/internal/ads/serving/targeting/top_segments.cc",
"src/bat/ads/internal/ads/serving/targeting/top_segments.h",
Expand Down Expand Up @@ -564,6 +565,8 @@ source_set("ads") {
"src/bat/ads/internal/base/search_engine/search_engines.h",
"src/bat/ads/internal/base/strings/string_conversions_util.cc",
"src/bat/ads/internal/base/strings/string_conversions_util.h",
"src/bat/ads/internal/base/strings/string_html_parse_util.cc",
"src/bat/ads/internal/base/strings/string_html_parse_util.h",
"src/bat/ads/internal/base/strings/string_strip_util.cc",
"src/bat/ads/internal/base/strings/string_strip_util.h",
"src/bat/ads/internal/base/time/time_constraint_util.cc",
Expand Down Expand Up @@ -829,6 +832,8 @@ source_set("ads") {
"src/bat/ads/internal/features/purchase_intent_features.h",
"src/bat/ads/internal/features/text_classification_features.cc",
"src/bat/ads/internal/features/text_classification_features.h",
"src/bat/ads/internal/features/text_embedding_features.cc",
"src/bat/ads/internal/features/text_embedding_features.h",
"src/bat/ads/internal/geographic/country/supported_country_codes.h",
"src/bat/ads/internal/geographic/subdivision/get_subdivision_url_request_builder.cc",
"src/bat/ads/internal/geographic/subdivision/get_subdivision_url_request_builder.h",
Expand Down Expand Up @@ -906,10 +911,14 @@ source_set("ads") {
"src/bat/ads/internal/ml/model/linear/linear.h",
"src/bat/ads/internal/ml/pipeline/pipeline_info.cc",
"src/bat/ads/internal/ml/pipeline/pipeline_info.h",
"src/bat/ads/internal/ml/pipeline/pipeline_embedding_info.cc",
"src/bat/ads/internal/ml/pipeline/pipeline_embedding_info.h",
"src/bat/ads/internal/ml/pipeline/pipeline_util.cc",
"src/bat/ads/internal/ml/pipeline/pipeline_util.h",
"src/bat/ads/internal/ml/pipeline/text_processing/text_processing.cc",
"src/bat/ads/internal/ml/pipeline/text_processing/text_processing.h",
"src/bat/ads/internal/ml/pipeline/text_processing/embedding_processing.cc",
"src/bat/ads/internal/ml/pipeline/text_processing/embedding_processing.h",
"src/bat/ads/internal/ml/transformation/hash_vectorizer.cc",
"src/bat/ads/internal/ml/transformation/hash_vectorizer.h",
"src/bat/ads/internal/ml/transformation/hashed_ngrams_transformation.cc",
Expand Down Expand Up @@ -997,6 +1006,18 @@ source_set("ads") {
"src/bat/ads/internal/processors/behavioral/purchase_intent/purchase_intent_signal_info.h",
"src/bat/ads/internal/processors/contextual/text_classification/text_classification_processor.cc",
"src/bat/ads/internal/processors/contextual/text_classification/text_classification_processor.h",
"src/bat/ads/internal/processors/contextual/text_embedding/text_embedding_info.cc",
"src/bat/ads/internal/processors/contextual/text_embedding/text_embedding_info.h",
"src/bat/ads/internal/processors/contextual/text_embedding/text_embedding_processor.cc",
"src/bat/ads/internal/processors/contextual/text_embedding/text_embedding_processor.h",
"src/bat/ads/internal/processors/contextual/text_embedding/text_embedding_html_events.cc",
"src/bat/ads/internal/processors/contextual/text_embedding/text_embedding_html_events.h",
"src/bat/ads/internal/processors/contextual/text_embedding/text_embedding_html_event_info.cc",
"src/bat/ads/internal/processors/contextual/text_embedding/text_embedding_html_event_info.h",
"src/bat/ads/internal/processors/contextual/text_embedding/text_embedding_html_event_info_aliases.h",
"src/bat/ads/internal/processors/contextual/text_embedding/text_embedding_html_events_database_table.cc",
"src/bat/ads/internal/processors/contextual/text_embedding/text_embedding_html_events_database_table.h",
"src/bat/ads/internal/processors/processor_interface.h",
"src/bat/ads/internal/resources/behavioral/anti_targeting/anti_targeting_features.cc",
"src/bat/ads/internal/resources/behavioral/anti_targeting/anti_targeting_features.h",
"src/bat/ads/internal/resources/behavioral/anti_targeting/anti_targeting_info.cc",
Expand Down Expand Up @@ -1024,6 +1045,8 @@ source_set("ads") {
"src/bat/ads/internal/resources/behavioral/purchase_intent/purchase_intent_site_info.h",
"src/bat/ads/internal/resources/contextual/text_classification/text_classification_resource.cc",
"src/bat/ads/internal/resources/contextual/text_classification/text_classification_resource.h",
"src/bat/ads/internal/resources/contextual/text_embedding/text_embedding_resource.cc",
"src/bat/ads/internal/resources/contextual/text_embedding/text_embedding_resource.h",
"src/bat/ads/internal/resources/country_components.h",
"src/bat/ads/internal/resources/language_components.h",
"src/bat/ads/internal/resources/parsing_result.h",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/* Copyright (c) 2020 The Brave Authors. All rights reserved.
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef BRAVE_VENDOR_BAT_NATIVE_ADS_SRC_BAT_ADS_INTERNAL_ADS_SERVING_TARGETING_MODELS_CONTEXTUAL_TEXT_EMBEDDING_TEXT_EMBEDDING_ALIASES_H_
#define BRAVE_VENDOR_BAT_NATIVE_ADS_SRC_BAT_ADS_INTERNAL_ADS_SERVING_TARGETING_MODELS_CONTEXTUAL_TEXT_EMBEDDING_TEXT_EMBEDDING_ALIASES_H_

#include <vector>

#include "base/containers/circular_deque.h"
#include "bat/ads/internal/ml/data/vector_data.h"

namespace ads {
namespace targeting {

// Double-ended queue of ml::VectorData entries used by the text embedding
// targeting model.
using TextEmbeddingList = base::circular_deque<ml::VectorData>;

}  // namespace targeting
}  // namespace ads

#endif  // BRAVE_VENDOR_BAT_NATIVE_ADS_SRC_BAT_ADS_INTERNAL_ADS_SERVING_TARGETING_MODELS_CONTEXTUAL_TEXT_EMBEDDING_TEXT_EMBEDDING_ALIASES_H_
7 changes: 7 additions & 0 deletions vendor/bat-native-ads/src/bat/ads/internal/ads_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,12 @@
#include "bat/ads/internal/processors/behavioral/bandits/epsilon_greedy_bandit_processor.h"
#include "bat/ads/internal/processors/behavioral/purchase_intent/purchase_intent_processor.h"
#include "bat/ads/internal/processors/contextual/text_classification/text_classification_processor.h"
#include "bat/ads/internal/processors/contextual/text_embedding/text_embedding_processor.h"
#include "bat/ads/internal/resources/behavioral/anti_targeting/anti_targeting_resource.h"
#include "bat/ads/internal/resources/behavioral/bandits/epsilon_greedy_bandit_resource.h"
#include "bat/ads/internal/resources/behavioral/purchase_intent/purchase_intent_resource.h"
#include "bat/ads/internal/resources/contextual/text_classification/text_classification_resource.h"
#include "bat/ads/internal/resources/contextual/text_embedding/text_embedding_resource.h"
#include "bat/ads/internal/resources/resource_manager.h"
#include "bat/ads/internal/studies/studies_util.h"
#include "bat/ads/internal/tabs/tab_manager.h"
Expand Down Expand Up @@ -99,6 +101,8 @@ AdsImpl::AdsImpl(AdsClient* ads_client)
purchase_intent_resource_ = std::make_unique<resource::PurchaseIntent>();
text_classification_resource_ =
std::make_unique<resource::TextClassification>();
text_embedding_resource_ =
std::make_unique<resource::TextEmbedding>();

epsilon_greedy_bandit_processor_ =
std::make_unique<processor::EpsilonGreedyBandit>();
Expand All @@ -107,6 +111,9 @@ AdsImpl::AdsImpl(AdsClient* ads_client)
text_classification_processor_ =
std::make_unique<processor::TextClassification>(
text_classification_resource_.get());
text_embedding_processor_ =
std::make_unique<processor::TextEmbedding>(
text_embedding_resource_.get());

inline_content_ad_ = std::make_unique<InlineContentAd>(
account_.get(), transfer_.get(), subdivision_targeting_.get(),
Expand Down
4 changes: 4 additions & 0 deletions vendor/bat-native-ads/src/bat/ads/internal/ads_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,15 @@ namespace processor {
class EpsilonGreedyBandit;
class PurchaseIntent;
class TextClassification;
class TextEmbedding;
} // namespace processor

namespace resource {
class AntiTargeting;
class EpsilonGreedyBandit;
class PurchaseIntent;
class TextClassification;
class TextEmbedding;
} // namespace resource

class Account;
Expand Down Expand Up @@ -256,11 +258,13 @@ class AdsImpl final : public Ads,
epsilon_greedy_bandit_resource_;
std::unique_ptr<resource::PurchaseIntent> purchase_intent_resource_;
std::unique_ptr<resource::TextClassification> text_classification_resource_;
std::unique_ptr<resource::TextEmbedding> text_embedding_resource_;

std::unique_ptr<processor::EpsilonGreedyBandit>
epsilon_greedy_bandit_processor_;
std::unique_ptr<processor::PurchaseIntent> purchase_intent_processor_;
std::unique_ptr<processor::TextClassification> text_classification_processor_;
std::unique_ptr<processor::TextEmbedding> text_embedding_processor_;

std::unique_ptr<InlineContentAd> inline_content_ad_;
std::unique_ptr<NewTabPageAd> new_tab_page_ad_;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/* Copyright (c) 2022 The Brave Authors. All rights reserved.
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */

#include <regex>

#include "bat/ads/internal/base/strings/string_html_parse_util.h"
ptjames marked this conversation as resolved.
Show resolved Hide resolved

namespace ads {

// Returns the first non-empty match of the regular expression |rgx_str| within
// |search_text|, or an empty string when there is no such match.
//
// |search_text| is only read; it remains a non-const reference so the
// signature existing callers compile against is unchanged. |rgx_str| is
// compiled with std::regex's default grammar on every call.
std::string FindFirstRegexMatch(std::string& search_text, const std::string& rgx_str) {
  const std::regex rgx(rgx_str);

  // std::sregex_iterator always advances, even past zero-length matches, so a
  // pattern such as "a*" cannot keep re-matching the same position forever.
  const std::sregex_iterator end;
  for (std::sregex_iterator it(search_text.cbegin(), search_text.cend(), rgx);
       it != end; ++it) {
    if (it->length() > 0) {
      return it->str();
    }
  }

  return std::string();
}

// Finds the first tag in |html| whose text contains |tag_substr| and returns
// what follows the |tag_attribute| name inside that tag, i.e. the text that
// starts at the '=' after the name and runs through the end of the matched tag
// text. The result is whatever FindFirstRegexMatch yields when it is not
// longer than |tag_attribute|.
//
// NOTE(review): |tag_substr| and |tag_attribute| are spliced into the regular
// expressions unescaped, so regex metacharacters in them are interpreted.
std::string ParseTagAttribute(const std::string& html, const std::string& tag_substr, const std::string& tag_attribute) {
  // FindFirstRegexMatch takes its haystack by non-const reference, hence the
  // mutable copy of |html|.
  std::string haystack = html;
  const std::string tag_pattern = "<[^>]*" + tag_substr + "[^<]*>";
  std::string tag_text = FindFirstRegexMatch(haystack, tag_pattern);

  const std::string attribute_pattern = tag_attribute + "=.*>";
  std::string attribute_match = FindFirstRegexMatch(tag_text, attribute_pattern);

  const auto name_length = tag_attribute.length();
  if (attribute_match.length() <= name_length) {
    return attribute_match;
  }

  // Drop the leading attribute name and keep everything after it.
  return attribute_match.substr(name_length);
}

} // namespace ads
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/* Copyright (c) 2022 The Brave Authors. All rights reserved.
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef BRAVE_VENDOR_BAT_NATIVE_ADS_SRC_BAT_ADS_INTERNAL_BASE_STRINGS_STRING_HTML_PARSE_UTIL_H_
#define BRAVE_VENDOR_BAT_NATIVE_ADS_SRC_BAT_ADS_INTERNAL_BASE_STRINGS_STRING_HTML_PARSE_UTIL_H_

#include <string>

namespace ads {

// Searches |search_text| for the regular expression |rgx_str| and returns the
// text of the first match, or an empty string if nothing matches.
std::string FindFirstRegexMatch(std::string& search_text, const std::string& rgx_str);

// Finds the first tag in |html| whose text contains |tag_substr| and returns
// the part of that tag that follows the |tag_attribute| name (starting at the
// '=' sign). Both |tag_substr| and |tag_attribute| are inserted into regular
// expressions unescaped.
std::string ParseTagAttribute(const std::string& html, const std::string& tag_substr, const std::string& tag_attribute);

} // namespace ads

#endif // BRAVE_VENDOR_BAT_NATIVE_ADS_SRC_BAT_ADS_INTERNAL_BASE_STRINGS_STRING_HTML_PARSE_UTIL_H_
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/* Copyright (c) 2022 The Brave Authors. All rights reserved.
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "bat/ads/internal/features/text_embedding_features.h"

#include "base/metrics/field_trial_params.h"

namespace ads {
namespace targeting {
namespace features {

namespace {

// Name used to construct the kTextEmbedding feature.
constexpr char kFeatureName[] = "TextEmbedding";

// Field trial parameter and fallback value read by
// GetTextEmbeddingsHistorySize().
constexpr char kFieldTrialParameterPageEmbeddingsHistorySize[] =
    "page_embeddings_history_size";
constexpr int kDefaultPageEmbeddingsHistorySize = 5;

// Field trial parameter and fallback value read by
// GetTextEmbeddingResourceVersion().
constexpr char kFieldTrialParameterResourceVersion[] =
    "text_embedding_resource_version";
constexpr int kDefaultResourceVersion = 1;

}  // namespace

// Feature controlling text embedding; constructed as enabled by default.
const base::Feature kTextEmbedding{
    kFeatureName,
    base::FEATURE_ENABLED_BY_DEFAULT,
};

// Reports whether base::FeatureList considers kTextEmbedding enabled.
bool IsTextEmbeddingEnabled() {
  const bool is_enabled = base::FeatureList::IsEnabled(kTextEmbedding);
  return is_enabled;
}

// Reads the "page_embeddings_history_size" field trial parameter of
// kTextEmbedding, using kDefaultPageEmbeddingsHistorySize as the default.
int GetTextEmbeddingsHistorySize() {
  const int history_size = GetFieldTrialParamByFeatureAsInt(
      kTextEmbedding, kFieldTrialParameterPageEmbeddingsHistorySize,
      kDefaultPageEmbeddingsHistorySize);
  return history_size;
}

// Reads the "text_embedding_resource_version" field trial parameter of
// kTextEmbedding, using kDefaultResourceVersion as the default.
int GetTextEmbeddingResourceVersion() {
  const int resource_version = GetFieldTrialParamByFeatureAsInt(
      kTextEmbedding, kFieldTrialParameterResourceVersion,
      kDefaultResourceVersion);
  return resource_version;
}

} // namespace features
} // namespace targeting
} // namespace ads
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/* Copyright (c) 2022 The Brave Authors. All rights reserved.
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef BRAVE_VENDOR_BAT_NATIVE_ADS_SRC_BAT_ADS_INTERNAL_FEATURES_TEXT_EMBEDDING_FEATURES_H_
#define BRAVE_VENDOR_BAT_NATIVE_ADS_SRC_BAT_ADS_INTERNAL_FEATURES_TEXT_EMBEDDING_FEATURES_H_

#include "base/feature_list.h"

namespace ads {
namespace targeting {
namespace features {

// Feature that gates text embedding.
extern const base::Feature kTextEmbedding;

// Returns true when the kTextEmbedding feature is enabled.
bool IsTextEmbeddingEnabled();

// Returns the value of the kTextEmbedding field trial parameter
// "page_embeddings_history_size"; the default passed to the lookup is 5.
int GetTextEmbeddingsHistorySize();

// Returns the value of the kTextEmbedding field trial parameter
// "text_embedding_resource_version"; the default passed to the lookup is 1.
int GetTextEmbeddingResourceVersion();

} // namespace features
} // namespace targeting
} // namespace ads

#endif // BRAVE_VENDOR_BAT_NATIVE_ADS_SRC_BAT_ADS_INTERNAL_FEATURES_TEXT_EMBEDDING_FEATURES_H_
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
namespace ads {
namespace database {

constexpr int32_t kVersion = 24;
constexpr int32_t kCompatibleVersion = 24;
constexpr int32_t kVersion = 25;
constexpr int32_t kCompatibleVersion = 25;

} // namespace database
} // namespace ads
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@

#include <functional>
#include <utility>
#include <iostream>

#include "base/check.h"
#include "bat/ads/internal/account/deposits/deposits_database_table.h"
#include "bat/ads/internal/account/transactions/transactions_database_table.h"
#include "bat/ads/internal/ads/ad_events/ad_events_database_table.h"
#include "bat/ads/internal/processors/contextual/text_embedding/text_embedding_html_events_database_table.h"
#include "bat/ads/internal/ads_client_helper.h"
#include "bat/ads/internal/base/database/database_transaction_util.h"
#include "bat/ads/internal/conversions/conversion_queue_database_table.h"
Expand Down Expand Up @@ -44,6 +46,12 @@ void Migration::FromVersion(const int from_version, ResultCallback callback) {
ToVersion(transaction.get(), i);
}

// NOTE(review): the unconditional std::cout dump of |from_version| and
// |to_version| was leftover debugging output and must not write to stdout in
// production; use the project's logging facility if the migration range needs
// to be recorded.

mojom::DBCommandPtr command = mojom::DBCommand::New();
command->type = mojom::DBCommand::Type::MIGRATE;

Expand All @@ -69,6 +77,9 @@ void Migration::ToVersion(mojom::DBTransaction* transaction,
table::AdEvents ad_events_database_table;
ad_events_database_table.Migrate(transaction, to_version);

table::TextEmbeddingHTMLEvents text_embedding_html_events_database_table;
text_embedding_html_events_database_table.Migrate(transaction, to_version);

table::Transactions transactions_database_table;
transactions_database_table.Migrate(transaction, to_version);

Expand Down
Loading