Skip to content

Commit

Permalink
Add some logs, Hammer missed,entropy normalization
Browse files Browse the repository at this point in the history
  • Loading branch information
CovERUshKA committed Jan 31, 2025
1 parent 32a06c7 commit feb4571
Show file tree
Hide file tree
Showing 7 changed files with 125 additions and 33 deletions.
23 changes: 13 additions & 10 deletions src/engine/server/NN/ModelManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,22 +22,23 @@ namespace fs = std::filesystem;

int64_t n_in = 40;
int64_t n_out = 7;
int64_t h_start = 1024;
double std_dev = 1;
double learning_rate = 5e-5; // Default: 5e-5
double actor_learning_rate = 2e-4; // Default: 5e-5
double critic_learning_rate = 1e-3; // Default: 1e-4
double critic_learning_rate = 5e-4; // Default: 1e-4
//double weight_decay = 0.0001;

int64_t mini_batch_size = 8000; // 4096, 8192, 16384, 32768
int64_t count_mini_batches = 1;
int64_t max_mini_batch_size = 8000; // 4096, 8192, 16384, 32768
int64_t ppo_epochs = 4;
double ent_coef = 2e-3; // Entropy coefficient
double ent_coef = 1e-2; // Entropy coefficient
double min_ent_coef = 1e-4;
double ent_decay_factor = 0.95;
double clip_param = 0.2; // Default: 0.2
float gamma = 0.99f; // Default: 0.99f Discount factor
float lambda = 0.97f; // GAE lambda
float lambda = 0.95f; // GAE lambda

float old_models_train = 0.2f; // Percent of old models
int count_cached_old_models = 100; // old_models_train * ((float)count_bots / 2.f)
Expand Down Expand Up @@ -115,8 +116,8 @@ ModelManager::ModelManager(bool is_training, std::string train_folder, size_t ba

torch::manual_seed(seed);

ac_update->Initialize(n_in, n_out, std_dev);
ac_work->Initialize(n_in, n_out, std_dev);
ac_update->Initialize(n_in, n_out, h_start, std_dev);
ac_work->Initialize(n_in, n_out, h_start, std_dev);

//graph_main_input_tensor = torch::empty({(int)(count_bots - old_bots_indexes.size()), n_in}, torch::kCUDA);
//graph_main_output_tensor = torch::empty({(int)(count_bots - old_bots_indexes.size()), n_out}, torch::kCUDA);
Expand Down Expand Up @@ -146,7 +147,7 @@ ModelManager::ModelManager(bool is_training, std::string train_folder, size_t ba
param_groups.push_back(torch::optim::OptimizerParamGroup({ac_update->critic_network->parameters()},
std::make_unique<torch::optim::AdamOptions>(critic_learning_rate)));
param_groups.push_back(torch::optim::OptimizerParamGroup({ac_update->log_std_},
std::make_unique<torch::optim::AdamOptions>(actor_learning_rate)));
std::make_unique<torch::optim::AdamOptions>(actor_learning_rate / 2.)));

opt = std::make_shared<torch::optim::Adam>(param_groups);

Expand Down Expand Up @@ -281,7 +282,7 @@ bool ModelManager::LoadModels(std::string folder_path, std::string main_model_na
std::string new_model_path = new_models_folder + "\\" + model_filename;

ActorCritic old_model;
old_model->Initialize(n_in, n_out, std_dev);
old_model->Initialize(n_in, n_out, h_start, std_dev);
torch::load(old_model, model_path);
old_model->eval();
old_model->to(device);
Expand Down Expand Up @@ -627,7 +628,7 @@ std::vector<ModelOutput> ModelManager::Decide(
hooks = hooks.reshape({(int)input_inputs.size()}).to(torch::kBool).to(torch::kCPU, true);
hammers = hammers.reshape({(int)input_inputs.size()}).to(torch::kBool).to(torch::kCPU, true);

// When CPC -> GPU no synchronize needed, but needed when GPU -> CPU https://pytorch.org/tutorials/intermediate/pinmem_nonblock.html
// When CPU -> GPU no synchronization needed, but needed when GPU -> CPU https://pytorch.org/tutorials/intermediate/pinmem_nonblock.html
cudaStreamSynchronize(c10::cuda::getCurrentCUDAStream());

auto angle_x_vec = angle_x.accessor<float, 1>(); // at::Half float
Expand Down Expand Up @@ -783,7 +784,8 @@ void ModelManager::Update(double avg_reward, bool cache_model, bool &updated,
double &avg_actor_grad_norm, double &avg_critic_grad_norm,
double &avg_actor_weight_norm, double &avg_critic_weight_norm,
double &avg_actor_activation_mean, double &avg_actor_activation_std,
double &critic_mean_absolute_error, double &critic_correlation_coefficient)
double &critic_mean_absolute_error, double &critic_correlation_coefficient,
double &avg_angle_entropy, double &avg_hook_entropy, double &avg_hammer_entropy, double &avg_direction_entropy)
{
// Update.
if(!ac_work->is_training())
Expand All @@ -803,7 +805,7 @@ void ModelManager::Update(double avg_reward, bool cache_model, bool &updated,
if(cache_model)
{
ActorCritic old_model;
old_model->Initialize(n_in, n_out, std_dev);
old_model->Initialize(n_in, n_out, h_start, std_dev);
old_model->copy_from(ac_work.get());
old_model->eval();
old_ac.push_back(old_model);
Expand All @@ -825,6 +827,7 @@ void ModelManager::Update(double avg_reward, bool cache_model, bool &updated,
avg_actor_weight_norm, avg_critic_weight_norm,
avg_actor_activation_mean, avg_actor_activation_std,
critic_mean_absolute_error, critic_correlation_coefficient,
avg_angle_entropy, avg_hook_entropy, avg_hammer_entropy, avg_direction_entropy,
clip_param);
}
catch(const std::exception &e)
Expand Down
3 changes: 2 additions & 1 deletion src/engine/server/NN/ModelManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,8 @@ struct ModelManager
double &avg_actor_grad_norm, double &avg_critic_grad_norm,
double &avg_actor_weight_norm, double &avg_critic_weight_norm,
double &avg_actor_activation_mean, double &avg_actor_activation_std,
double &critic_mean_absolute_error, double &critic_correlation_coefficient);
double &critic_mean_absolute_error, double &critic_correlation_coefficient,
double &avg_angle_entropy, double &avg_hook_entropy, double &avg_hammer_entropy, double &avg_direction_entropy);
void ReassignOldModels();

void Save(std::string filename);
Expand Down
36 changes: 20 additions & 16 deletions src/engine/server/NN/Models.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,34 +30,38 @@ struct ActorCriticImpl : public torch::nn::Module

}

bool Initialize(int64_t n_in, int64_t n_out, double std)
bool Initialize(int64_t n_in, int64_t n_out, int64_t h_start, double std)
{
this->n_in = n_in;
this->n_out = n_out;
actor_network = torch::nn::Sequential(
torch::nn::Linear(n_in, 1024),
torch::nn::Linear(n_in, h_start),
torch::nn::ReLU(),
torch::nn::Linear(1024, 512),
torch::nn::Linear(h_start, h_start/2),
torch::nn::ReLU(),
torch::nn::Linear(512, 256),
torch::nn::Linear(h_start / 2, h_start/4),
torch::nn::ReLU(),
torch::nn::Linear(256, 128),
torch::nn::Linear(h_start / 4, h_start/8),
torch::nn::ReLU(),
torch::nn::Linear(128, n_out)
torch::nn::Linear(h_start / 8, h_start / 16),
torch::nn::ReLU(),
torch::nn::Linear(h_start/16, n_out)
//torch::nn::Tanh()
);
//mu_ = torch::full(n_out, 0.);
log_std_ = torch::full(2, std::log(std));
critic_network = torch::nn::Sequential(
torch::nn::Linear(n_in, 1024),
torch::nn::Linear(n_in, h_start),
torch::nn::ReLU(),
torch::nn::Linear(h_start, h_start / 2),
torch::nn::ReLU(),
torch::nn::Linear(1024, 512),
torch::nn::Linear(h_start / 2, h_start / 4),
torch::nn::ReLU(),
torch::nn::Linear(512, 256),
torch::nn::Linear(h_start / 4, h_start / 8),
torch::nn::ReLU(),
torch::nn::Linear(256, 128),
torch::nn::Linear(h_start / 8, h_start / 16),
torch::nn::ReLU(),
torch::nn::Linear(128, 1)
torch::nn::Linear(h_start / 16, 1)
);

//printf("Created from 0\n");
Expand Down Expand Up @@ -235,12 +239,12 @@ struct ActorCriticImpl : public torch::nn::Module
used_presamples = 0;
}

// Gaussian entropy
// Gaussian entropy
auto entropy_gaussian() -> torch::Tensor
{
	// Closed-form differential entropy of a normal distribution, per component:
	//   H = 0.5 + 0.5 * ln(2 * pi) + ln(sigma)
	// Same formula as torch.distributions.Normal.entropy in PyTorch.
	// The first two terms do not depend on the learned log-std, so fold them into one scalar.
	const auto constant_term = 0.5 + 0.5 * log(2 * M_PI);
	auto per_component_entropy = constant_term + log_std_;

	// Collapse the per-component entropies into a single scalar tensor.
	return per_component_entropy.sum();
}
Expand All @@ -264,20 +268,20 @@ struct ActorCriticImpl : public torch::nn::Module

auto entropy(torch::Tensor action) -> torch::Tensor
{
auto angles_entropy = entropy_gaussian().expand({action.size(0)});
auto angle_entropy = entropy_gaussian().expand({action.size(0)});

auto probs = torch::sigmoid(action.slice(1, 5, 6)); // Shape [batch_size, 1]
auto hook_entropy = entropy_bernoulli(probs);
probs = torch::sigmoid(action.slice(1, 6, 7)); // Shape [batch_size, 1]
auto hammer_entropy = entropy_bernoulli(probs);

probs = torch::softmax(action.slice(1, 2, 5), 1); // Shape [batch_size, 3]
auto dir_entropy = entropy_categorical(probs);
auto direction_entropy = entropy_categorical(probs);

hook_entropy = hook_entropy.squeeze(-1); // Convert from [batch_size, 1] to [batch_size]
hammer_entropy = hammer_entropy.squeeze(-1);

return angles_entropy + hook_entropy + hammer_entropy + dir_entropy;
return angle_entropy + hook_entropy + hammer_entropy + direction_entropy;
}

// Extract log probabilities for categorical distribution
Expand Down
38 changes: 34 additions & 4 deletions src/engine/server/NN/NeuralNetwork.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,10 @@ void CNeuralNetwork::OnInit()
"Critic Mean Absolute Error",
"Critic Correlation Coefficient",
"Entropy",
"Angle entropy",
"Hook entropy",
"Hammer entropy",
"Direction entropy",
"Entropy coefficient",
"Actor grad norm",
"Critic grad norm",
Expand Down Expand Up @@ -938,14 +942,21 @@ void CNeuralNetwork::PostTick(float time_to_tick)
static float ball_on_spawn_reward = -0.1f; // -1.f There are 3 spawns. Center(at the start), left side and right side
static float ball_on_side_reward = 0.1f; // 0.05f If the ball is on your side it penalizes you, otherwise rewards you
static float ball_on_side_distance_reward = 0.1f; // 0.2f It means that if the ball is on your side and far from the net it always penalize you on that reward, if ball is half closer to net it penalize on half, but if on enemy side it rewards

// It is hard to implement an ideal way of finding the distance from the ball to the goal
static float ball_moving_towards_net_reward = 0.3f; // 0.2f 10 If the ball is on your side, it rewards moving towards the net; otherwise it penalizes. For example, if the ball moves from the farthest point to the net, the total would be this reward, so it calculates the delta of movement towards the net in %
static float ball_moving_towards_goal_reward = 0.03f; // 0.2f 10 On the enemy side it rewards if ball is moving toward goal

static float being_in_freeze_reward = -0.2f; // -0.1f If the bot is currently frozen, it is penalized by this reward
static float bot_is_grabbed_reward = 0.1f; // If the bot is currently grabbed to wall/ball applies to every tick
static float bot_is_holding_ball_reward = 0.05f; // If the bot is currently holding ball using hook it rewards every tick
static float bot_moving_towards_ball_reward = 0.1f; // Not implemented
static float bot_hitted_ball_reward = 2.0f; // Rewards bot for hitting ball
static float bot_hook_missed_reward = -0.5f; // Applies when bot teleported with ground teleporter

// Misses
static float bot_hammer_missed_reward = -0.2f; // Applies when the bot's hammer did not hit anything
static float bot_hook_missed_reward = -0.5f; // Applies when the bot's hook did not hit anything and is retracting back to the bot

static float bot_teleported_reward = -1.f; // Applies when bot teleported with ground teleporter
static float step_reward = -0.02f; // -0.001f Applies every tick
static float divide_reward_by = 5.f;
Expand Down Expand Up @@ -1036,20 +1047,33 @@ void CNeuralNetwork::PostTick(float time_to_tick)
}

// Handle ball hit
if (first_bot_character->m_HittedBall)
if(first_bot_character->m_HittedBall)
{
first_bot_character->m_HittedBall = false;
first_bot_reward += bot_hitted_ball_reward;
cumulative_ball_hits += 1;
}

if(first_bot_character->m_HammerMissed)
{
first_bot_character->m_HammerMissed = false;
first_bot_reward += bot_hammer_missed_reward;
}

// Handle ball hit
if(second_bot_character->m_HittedBall)
{
second_bot_character->m_HittedBall = false;
second_bot_reward += bot_hitted_ball_reward;
cumulative_ball_hits += 1;
}

if(second_bot_character->m_HammerMissed)
{
second_bot_character->m_HammerMissed = false;
second_bot_reward += bot_hammer_missed_reward;
}

if (first_bot_character->GetCore().m_HookedPlayer && first_bot_character->GetCore().m_HookedPlayer % 3 == 2)
first_bot_reward += bot_is_holding_ball_reward;

Expand Down Expand Up @@ -1188,7 +1212,8 @@ void CNeuralNetwork::PostTick(float time_to_tick)
double avg_actor_grad_norm = 0, avg_critic_grad_norm = 0,
avg_actor_weight_norm = 0, avg_critic_weight_norm = 0,
avg_actor_activation_mean = 0, avg_actor_activation_std = 0,
critic_mean_absolute_error = 0, critic_correlation_coefficient = 0;
critic_mean_absolute_error = 0, critic_correlation_coefficient = 0,
avg_angle_entropy = 0, avg_hook_entropy = 0, avg_hammer_entropy = 0, avg_direction_entropy = 0;
bool updated = false;
size_t count_episodes = model_manager->GetCountEpisodes();

Expand All @@ -1204,7 +1229,8 @@ void CNeuralNetwork::PostTick(float time_to_tick)
avg_entropy,
avg_actor_grad_norm, avg_critic_grad_norm,
avg_actor_weight_norm, avg_critic_weight_norm,
avg_actor_activation_mean, avg_actor_activation_std, critic_mean_absolute_error, critic_correlation_coefficient);
avg_actor_activation_mean, avg_actor_activation_std, critic_mean_absolute_error, critic_correlation_coefficient,
avg_angle_entropy, avg_hook_entropy, avg_hammer_entropy, avg_direction_entropy);
count_episodes_processed += count_episodes;
count_every_update += 1;
auto update_tick_delta = m_pServer->Tick() - last_update_tick;
Expand Down Expand Up @@ -1261,6 +1287,10 @@ void CNeuralNetwork::PostTick(float time_to_tick)
<< "," << critic_mean_absolute_error
<< "," << critic_correlation_coefficient
<< "," << avg_entropy
<< "," << avg_angle_entropy
<< "," << avg_hook_entropy
<< "," << avg_hammer_entropy
<< "," << avg_direction_entropy
<< "," << model_manager->GetEntropyCoefficient()
<< "," << avg_actor_grad_norm
<< "," << avg_critic_grad_norm
Expand Down
Loading

0 comments on commit feb4571

Please sign in to comment.