Add some logs, Hammer missed, entropy normalization

CovERUshKA · Jan 31, 2025 · feb4571 · feb4571
1 parent 32a06c7
commit feb4571
Show file tree

Hide file tree

Showing 7 changed files with 125 additions and 33 deletions.
diff --git a/src/engine/server/NN/ModelManager.cpp b/src/engine/server/NN/ModelManager.cpp
@@ -22,22 +22,23 @@ namespace fs = std::filesystem;
 
 int64_t n_in = 40;
 int64_t n_out = 7;
+int64_t h_start = 1024;
 double std_dev = 1;
 double learning_rate = 5e-5; // Default: 5e-5
 double actor_learning_rate = 2e-4; // Default: 5e-5
-double critic_learning_rate = 1e-3; // Default: 1e-4
+double critic_learning_rate = 5e-4; // Default: 1e-4
 //double weight_decay = 0.0001;
 
 int64_t mini_batch_size = 8000; // 4096, 8192, 16384, 32768
 int64_t count_mini_batches = 1;
 int64_t max_mini_batch_size = 8000; // 4096, 8192, 16384, 32768
 int64_t ppo_epochs = 4;
-double ent_coef = 2e-3; // Entropy coefficient
+double ent_coef = 1e-2; // Entropy coefficient
 double min_ent_coef = 1e-4;
 double ent_decay_factor = 0.95;
 double clip_param = 0.2; // Default: 0.2
 float gamma = 0.99f; // Default: 0.99f Discount factor
-float lambda = 0.97f; // GAE lambda
+float lambda = 0.95f; // GAE lambda
 
 float old_models_train = 0.2f; // Percent of old models
 int count_cached_old_models = 100; // old_models_train * ((float)count_bots / 2.f)
@@ -115,8 +116,8 @@ ModelManager::ModelManager(bool is_training, std::string train_folder, size_t ba
 
 	torch::manual_seed(seed);
 
-	ac_update->Initialize(n_in, n_out, std_dev);
-	ac_work->Initialize(n_in, n_out, std_dev);
+	ac_update->Initialize(n_in, n_out, h_start, std_dev);
+	ac_work->Initialize(n_in, n_out, h_start, std_dev);
 
 	//graph_main_input_tensor = torch::empty({(int)(count_bots - old_bots_indexes.size()), n_in}, torch::kCUDA);
 	//graph_main_output_tensor = torch::empty({(int)(count_bots - old_bots_indexes.size()), n_out}, torch::kCUDA);
@@ -146,7 +147,7 @@ ModelManager::ModelManager(bool is_training, std::string train_folder, size_t ba
 	param_groups.push_back(torch::optim::OptimizerParamGroup({ac_update->critic_network->parameters()},
 							std::make_unique<torch::optim::AdamOptions>(critic_learning_rate)));
 	param_groups.push_back(torch::optim::OptimizerParamGroup({ac_update->log_std_},
-		std::make_unique<torch::optim::AdamOptions>(actor_learning_rate)));
+		std::make_unique<torch::optim::AdamOptions>(actor_learning_rate / 2.)));
 
 	opt = std::make_shared<torch::optim::Adam>(param_groups);
 
@@ -281,7 +282,7 @@ bool ModelManager::LoadModels(std::string folder_path, std::string main_model_na
 				std::string new_model_path = new_models_folder + "\\" + model_filename;
 
 				ActorCritic old_model;
-				old_model->Initialize(n_in, n_out, std_dev);
+				old_model->Initialize(n_in, n_out, h_start, std_dev);
 				torch::load(old_model, model_path);
 				old_model->eval();
 				old_model->to(device);
@@ -627,7 +628,7 @@ std::vector<ModelOutput> ModelManager::Decide(
 	hooks = hooks.reshape({(int)input_inputs.size()}).to(torch::kBool).to(torch::kCPU, true);
 	hammers = hammers.reshape({(int)input_inputs.size()}).to(torch::kBool).to(torch::kCPU, true);
 
-	// When CPC -> GPU no synchronize needed, but needed when GPU -> CPU https://pytorch.org/tutorials/intermediate/pinmem_nonblock.html
+	// No synchronization is needed for CPU -> GPU transfers, but it is needed for GPU -> CPU: https://pytorch.org/tutorials/intermediate/pinmem_nonblock.html
 	cudaStreamSynchronize(c10::cuda::getCurrentCUDAStream());
 
 	auto angle_x_vec = angle_x.accessor<float, 1>(); // at::Half float
@@ -783,7 +784,8 @@ void ModelManager::Update(double avg_reward, bool cache_model, bool &updated,
 	double &avg_actor_grad_norm, double &avg_critic_grad_norm,
 	double &avg_actor_weight_norm, double &avg_critic_weight_norm,
 	double &avg_actor_activation_mean, double &avg_actor_activation_std,
-	double &critic_mean_absolute_error, double &critic_correlation_coefficient)
+	double &critic_mean_absolute_error, double &critic_correlation_coefficient,
+	double &avg_angle_entropy, double &avg_hook_entropy, double &avg_hammer_entropy, double &avg_direction_entropy)
 {
 	// Update.
 	if(!ac_work->is_training())
@@ -803,7 +805,7 @@ void ModelManager::Update(double avg_reward, bool cache_model, bool &updated,
 	if(cache_model)
 	{
 		ActorCritic old_model;
-		old_model->Initialize(n_in, n_out, std_dev);
+		old_model->Initialize(n_in, n_out, h_start, std_dev);
 		old_model->copy_from(ac_work.get());
 		old_model->eval();
 		old_ac.push_back(old_model);
@@ -825,6 +827,7 @@ void ModelManager::Update(double avg_reward, bool cache_model, bool &updated,
 			avg_actor_weight_norm, avg_critic_weight_norm,
 			avg_actor_activation_mean, avg_actor_activation_std,
 			critic_mean_absolute_error, critic_correlation_coefficient,
+			avg_angle_entropy, avg_hook_entropy, avg_hammer_entropy, avg_direction_entropy,
 			clip_param);
 	}
 	catch(const std::exception &e)

diff --git a/src/engine/server/NN/ModelManager.h b/src/engine/server/NN/ModelManager.h
@@ -135,7 +135,8 @@ struct ModelManager
 		double &avg_actor_grad_norm, double &avg_critic_grad_norm,
 		double &avg_actor_weight_norm, double &avg_critic_weight_norm,
 		double &avg_actor_activation_mean, double &avg_actor_activation_std,
-		double &critic_mean_absolute_error, double &critic_correlation_coefficient);
+		double &critic_mean_absolute_error, double &critic_correlation_coefficient,
+		double &avg_angle_entropy, double &avg_hook_entropy, double &avg_hammer_entropy, double &avg_direction_entropy);
 	void ReassignOldModels();
 
 	void Save(std::string filename);

diff --git a/src/engine/server/NN/Models.h b/src/engine/server/NN/Models.h
@@ -30,34 +30,38 @@ struct ActorCriticImpl : public torch::nn::Module
 
 	}
 
-    bool Initialize(int64_t n_in, int64_t n_out, double std)
+    bool Initialize(int64_t n_in, int64_t n_out, int64_t h_start, double std)
     {
 	    this->n_in = n_in;
 	    this->n_out = n_out;
 	    actor_network = torch::nn::Sequential(
-		    torch::nn::Linear(n_in, 1024),
+		    torch::nn::Linear(n_in, h_start),
 		    torch::nn::ReLU(),
-		    torch::nn::Linear(1024, 512),
+		    torch::nn::Linear(h_start, h_start/2),
 			torch::nn::ReLU(),
-		    torch::nn::Linear(512, 256),
+		    torch::nn::Linear(h_start / 2, h_start/4),
 		    torch::nn::ReLU(),
-		    torch::nn::Linear(256, 128),
+		    torch::nn::Linear(h_start / 4, h_start/8),
 		    torch::nn::ReLU(),
-		    torch::nn::Linear(128, n_out)
+		    torch::nn::Linear(h_start / 8, h_start / 16),
+		    torch::nn::ReLU(),
+		    torch::nn::Linear(h_start/16, n_out)
 		    //torch::nn::Tanh()
 		    );
 		//mu_ = torch::full(n_out, 0.);
 	    log_std_ = torch::full(2, std::log(std));
 		critic_network = torch::nn::Sequential(
-		    torch::nn::Linear(n_in, 1024),
+		    torch::nn::Linear(n_in, h_start),
+		    torch::nn::ReLU(),
+		    torch::nn::Linear(h_start, h_start / 2),
 		    torch::nn::ReLU(),
-		    torch::nn::Linear(1024, 512),
+		    torch::nn::Linear(h_start / 2, h_start / 4),
 		    torch::nn::ReLU(),
-		    torch::nn::Linear(512, 256),
+		    torch::nn::Linear(h_start / 4, h_start / 8),
 		    torch::nn::ReLU(),
-		    torch::nn::Linear(256, 128),
+		    torch::nn::Linear(h_start / 8, h_start / 16),
 		    torch::nn::ReLU(),
-		    torch::nn::Linear(128, 1)
+		    torch::nn::Linear(h_start / 16, 1)
 		);
 
 	    //printf("Created from 0\n");
@@ -235,12 +239,12 @@ struct ActorCriticImpl : public torch::nn::Module
 	    used_presamples = 0;
     }
 
-	 // Gaussian entropy
+	// Gaussian entropy
     auto entropy_gaussian() -> torch::Tensor
     {
 	    // Differential entropy of normal distribution. For reference https://pytorch.org/docs/stable/_modules/torch/distributions/normal.html#Normal
 	    auto gaussian_entropy = 0.5 + 0.5 * log(2 * M_PI) + log_std_;
-
+	    
 	    // Sum over the last dimension (angle components)
 	    return gaussian_entropy.sum(); // Shape [...]
     }
@@ -264,20 +268,20 @@ struct ActorCriticImpl : public torch::nn::Module
 
     auto entropy(torch::Tensor action) -> torch::Tensor
     {
-	    auto angles_entropy = entropy_gaussian().expand({action.size(0)});
+	    auto angle_entropy = entropy_gaussian().expand({action.size(0)});
 
 		auto probs = torch::sigmoid(action.slice(1, 5, 6)); // Shape [batch_size, 1]
 		auto hook_entropy = entropy_bernoulli(probs);
 		probs = torch::sigmoid(action.slice(1, 6, 7)); // Shape [batch_size, 1]
 		auto hammer_entropy = entropy_bernoulli(probs);
 
 		probs = torch::softmax(action.slice(1, 2, 5), 1); // Shape [batch_size, 3]
-		auto dir_entropy = entropy_categorical(probs);
+		auto direction_entropy = entropy_categorical(probs);
 
 		hook_entropy = hook_entropy.squeeze(-1); // Convert from [batch_size, 1] to [batch_size]
 		hammer_entropy = hammer_entropy.squeeze(-1);
 
-        return angles_entropy + hook_entropy + hammer_entropy + dir_entropy;
+        return angle_entropy + hook_entropy + hammer_entropy + direction_entropy;
     }
 
 	// Extract log probabilities for categorical distribution

diff --git a/src/engine/server/NN/NeuralNetwork.cpp b/src/engine/server/NN/NeuralNetwork.cpp
@@ -494,6 +494,10 @@ void CNeuralNetwork::OnInit()
 				"Critic Mean Absolute Error",
 				"Critic Correlation Coefficient",
 				"Entropy",
+				"Angle entropy",
+				"Hook entropy",
+				"Hammer entropy",
+				"Direction entropy",
 				"Entropy coefficient",
 				"Actor grad norm",
 				"Critic grad norm",
@@ -938,14 +942,21 @@ void CNeuralNetwork::PostTick(float time_to_tick)
 	static float ball_on_spawn_reward = -0.1f; // -1.f There are 3 spawns. Center(at the start), left side and right side
 	static float ball_on_side_reward = 0.1f; // 0.05f If the ball is on your side it penalizes you, otherwise rewards you
 	static float ball_on_side_distance_reward = 0.1f; // 0.2f It means that if the ball is on your side and far from the net it always penalize you on that reward, if ball is half closer to net it penalize on half, but if on enemy side it rewards
+
+	// It is hard to compute the ideal distance from the ball to the goal
 	static float ball_moving_towards_net_reward = 0.3f; // 0.2f 10 If the ball is on your side it rewards for moving towards net, otherwise penalize. For example if ball moves from the farthest point to net in summ it would be this reward, so it calculates delta of moving to the net in %
 	static float ball_moving_towards_goal_reward = 0.03f; // 0.2f 10 On the enemy side it rewards if ball is moving toward goal
+
 	static float being_in_freeze_reward = -0.2f; // -0.1f if the bot is currently freezed it penalizes you on that reward
 	static float bot_is_grabbed_reward = 0.1f; // If the bot is currently grabbed to wall/ball applies to every tick
 	static float bot_is_holding_ball_reward = 0.05f; // If the bot is currently holding ball using hook it rewards every tick
 	static float bot_moving_towards_ball_reward = 0.1f; // Not implemented
 	static float bot_hitted_ball_reward = 2.0f; // Rewards bot for hitting ball
-	static float bot_hook_missed_reward = -0.5f; // Applies when bot teleported with ground teleporter
+
+	// Misses
+	static float bot_hammer_missed_reward = -0.2f; // Applies when the bot's hammer did not hit anything
+	static float bot_hook_missed_reward = -0.5f; // Applies when the bot's hook did not hit anything and is retracting back to the bot
+
 	static float bot_teleported_reward = -1.f; // Applies when bot teleported with ground teleporter
 	static float step_reward = -0.02f; // -0.001f Applies every tick
 	static float divide_reward_by = 5.f;
@@ -1036,20 +1047,33 @@ void CNeuralNetwork::PostTick(float time_to_tick)
 			}
 
 			// Handle ball hit
-			if (first_bot_character->m_HittedBall)
+			if(first_bot_character->m_HittedBall)
 			{
 				first_bot_character->m_HittedBall = false;
 				first_bot_reward += bot_hitted_ball_reward;
 				cumulative_ball_hits += 1;
 			}
 
+			if(first_bot_character->m_HammerMissed)
+			{
+				first_bot_character->m_HammerMissed = false;
+				first_bot_reward += bot_hammer_missed_reward;
+			}
+
+			// Handle ball hit
 			if(second_bot_character->m_HittedBall)
 			{
 				second_bot_character->m_HittedBall = false;
 				second_bot_reward += bot_hitted_ball_reward;
 				cumulative_ball_hits += 1;
 			}
 
+			if(second_bot_character->m_HammerMissed)
+			{
+				second_bot_character->m_HammerMissed = false;
+				second_bot_reward += bot_hammer_missed_reward;
+			}
+
 			if (first_bot_character->GetCore().m_HookedPlayer && first_bot_character->GetCore().m_HookedPlayer % 3 == 2)
 				first_bot_reward += bot_is_holding_ball_reward;
 
@@ -1188,7 +1212,8 @@ void CNeuralNetwork::PostTick(float time_to_tick)
 			double avg_actor_grad_norm = 0, avg_critic_grad_norm = 0,
 			avg_actor_weight_norm = 0, avg_critic_weight_norm = 0,
 			avg_actor_activation_mean = 0, avg_actor_activation_std = 0,
-			       critic_mean_absolute_error = 0, critic_correlation_coefficient = 0;
+			       critic_mean_absolute_error = 0, critic_correlation_coefficient = 0,
+			       avg_angle_entropy = 0, avg_hook_entropy = 0, avg_hammer_entropy = 0, avg_direction_entropy = 0;
 			bool updated = false;
 			size_t count_episodes = model_manager->GetCountEpisodes();
 
@@ -1204,7 +1229,8 @@ void CNeuralNetwork::PostTick(float time_to_tick)
 				avg_entropy,
 				avg_actor_grad_norm, avg_critic_grad_norm,
 				avg_actor_weight_norm, avg_critic_weight_norm,
-				avg_actor_activation_mean, avg_actor_activation_std, critic_mean_absolute_error, critic_correlation_coefficient);
+				avg_actor_activation_mean, avg_actor_activation_std, critic_mean_absolute_error, critic_correlation_coefficient,
+				avg_angle_entropy, avg_hook_entropy, avg_hammer_entropy, avg_direction_entropy);
 			count_episodes_processed += count_episodes;
 			count_every_update += 1;
 			auto update_tick_delta = m_pServer->Tick() - last_update_tick;
@@ -1261,6 +1287,10 @@ void CNeuralNetwork::PostTick(float time_to_tick)
 				       << "," << critic_mean_absolute_error
 				       << "," << critic_correlation_coefficient
 				       << "," << avg_entropy
+				       << "," << avg_angle_entropy
+				       << "," << avg_hook_entropy
+				       << "," << avg_hammer_entropy
+				       << "," << avg_direction_entropy
 				       << "," << model_manager->GetEntropyCoefficient()
 				       << "," << avg_actor_grad_norm
 				       << "," << avg_critic_grad_norm