implement entire optimizer mechanism, add sgd
fm94 committed Jun 8, 2024
1 parent e76ee75 commit a360bcc
Showing 13 changed files with 77 additions and 48 deletions.
10 changes: 6 additions & 4 deletions include/base/op.hpp
@@ -4,6 +4,7 @@

#pragma once

#include "optimizer/base.hpp"
#include <Eigen/Dense>

namespace Tipousi
@@ -19,14 +20,15 @@
virtual void backward(const Eigen::MatrixXf &out_grad,
Eigen::MatrixXf &in_grad) = 0;

void set_learning_rate(float learning_rate)
void set_optimizer(Optimizer::OptimizerBase *optimizer)
{
m_learning_rate = learning_rate;
}
m_optimizer = optimizer;
};

protected:
float m_learning_rate;
Eigen::MatrixXf m_current_inputs;
Eigen::MatrixXf m_current_outputs;

Optimizer::OptimizerBase *m_optimizer;
};
}; // namespace Tipousi
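With this change an Op no longer carries its own learning rate; it borrows a non-owning pointer to an optimizer injected from outside. A minimal usage sketch under that reading (types as in the tests further down; the caller is responsible for keeping the optimizer alive as long as the graph holds the pointer):

```cpp
// Non-owning injection: the SGD instance must outlive every Op that stores
// the pointer; the stack order below guarantees that.
#include "layer/dense.hpp"
#include "optimizer/sgd.hpp"

int main()
{
    Tipousi::Optimizer::SGD sgd(0.01f);
    Tipousi::Layer::Dense  dense(3, 2);
    dense.set_optimizer(&sgd); // dense borrows, never deletes
}
```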
8 changes: 3 additions & 5 deletions include/graph/node.hpp
@@ -1,6 +1,7 @@
#pragma once

#include "base/op.hpp"
#include "optimizer/base.hpp"
#include <memory>
#include <vector>

@@ -24,14 +25,11 @@
void add_input(Node *node);
void add_output(Node *node);

void set_optimizer(Optimizer::OptimizerBase *optimizer);

std::vector<Node *> &get_outputs() { return m_outputs; }
std::vector<Node *> &get_inputs() { return m_inputs; }

void set_learning_rate(float learning_rate)
{
m_operation->set_learning_rate(learning_rate);
}

private:
Node(std::unique_ptr<Op> ptr);

9 changes: 4 additions & 5 deletions include/graph/sequential.hpp
@@ -2,6 +2,7 @@

#include "graph/node.hpp"
#include "graph/trainable.hpp"
#include "optimizer/base.hpp"
#include <vector>

namespace Tipousi
@@ -13,7 +14,7 @@

public:
Sequential(Node *input_node, Node *output_node,
float learning_rate);
Optimizer::OptimizerBase *optimizer);

~Sequential()
{
@@ -28,10 +29,8 @@
void forward(const Eigen::MatrixXf &in, Eigen::MatrixXf &out);
void backward(Eigen::MatrixXf &initial_grads);

void train(const Data::Dataset &dataset,
const Optimizer::OptimizerBase &optimizer,
const Loss::LossBase &loss,
const uint32_t n_epochs) override;
void train(const Data::Dataset &dataset, const Loss::LossBase &loss,
const uint32_t n_epochs) override;

private:
Node *m_input_node = nullptr;
7 changes: 3 additions & 4 deletions include/graph/trainable.hpp
@@ -11,10 +11,9 @@
class Trainable
{
public:
virtual void train(const Data::Dataset &dataset,
const Optimizer::OptimizerBase &optimizer,
const Loss::LossBase &loss,
const uint32_t n_epochs) = 0;
virtual void train(const Data::Dataset &dataset,
const Loss::LossBase &loss,
const uint32_t n_epochs) = 0;

protected:
Trainable() = default;
8 changes: 8 additions & 0 deletions include/optimizer/base.hpp
@@ -1,11 +1,19 @@
#pragma once

#include <Eigen/Dense>

namespace Tipousi
{
namespace Optimizer
{
class OptimizerBase
{
public:
virtual void update_weights(Eigen::MatrixXf &weights,
Eigen::MatrixXf &grads) = 0;

protected:
float m_learning_rate;
};
}; // namespace Optimizer
}; // namespace Tipousi
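The base class fixes the contract every optimizer must satisfy: one `update_weights` call per parameter matrix. A hypothetical sketch of how a further optimizer could plug into this interface — the `MomentumSGD` class, its members, and the per-parameter keying are illustrations, not part of this commit. Note that `Sequential` hands the same optimizer instance to every node (see src/graph/sequential.cpp below), and `Dense` calls it for both weights and bias, so a stateful optimizer has to key its state per parameter matrix:

```cpp
// Hypothetical sketch, not part of this commit: a momentum variant built on
// the OptimizerBase interface above. Because one optimizer instance is shared
// across all layers, per-parameter state is keyed by the matrix's buffer.
#pragma once

#include "optimizer/base.hpp"
#include <Eigen/Dense>
#include <unordered_map>

namespace Tipousi
{
    namespace Optimizer
    {
        class MomentumSGD : public OptimizerBase
        {
        public:
            MomentumSGD(const float learning_rate, const float momentum)
                : m_momentum(momentum)
            {
                m_learning_rate = learning_rate;
            }

            void update_weights(Eigen::MatrixXf &weights,
                                Eigen::MatrixXf &grads) override
            {
                // one velocity buffer per parameter matrix
                auto &v = m_velocities[weights.data()];
                if (v.size() == 0)
                {
                    v = Eigen::MatrixXf::Zero(weights.rows(), weights.cols());
                }
                v = m_momentum * v - m_learning_rate * grads;
                weights += v;
            }

        private:
            float m_momentum;
            std::unordered_map<const float *, Eigen::MatrixXf> m_velocities;
        };
    } // namespace Optimizer
} // namespace Tipousi
```

Swapping such a class in would only change the line in the tests that constructs `SGD`; nothing in the graph code needs to know which optimizer it holds.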
5 changes: 4 additions & 1 deletion include/optimizer/sgd.hpp
@@ -9,8 +9,11 @@
class SGD : public OptimizerBase
{
public:
SGD();
SGD(const float learning_rate);
~SGD() = default;

void update_weights(Eigen::MatrixXf &weights,
Eigen::MatrixXf &grads) override;
};
}; // namespace Optimizer
}; // namespace Tipousi
8 changes: 7 additions & 1 deletion src/graph/node.cpp
@@ -1,4 +1,5 @@
#include "graph/node.hpp"
#include "node.hpp"

namespace Tipousi
{
@@ -45,5 +46,10 @@

void Node::add_output(Node *node) { m_outputs.push_back(node); }

void Node::set_optimizer(Optimizer::OptimizerBase *optimizer)
{
m_operation->set_optimizer(optimizer);
}

} // namespace Graph
} // namespace Tipousi
11 changes: 5 additions & 6 deletions src/graph/sequential.cpp
@@ -6,7 +6,7 @@
namespace Graph
{
Sequential::Sequential(Node *input_node, Node *output_node,
float learning_rate)
Optimizer::OptimizerBase *optimizer)
: m_input_node(input_node), m_output_node(output_node)
{
// mechanism to register all nodes
@@ -16,8 +16,8 @@
{
if (current_node)
{
current_node->set_optimizer(optimizer);
m_node_registry.push_back(current_node);
current_node->set_learning_rate(learning_rate);
// TODO hacky approach: always take number 0
auto &output_nodes = current_node->get_outputs();
if (output_nodes.size() == 0 || !output_nodes[0])
@@ -78,10 +78,9 @@
}
}

void Sequential::train(const Data::Dataset &dataset,
const Optimizer::OptimizerBase &optimizer,
const Loss::LossBase &loss_func,
const uint32_t n_epochs)
void Sequential::train(const Data::Dataset &dataset,
const Loss::LossBase &loss_func,
const uint32_t n_epochs)
{
for (uint32_t i{0}; i < n_epochs; i++)
{
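For readers following the registration change: the constructor now walks the chain once and injects the shared optimizer into every node. A condensed paraphrase of that walk — the loop framing is reconstructed from the visible lines and is an assumption, since the diff view truncates the tail of the hunk:

```cpp
// Reconstructed sketch (assumption: the truncated part of the hunk advances
// along output edge 0 until no successor is left).
Node *current_node = m_input_node;
while (current_node)
{
    current_node->set_optimizer(optimizer);
    m_node_registry.push_back(current_node);
    // TODO hacky approach: always take number 0
    auto &output_nodes = current_node->get_outputs();
    current_node = output_nodes.empty() ? nullptr : output_nodes[0];
}
```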
5 changes: 3 additions & 2 deletions src/layer/dense.cpp
@@ -24,8 +24,9 @@
Eigen::MatrixXf weight_grad =
m_current_inputs.transpose() * out_grad;
Eigen::MatrixXf bias_grad = out_grad.colwise().sum();
m_weights -= m_learning_rate * weight_grad;
m_bias.row(0) -= m_learning_rate * bias_grad;
m_optimizer->update_weights(m_weights, weight_grad);
// .row(0) has been removed here! check whether it has "no" effect
m_optimizer->update_weights(m_bias, bias_grad);
in_grad = out_grad * m_weights.transpose();
}
} // namespace Layer
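About the `.row(0)` question in the comment above: `out_grad.colwise().sum()` yields a `1 x n` row vector, so if `m_bias` is itself stored as a `1 x n` matrix (which the old `.row(0)` update suggests), updating the whole matrix touches exactly the same coefficients as updating row 0. A quick self-contained check under that assumption:

```cpp
// Assumes m_bias is stored as a 1 x n matrix; then the whole-matrix update
// and the old .row(0) update are element-for-element identical.
#include <Eigen/Dense>
#include <cassert>

int main()
{
    Eigen::MatrixXf bias_a   = Eigen::MatrixXf::Zero(1, 3);
    Eigen::MatrixXf bias_b   = Eigen::MatrixXf::Zero(1, 3);
    Eigen::MatrixXf out_grad = Eigen::MatrixXf::Ones(4, 3);
    Eigen::MatrixXf bias_grad = out_grad.colwise().sum(); // 1 x 3

    bias_a -= 0.01f * bias_grad;        // new: whole matrix
    bias_b.row(0) -= 0.01f * bias_grad; // old: row 0 only
    assert(bias_a.isApprox(bias_b));    // identical results
}
```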
8 changes: 7 additions & 1 deletion src/optimizer/sgd.cpp
@@ -4,6 +4,12 @@
{
namespace Optimizer
{
SGD::SGD() {}
SGD::SGD(const float learning_rate) { m_learning_rate = learning_rate; }

void Optimizer::SGD::update_weights(Eigen::MatrixXf &weights,
Eigen::MatrixXf &grads)
{
weights -= m_learning_rate * grads;
}
} // namespace Optimizer
} // namespace Tipousi
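The update rule is plain gradient descent, `w -= lr * grad`. A tiny standalone check of the new class (assuming the headers above are on the include path):

```cpp
// Minimal check: with lr = 0.1, w = [1.0, -2.0] and grad = [0.5, 0.5],
// the step lands on [0.95, -2.05].
#include "optimizer/sgd.hpp"
#include <Eigen/Dense>
#include <iostream>

int main()
{
    Tipousi::Optimizer::SGD sgd(0.1f);
    Eigen::MatrixXf w(1, 2), g(1, 2);
    w << 1.0f, -2.0f;
    g << 0.5f, 0.5f;
    sgd.update_weights(w, g);
    std::cout << w << std::endl; // prints: 0.95 -2.05
}
```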
17 changes: 9 additions & 8 deletions tests/test_adder_inference.cpp
@@ -25,17 +25,19 @@
int n_features{2};
int n_labels{1};

Node *node1 = Node::create<Dense>(n_features, 32);
Node *node1 = Node::create<Dense>(n_features, 16);
Node *node2 = Node::create<ReLU>();
Node *node3 = Node::create<Dense>(32, n_labels);
Node *node3 = Node::create<Dense>(16, n_labels);

// build the dependencies
node2->add_input(node1);
node3->add_input(node2);

float learning_rate{0.01f};
SGD sgd(learning_rate);

// create the graph (pass input and output nodes)
float learning_rate{0.005f};
Sequential net(node1, node3, learning_rate);
Sequential net(node1, node3, &sgd);

// test inference
Eigen::MatrixXf X(4, 2);
@@ -48,11 +50,10 @@
// create dataset
Dataset dataset(X, Y);

// define the optimizer and the loss
SGD sgd; // dummy
// define the loss
MSE mse;
auto start = std::chrono::high_resolution_clock::now();
net.train(dataset, sgd, mse, 50);
net.train(dataset, mse, 50);
auto end = std::chrono::high_resolution_clock::now();
auto duration =
std::chrono::duration_cast<std::chrono::microseconds>(end - start)
@@ -77,5 +78,5 @@

std::cout << "gt: " << Y << std::endl;
std::cout << "predictions: " << preds << std::endl;
EXPECT_LT(loss, 1);
EXPECT_LT(loss, 0.02f);
}
10 changes: 8 additions & 2 deletions tests/test_dense.cpp
@@ -1,14 +1,18 @@
#include "layer/dense.hpp"
#include "optimizer/sgd.hpp"
#include "utils.hpp"
#include <gtest/gtest.h>

using namespace Tipousi;
using namespace Layer;
using namespace Optimizer;

TEST(DenseLayerTest, ForwardPass)
{
Dense dense(3, 2);
dense.set_learning_rate(0.01f);

SGD sgd(0.01f);
dense.set_optimizer(&sgd);

Eigen::MatrixXf input(1, 3);
input << 1.0f, 2.0f, 3.0f;
@@ -24,7 +28,9 @@
TEST(DenseLayerTest, BackwardPass)
{
Dense dense(3, 2);
dense.set_learning_rate(0.01f);

SGD sgd(0.01f);
dense.set_optimizer(&sgd);

Eigen::MatrixXf input(1, 3);
input << 1.0f, 2.0f, 3.0f;
19 changes: 10 additions & 9 deletions tests/test_xor_inference.cpp
@@ -26,19 +26,21 @@
int n_features{2};
int n_labels{1};

Node *node1 = Node::create<Dense>(n_features, 32);
Node *node2 = Node::create<ReLU>();
Node *node3 = Node::create<Dense>(32, n_labels);
Node *node1 = Node::create<Dense>(n_features, 16);
Node *node2 = Node::create<Sigmoid>();
Node *node3 = Node::create<Dense>(16, n_labels);
Node *node4 = Node::create<Sigmoid>();

// build the dependencies
node2->add_input(node1);
node3->add_input(node2);
node4->add_input(node3);

float learning_rate{0.5f};
SGD sgd(learning_rate);

// create the graph (pass input and output nodes)
float learning_rate{0.01f};
Sequential net(node1, node4, learning_rate);
Sequential net(node1, node4, &sgd);

// test inference
Eigen::MatrixXf X(4, 2);
@@ -51,11 +53,10 @@
// create dataset
Dataset dataset(X, Y);

// define the optimizer and the loss
SGD sgd; // dummy
// define the loss
MSE mse;
auto start = std::chrono::high_resolution_clock::now();
net.train(dataset, sgd, mse, 50);
net.train(dataset, mse, 200);
auto end = std::chrono::high_resolution_clock::now();
auto duration =
std::chrono::duration_cast<std::chrono::microseconds>(end - start)
@@ -80,5 +81,5 @@

std::cout << "gt: " << Y << std::endl;
std::cout << "predictions: " << preds << std::endl;
EXPECT_LT(loss, 1);
EXPECT_LT(loss, 0.5f); // better than random guessing
}
