mlpack · kartikdutt18 · May 28, 2021 · May 25, 2021 · May 25, 2021 · May 25, 2021
diff --git a/portfolio_optimization_nsga2/portfolio-optimization-nsga2-cpp.ipynb b/portfolio_optimization_nsga2/portfolio-optimization-nsga2-cpp.ipynb
@@ -0,0 +1 @@
+{"cells":[{"metadata":{"trusted":true},"cell_type":"code","source":"/**\n * @file portfolio-optimization-nsga2-cpp.ipynb\n *\n * A simple practical application of Non Dominated Sorting Genetic Algorithm-2\n * (NSGA2) in portfolio optimization. This example allows user to freely choose \n * multiple stocks of their choice, which upon request, generates csv automagically \n * via a helper function.\n *\n * The algorithm will try and optimize the trade-off between the returns and\n * volatility of the requested stocks.\n *\n * Data from Pandas Datareader library (https://pandas-datareader.readthedocs.io/en/latest/).\n */","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"#define ARMA_DONT_USE_WRAPPER","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"#include <mlpack/xeus-cling.hpp>\n\n#include <ensmallen.hpp>\n#include \"../utils/portfolio.hpp\"","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"// Header files to create and show the plot.\n#define WITHOUT_NUMPY 1\n#include \"matplotlibcpp.h\"\n#include \"xwidgets/ximage.hpp\"\n\nnamespace plt = matplotlibcpp;","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"using namespace ens;","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"using namespace ens::test;","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"### 1. Set the Model Parameters"},{"metadata":{},"cell_type":"markdown","source":"In this section, we will select the parameters for the optimizer. Parameters include name of the stocks, starting date, ending date and Finance API Source."},{"metadata":{"trusted":true},"cell_type":"code","source":"//! 
Declare user specified data.\nstd::string stocks, startDate, endDate, dataSource;","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"std::cout << \"Type the name of symbol of the stocks via comma separated values (no spaces)\" << std::endl;\nstd::cin >> stocks;","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"We're setting the data source to the Yahoo Finance API by default. We encourage users to try a custom data source; please refer to the pandas-datareader documentation for a list of available API sources."},{"metadata":{"trusted":true},"cell_type":"code","source":"dataSource = \"yahoo\";\n\n//! Uncomment to set custom data-source\n//std::cin >> dataSource;","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"std::cout << \"Starting Date (YYYY/MM/DD or DD/MM/YYYY)\" << std::endl;\nstd::cin >> startDate;","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"std::cout << \"End Date (YYYY/MM/DD or DD/MM/YYYY)\" << std::endl;\nstd::cin >> endDate;","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"### 2. Loading the Dataset"},{"metadata":{},"cell_type":"markdown","source":"In this section, we will create a helper class which will generate the CSV file for us based on the parameters provided in the previous sections. This class also defines the objective functions in question, namely Return and Volatility. Ideally, we would want to maximize the returns and reduce the volatility. 
Since our implementation of the algorithm minimizes all objectives, we have appended a negative sign to the returns objective, which converts it into a minimization problem."},{"metadata":{"trusted":true},"cell_type":"code","source":"class PortfolioFunction\n{\n  public:\n    PortfolioFunction(const std::string& stocks,\n                      const std::string& dataSource,\n                      const std::string& startDate,\n                      const std::string& endDate)\n    {\n    //! Generate the requested csv file.\n      Portfolio(stocks, dataSource, startDate, endDate,\"portfolio.csv\");\n      returns.load(\"portfolio.csv\", arma::csv_ascii);\n      returns.shed_col(0);\n\n      assets = returns.n_cols;\n    }\n\n    //! Get the starting point.\n    arma::mat GetInitialPoint()\n    {\n      return arma::Col<double>(assets, 1, arma::fill::zeros);\n    }\n\n    struct ObjectiveA\n    {\n        ObjectiveA(const arma::mat& returns) : returns(returns) {}\n\n        double Evaluate(const arma::mat& coords)\n        {\n          const double portfolioReturns = arma::accu(arma::mean(returns) %\n              coords.t()) * 252;\n\n          return -portfolioReturns;\n        }\n\n        arma::mat returns;\n    };\n\n    struct ObjectiveB\n    {\n        ObjectiveB(const arma::mat& returns) : returns(returns) {}\n\n        double Evaluate(const arma::mat& coords)\n        {\n          const double portfolioVolatility = arma::as_scalar(arma::sqrt(\n                coords.t() * arma::cov(returns) * 252 * coords));\n          return portfolioVolatility;\n        }\n\n        arma::mat returns;\n    };\n\n    //! Get objective functions.\n    std::tuple<ObjectiveA, ObjectiveB> GetObjectives()\n    {\n      return std::make_tuple(ObjectiveA(returns), ObjectiveB(returns));\n    }\n\n    arma::mat returns;\n    size_t assets;\n};\n\n\n//! 
The constructor will generate the csv file.\nPortfolioFunction pf(stocks, dataSource, startDate, endDate);\n\nconst double lowerBound = 0;\nconst double upperBound = 1;\n\nens::NSGA2 opt(20, // population size: The number of candidates in the population.\n               300, // max generations: The maximum number of generations allowed.\n               0.5, // crossover probability: The probability that the elites reproduce.\n               0.5, // mutation  probability: The probability of mutation among the elite.\n               1e-3, // mutation strength: The strength of the mutation.\n               1e-6, // epsilon: The minimum difference required to distinguish between two solutions.\n               lowerBound, // lowerBound: Lower bound of the coordinates of the initial population\n               upperBound // upperBound: Upper bound of the coordinates of the initial population\n               );\n\narma::mat coords = pf.GetInitialPoint();\nauto objectives = pf.GetObjectives();","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"### 3. Optimization "},{"metadata":{},"cell_type":"markdown","source":"NSGA2 is a genetic algorithm which works by assigning fitness to each population member based on its performance in each objective. A member \"dominates\" another if its assigned fitness is better than the other's; this creates an \"elite\" population. The elite members reproduce among themselves to produce even better offspring. This process is repeated iteratively to arrive at an optimal set of solutions known as the \"Pareto Front\". 
"},{"metadata":{},"cell_type":"markdown","source":"Begin Optimization!"},{"metadata":{"trusted":true},"cell_type":"code","source":"opt.Optimize(objectives, coords);","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"Let's collect the results and inspect our first set of solutions."},{"metadata":{"trusted":true},"cell_type":"code","source":"arma::cube paretoFront = opt.ParetoFront();\n\nstd::cout << paretoFront.slice(0) << std::endl;","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"Convert to the necessary data structure."},{"metadata":{"trusted":true},"cell_type":"code","source":"size_t populationSize = paretoFront.n_slices;\n\n//! Store the X, Y coordinates of the Pareto Front\nstd::vector<double> frontX(populationSize, 0.);\nstd::vector<double> frontY(populationSize, 0.);\n\nfor (size_t idx = 0; idx < populationSize; ++idx)\n{\n    frontX[idx] = paretoFront.slice(idx)(0);\n    frontY[idx] = paretoFront.slice(idx)(1);\n}","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"### 4. Plotting"},{"metadata":{},"cell_type":"markdown","source":"Recall that previously, we appended a negative sign to the returns objective to convert it into a minimization problem."},{"metadata":{"trusted":true},"cell_type":"code","source":"plt::figure_size(800, 800);\nplt::plot(frontX, frontY);\nplt::xlabel(\"Returns Objective\");\nplt::ylabel(\"Volatility Objective\");\n\nplt::title(\"The Pareto Front\");\nplt::legend();\n\nplt::save(\"./plot.png\");\nauto im = xw::image_from_file(\"plot.png\").finalize();\nim","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"### 5. Final Thoughts"},{"metadata":{},"cell_type":"markdown","source":"In this notebook, we've seen how a multi-objective optimization algorithm can help in investing in stocks. We specified custom stocks and saw our algorithm optimize the returns vs. volatility trade-off live. 
Feel free to play around by selecting various stocks and see how the outcomes play out. "}],"metadata":{"kernelspec":{"name":"xcpp14","display_name":"C++14","language":"C++14"},"language_info":{"codemirror_mode":"text/x-c++src","file_extension":".cpp","mimetype":"text/x-c++src","name":"c++","version":"14"}},"nbformat":4,"nbformat_minor":5}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"cells":[{"metadata":{"trusted":true},"cell_type":"code","source":"/**\n * @file portfolio-optimization-nsga2-cpp.ipynb\n *\n * A simple practical application of Non Dominated Sorting Genetic Algorithm-2\n * (NSGA2) in portfolio optimization. This example allows user to freely choose \n * multiple stocks of their choice, which upon request, generates csv automagically \n * via a helper function.\n *\n * The algorithm will try and optimize the trade-off between the returns and\n * volatility of the requested stocks.\n *\n * Data from Pandas Datareader library (https://pandas-datareader.readthedocs.io/en/latest/).\n */","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"#define ARMA_DONT_USE_WRAPPER","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"#include <mlpack/xeus-cling.hpp>\n\n#include <ensmallen.hpp>\n#include \"../utils/portfolio.hpp\"","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"// Header files to create and show the plot.\n#define WITHOUT_NUMPY 1\n#include \"matplotlibcpp.h\"\n#include \"xwidgets/ximage.hpp\"\n\nnamespace plt = matplotlibcpp;","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"using namespace ens;","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"using namespace ens::test;","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"### 1. Set the Model Parameters"},{"metadata":{},"cell_type":"markdown","source":"In this section, we will select the parameters for the optimizer. Parameters include name of the stocks, starting date, ending date and Finance API Source."},{"metadata":{"trusted":true},"cell_type":"code","source":"//! 
Declare user specified data.\nstd::string stocks, startDate, endDate, dataSource;","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"std::cout << \"Type the name of symbol of the stocks via comma separated values (no spaces)\" << std::endl;\nstd::cin >> stocks;","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"We're setting the data source to Yahoo Finance API by default. We encourage users to use custom data source, please refer pandas-datareader documentation for a list of available API sources."},{"metadata":{"trusted":true},"cell_type":"code","source":"dataSource = \"yahoo\";\n\n//! Uncomment to set custom data-source\n//std::cin >> dataSource;","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"std::cout << \"Starting Date (YYYY/MM/DD or DD/MM/YYYY)\" << std::endl;\nstd::cin >> startDate;","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"std::cout << \"End Date (YYYY/MM/DD or DD/MM/YYYY)\" << std::endl;\nstd::cin >> endDate;","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"### 2. Loading the Dataset"},{"metadata":{},"cell_type":"markdown","source":"In this section, we will create a helper class which will generate the CSV file for us based on the parameters provided in previous sections. This class would also define the objective functions in question, namely: Return and Volatility. Ideally, we would want to maximize the returns and reduce the volatility. 
Since our implementation of the algorithm minimizes all objectives, we have appended a negative sign to the returns objective, which converts it into a minimization problem."},{"metadata":{"trusted":true},"cell_type":"code","source":"class PortfolioFunction\n{\n public:\n PortfolioFunction(const std::string& stocks,\n const std::string& dataSource,\n const std::string& startDate,\n const std::string& endDate)\n {\n //! Generate the requested csv file.\n Portfolio(stocks, dataSource, startDate, endDate,\"portfolio.csv\");\n returns.load(\"portfolio.csv\", arma::csv_ascii);\n returns.shed_col(0);\n\n assets = returns.n_cols;\n }\n\n //! Get the starting point.\n arma::mat GetInitialPoint()\n {\n return arma::Col<double>(assets, 1, arma::fill::zeros);\n }\n\n struct ObjectiveA\n {\n ObjectiveA(const arma::mat& returns) : returns(returns) {}\n\n double Evaluate(const arma::mat& coords)\n {\n const double portfolioReturns = arma::accu(arma::mean(returns) %\n coords.t()) * 252;\n\n return -portfolioReturns;\n }\n\n arma::mat returns;\n };\n\n struct ObjectiveB\n {\n ObjectiveB(const arma::mat& returns) : returns(returns) {}\n\n double Evaluate(const arma::mat& coords)\n {\n const double portfolioVolatility = arma::as_scalar(arma::sqrt(\n coords.t() * arma::cov(returns) * 252 * coords));\n return portfolioVolatility;\n }\n\n arma::mat returns;\n };\n\n //! Get objective functions.\n std::tuple<ObjectiveA, ObjectiveB> GetObjectives()\n {\n return std::make_tuple(ObjectiveA(returns), ObjectiveB(returns));\n }\n\n arma::mat returns;\n size_t assets;\n};\n\n\n//! 
The constructor will generate the csv file.\nPortfolioFunction pf(stocks, dataSource, startDate, endDate);\n\nconst double lowerBound = 0;\nconst double upperBound = 1;\n\nens::NSGA2 opt(20, // population size: The number of candidates in the population.\n 300, // max generations: The maximum number of generations allowed.\n 0.5, // crossover probability: The probability that the elites reproduce.\n 0.5, // mutation probability: The probability of mutation among the elite.\n 1e-3, // mutation strength: The strength of the mutation.\n 1e-6, // epsilon: The minimum difference required to distinguish between two solutions.\n lowerBound, // lowerBound: Lower bound of the coordinates of the initial population\n upperBound // upperBound: Upper bound of the coordinates of the initial population\n );\n\narma::mat coords = pf.GetInitialPoint();\nauto objectives = pf.GetObjectives();","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"### 3. Optimization "},{"metadata":{},"cell_type":"markdown","source":"The NSGA2 is a genetic algorithm which works by assigning fitness to each population member based on its performance in each objective. The member \"dominates\" another if it's assigned fitness is better than other, this creates an \"elite\" population. The elite population reproduce among themselves to produce even better off-springs. This process is done iteratively to arrive at an optimal set of solution known as the \"Pareto Front\". 
"},{"metadata":{},"cell_type":"markdown","source":"Begin Optimization!"},{"metadata":{"trusted":true},"cell_type":"code","source":"opt.Optimize(objectives, coords);","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"Let's collect the results and inspect our first set of solution."},{"metadata":{"trusted":true},"cell_type":"code","source":"arma::cube paretoFront = opt.ParetoFront();\n\nstd::cout << paretoFront.slice(0) << std::endl;","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"Convert to neccessary data structure."},{"metadata":{"trusted":true},"cell_type":"code","source":"size_t populationSize = paretoFront.n_slices;\n\n//! Store the X, Y coordinates of the Pareto Front\nstd::vector<double> frontX(populationSize, 0.);\nstd::vector<double> frontY(populationSize, 0.);\n\nfor (size_t idx = 0; idx < populationSize; ++idx)\n{\n frontX[idx] = paretoFront.slice(idx)(0);\n frontY[idx] = paretoFront.slice(idx)(1);\n}","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"### 4. Plotting"},{"metadata":{},"cell_type":"markdown","source":"Recall that previously, we appended -ve sign to the returns objective to convert it to minimization problem."},{"metadata":{"trusted":true},"cell_type":"code","source":"plt::figure_size(800, 800);\nplt::plot(frontX, frontY);\nplt::xlabel(\"Returns Objective\");\nplt::ylabel(\"Volatility Objective\");\n\nplt::title(\"The Pareto Front\");\nplt::legend();\n\nplt::save(\"./plot.png\");\nauto im = xw::image_from_file(\"plot.png\").finalize();\nim","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"### 5. Final Thoughts"},{"metadata":{},"cell_type":"markdown","source":"In this notebook, we've seen how a MultiObjective Optimization algorithm can help in investing in stocks. We specified custom stocks and seen in our algorithm optimize the returns vs volatility trade-off in live. 
Feel free to play around by selecting various stocks and see how the outcomes plays off. "}],"metadata":{"kernelspec":{"name":"xcpp14","display_name":"C++14","language":"C++14"},"language_info":{"codemirror_mode":"text/x-c++src","file_extension":".cpp","mimetype":"text/x-c++src","name":"c++","version":"14"}},"nbformat":4,"nbformat_minor":5}
jonpsy marked this conversation as resolved. Show resolved Hide resolved