From f910bcbe4aee453bdc465270699396ec07fc37f3 Mon Sep 17 00:00:00 2001 From: Nanubala Gnana Sai <45007169+jonpsy@users.noreply.github.com> Date: Tue, 25 May 2021 13:07:13 +0530 Subject: [PATCH 1/6] initial push --- .../portfolio-optimization-nsga2-cpp.ipynb | 1 + 1 file changed, 1 insertion(+) create mode 100644 portfolio_optimization_nsga2/portfolio-optimization-nsga2-cpp.ipynb diff --git a/portfolio_optimization_nsga2/portfolio-optimization-nsga2-cpp.ipynb b/portfolio_optimization_nsga2/portfolio-optimization-nsga2-cpp.ipynb new file mode 100644 index 00000000..5ee73536 --- /dev/null +++ b/portfolio_optimization_nsga2/portfolio-optimization-nsga2-cpp.ipynb @@ -0,0 +1 @@ +{"cells":[{"metadata":{"trusted":true},"cell_type":"code","source":"/**\n * @file portfolio-optimization-nsga2-cpp.ipynb\n *\n * A simple practical application of Non Dominated Sorting Genetic Algorithm-2\n * (NSGA2) in portfolio optimization. This example allows user to freely choose \n * multiple stocks of their choice, which upon request, generates csv automagically \n * via a helper function.\n *\n * The algorithm will try and optimize the trade-off between the returns and\n * volatility of the requested stocks.\n *\n * Data from Pandas Datareader library (https://pandas-datareader.readthedocs.io/en/latest/).\n */","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"#define ARMA_DONT_USE_WRAPPER","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"#include \n\n#include \n#include \"../utils/portfolio.hpp\"","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"// Header files to create and show the plot.\n#define WITHOUT_NUMPY 1\n#include \"matplotlibcpp.h\"\n#include \"xwidgets/ximage.hpp\"\n\nnamespace plt = matplotlibcpp;","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"using namespace 
ens;","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"using namespace ens::test;","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"### 1. Set the Model Parameters"},{"metadata":{},"cell_type":"markdown","source":"In this section, we will select the parameters for the optimizer. Parameters include name of the stocks, starting date, ending date and Finance API Source."},{"metadata":{"trusted":true},"cell_type":"code","source":"//! Declare user specified data.\nstd::string stocks, startDate, endDate, dataSource;","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"std::cout << \"Type the name of symbol of the stocks via comma separated values (no spaces)\" << std::endl;\nstd::cin >> stocks;","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"We're setting the data source to Yahoo Finance API by default. We encourage users to use custom data source, please refer pandas-datareader documentation for a list of available API sources."},{"metadata":{"trusted":true},"cell_type":"code","source":"dataSource = \"yahoo\";\n\n//! Uncomment to set custom data-source\n//std::cin >> dataSource;","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"std::cout << \"Starting Date (YYYY/MM/DD or DD/MM/YYYY)\" << std::endl;\nstd::cin >> startDate;","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"std::cout << \"End Date (YYYY/MM/DD or DD/MM/YYYY)\" << std::endl;\nstd::cin >> endDate;","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"### 2. Loading the Dataset"},{"metadata":{},"cell_type":"markdown","source":"In this section, we will create a helper class which will generate the CSV file for us based on the parameters provided in previous sections. 
This class would also define the objective functions in question, namely: Return and Volatility. Ideally, we would want to maximize the returns and reduce the volatility. Since our implementation of algorithm works on minimization of all objectives, we have appended negative sign to the returns objective which converts it into a minimization problem."},{"metadata":{"trusted":true},"cell_type":"code","source":"class PortfolioFunction\n{\n public:\n PortfolioFunction(const std::string& stocks,\n const std::string& dataSource,\n const std::string& startDate,\n const std::string& endDate)\n {\n //! Generate the requested csv file.\n Portfolio(stocks, dataSource, startDate, endDate,\"portfolio.csv\");\n returns.load(\"portfolio.csv\", arma::csv_ascii);\n returns.shed_col(0);\n\n assets = returns.n_cols;\n }\n\n //! Get the starting point.\n arma::mat GetInitialPoint()\n {\n return arma::Col(assets, 1, arma::fill::zeros);\n }\n\n struct ObjectiveA\n {\n ObjectiveA(const arma::mat& returns) : returns(returns) {}\n\n double Evaluate(const arma::mat& coords)\n {\n const double portfolioReturns = arma::accu(arma::mean(returns) %\n coords.t()) * 252;\n\n return -portfolioReturns;\n }\n\n arma::mat returns;\n };\n\n struct ObjectiveB\n {\n ObjectiveB(const arma::mat& returns) : returns(returns) {}\n\n double Evaluate(const arma::mat& coords)\n {\n const double portfolioVolatility = arma::as_scalar(arma::sqrt(\n coords.t() * arma::cov(returns) * 252 * coords));\n return portfolioVolatility;\n }\n\n arma::mat returns;\n };\n\n //! Get objective functions.\n std::tuple GetObjectives()\n {\n return std::make_tuple(ObjectiveA(returns), ObjectiveB(returns));\n }\n\n arma::mat returns;\n size_t assets;\n};\n\n\n//! 
The constructor will generate the csv file.\nPortfolioFunction pf(stocks, dataSource, startDate, endDate);\n\nconst double lowerBound = 0;\nconst double upperBound = 1;\n\nens::NSGA2 opt(20, // population size: The number of candidates in the population.\n 300, // max generations: The maximum number of generations allowed.\n 0.5, // crossover probability: The probability that the elites reproduce.\n 0.5, // mutation probability: The probability of mutation among the elite.\n 1e-3, // mutation strength: The strength of the mutation.\n 1e-6, // epsilon: The minimum difference required to distinguish between two solutions.\n lowerBound, // lowerBound: Lower bound of the coordinates of the initial population\n upperBound // upperBound: Upper bound of the coordinates of the initial population\n );\n\narma::mat coords = pf.GetInitialPoint();\nauto objectives = pf.GetObjectives();","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"### 3. Optimization "},{"metadata":{},"cell_type":"markdown","source":"The NSGA2 is a genetic algorithm which works by assigning fitness to each population member based on its performance in each objective. The member \"dominates\" another if it's assigned fitness is better than other, this creates an \"elite\" population. The elite population reproduce among themselves to produce even better off-springs. This process is done iteratively to arrive at an optimal set of solution known as the \"Pareto Front\". 
"},{"metadata":{},"cell_type":"markdown","source":"Begin Optimization!"},{"metadata":{"trusted":true},"cell_type":"code","source":"opt.Optimize(objectives, coords);","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"Let's collect the results and inspect our first set of solution."},{"metadata":{"trusted":true},"cell_type":"code","source":"arma::cube paretoFront = opt.ParetoFront();\n\nstd::cout << paretoFront.slice(0) << std::endl;","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"Convert to neccessary data structure."},{"metadata":{"trusted":true},"cell_type":"code","source":"size_t populationSize = paretoFront.n_slices;\n\n//! Store the X, Y coordinates of the Pareto Front\nstd::vector frontX(populationSize, 0.);\nstd::vector frontY(populationSize, 0.);\n\nfor (size_t idx = 0; idx < populationSize; ++idx)\n{\n frontX[idx] = paretoFront.slice(idx)(0);\n frontY[idx] = paretoFront.slice(idx)(1);\n}","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"### 4. Plotting"},{"metadata":{},"cell_type":"markdown","source":"Recall that previously, we appended -ve sign to the returns objective to convert it to minimization problem."},{"metadata":{"trusted":true},"cell_type":"code","source":"plt::figure_size(800, 800);\nplt::plot(frontX, frontY);\nplt::xlabel(\"Returns Objective\");\nplt::ylabel(\"Volatility Objective\");\n\nplt::title(\"The Pareto Front\");\nplt::legend();\n\nplt::save(\"./plot.png\");\nauto im = xw::image_from_file(\"plot.png\").finalize();\nim","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"### 5. Final Thoughts"},{"metadata":{},"cell_type":"markdown","source":"In this notebook, we've seen how a MultiObjective Optimization algorithm can help in investing in stocks. We specified custom stocks and seen in our algorithm optimize the returns vs volatility trade-off in live. 
Feel free to play around by selecting various stocks and see how the outcomes plays off. "}],"metadata":{"kernelspec":{"name":"xcpp14","display_name":"C++14","language":"C++14"},"language_info":{"codemirror_mode":"text/x-c++src","file_extension":".cpp","mimetype":"text/x-c++src","name":"c++","version":"14"}},"nbformat":4,"nbformat_minor":5} \ No newline at end of file From da81159d0b062f0a0cc38eff5c2a3999fb561bb3 Mon Sep 17 00:00:00 2001 From: Nanubala Gnana Sai <45007169+jonpsy@users.noreply.github.com> Date: Wed, 26 May 2021 02:50:11 +0530 Subject: [PATCH 2/6] Plotting working! Conclusion done. --- .../portfolio-optimization-nsga2-cpp.ipynb | 404 +++++++++++++++++- 1 file changed, 403 insertions(+), 1 deletion(-) diff --git a/portfolio_optimization_nsga2/portfolio-optimization-nsga2-cpp.ipynb b/portfolio_optimization_nsga2/portfolio-optimization-nsga2-cpp.ipynb index 5ee73536..abb52d88 100644 --- a/portfolio_optimization_nsga2/portfolio-optimization-nsga2-cpp.ipynb +++ b/portfolio_optimization_nsga2/portfolio-optimization-nsga2-cpp.ipynb @@ -1 +1,403 @@ -{"cells":[{"metadata":{"trusted":true},"cell_type":"code","source":"/**\n * @file portfolio-optimization-nsga2-cpp.ipynb\n *\n * A simple practical application of Non Dominated Sorting Genetic Algorithm-2\n * (NSGA2) in portfolio optimization. 
This example allows user to freely choose \n * multiple stocks of their choice, which upon request, generates csv automagically \n * via a helper function.\n *\n * The algorithm will try and optimize the trade-off between the returns and\n * volatility of the requested stocks.\n *\n * Data from Pandas Datareader library (https://pandas-datareader.readthedocs.io/en/latest/).\n */","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"#define ARMA_DONT_USE_WRAPPER","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"#include \n\n#include \n#include \"../utils/portfolio.hpp\"","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"// Header files to create and show the plot.\n#define WITHOUT_NUMPY 1\n#include \"matplotlibcpp.h\"\n#include \"xwidgets/ximage.hpp\"\n\nnamespace plt = matplotlibcpp;","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"using namespace ens;","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"using namespace ens::test;","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"### 1. Set the Model Parameters"},{"metadata":{},"cell_type":"markdown","source":"In this section, we will select the parameters for the optimizer. Parameters include name of the stocks, starting date, ending date and Finance API Source."},{"metadata":{"trusted":true},"cell_type":"code","source":"//! 
Declare user specified data.\nstd::string stocks, startDate, endDate, dataSource;","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"std::cout << \"Type the name of symbol of the stocks via comma separated values (no spaces)\" << std::endl;\nstd::cin >> stocks;","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"We're setting the data source to Yahoo Finance API by default. We encourage users to use custom data source, please refer pandas-datareader documentation for a list of available API sources."},{"metadata":{"trusted":true},"cell_type":"code","source":"dataSource = \"yahoo\";\n\n//! Uncomment to set custom data-source\n//std::cin >> dataSource;","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"std::cout << \"Starting Date (YYYY/MM/DD or DD/MM/YYYY)\" << std::endl;\nstd::cin >> startDate;","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"std::cout << \"End Date (YYYY/MM/DD or DD/MM/YYYY)\" << std::endl;\nstd::cin >> endDate;","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"### 2. Loading the Dataset"},{"metadata":{},"cell_type":"markdown","source":"In this section, we will create a helper class which will generate the CSV file for us based on the parameters provided in previous sections. This class would also define the objective functions in question, namely: Return and Volatility. Ideally, we would want to maximize the returns and reduce the volatility. 
Since our implementation of algorithm works on minimization of all objectives, we have appended negative sign to the returns objective which converts it into a minimization problem."},{"metadata":{"trusted":true},"cell_type":"code","source":"class PortfolioFunction\n{\n public:\n PortfolioFunction(const std::string& stocks,\n const std::string& dataSource,\n const std::string& startDate,\n const std::string& endDate)\n {\n //! Generate the requested csv file.\n Portfolio(stocks, dataSource, startDate, endDate,\"portfolio.csv\");\n returns.load(\"portfolio.csv\", arma::csv_ascii);\n returns.shed_col(0);\n\n assets = returns.n_cols;\n }\n\n //! Get the starting point.\n arma::mat GetInitialPoint()\n {\n return arma::Col(assets, 1, arma::fill::zeros);\n }\n\n struct ObjectiveA\n {\n ObjectiveA(const arma::mat& returns) : returns(returns) {}\n\n double Evaluate(const arma::mat& coords)\n {\n const double portfolioReturns = arma::accu(arma::mean(returns) %\n coords.t()) * 252;\n\n return -portfolioReturns;\n }\n\n arma::mat returns;\n };\n\n struct ObjectiveB\n {\n ObjectiveB(const arma::mat& returns) : returns(returns) {}\n\n double Evaluate(const arma::mat& coords)\n {\n const double portfolioVolatility = arma::as_scalar(arma::sqrt(\n coords.t() * arma::cov(returns) * 252 * coords));\n return portfolioVolatility;\n }\n\n arma::mat returns;\n };\n\n //! Get objective functions.\n std::tuple GetObjectives()\n {\n return std::make_tuple(ObjectiveA(returns), ObjectiveB(returns));\n }\n\n arma::mat returns;\n size_t assets;\n};\n\n\n//! 
The constructor will generate the csv file.\nPortfolioFunction pf(stocks, dataSource, startDate, endDate);\n\nconst double lowerBound = 0;\nconst double upperBound = 1;\n\nens::NSGA2 opt(20, // population size: The number of candidates in the population.\n 300, // max generations: The maximum number of generations allowed.\n 0.5, // crossover probability: The probability that the elites reproduce.\n 0.5, // mutation probability: The probability of mutation among the elite.\n 1e-3, // mutation strength: The strength of the mutation.\n 1e-6, // epsilon: The minimum difference required to distinguish between two solutions.\n lowerBound, // lowerBound: Lower bound of the coordinates of the initial population\n upperBound // upperBound: Upper bound of the coordinates of the initial population\n );\n\narma::mat coords = pf.GetInitialPoint();\nauto objectives = pf.GetObjectives();","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"### 3. Optimization "},{"metadata":{},"cell_type":"markdown","source":"The NSGA2 is a genetic algorithm which works by assigning fitness to each population member based on its performance in each objective. The member \"dominates\" another if it's assigned fitness is better than other, this creates an \"elite\" population. The elite population reproduce among themselves to produce even better off-springs. This process is done iteratively to arrive at an optimal set of solution known as the \"Pareto Front\". 
"},{"metadata":{},"cell_type":"markdown","source":"Begin Optimization!"},{"metadata":{"trusted":true},"cell_type":"code","source":"opt.Optimize(objectives, coords);","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"Let's collect the results and inspect our first set of solution."},{"metadata":{"trusted":true},"cell_type":"code","source":"arma::cube paretoFront = opt.ParetoFront();\n\nstd::cout << paretoFront.slice(0) << std::endl;","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"Convert to neccessary data structure."},{"metadata":{"trusted":true},"cell_type":"code","source":"size_t populationSize = paretoFront.n_slices;\n\n//! Store the X, Y coordinates of the Pareto Front\nstd::vector frontX(populationSize, 0.);\nstd::vector frontY(populationSize, 0.);\n\nfor (size_t idx = 0; idx < populationSize; ++idx)\n{\n frontX[idx] = paretoFront.slice(idx)(0);\n frontY[idx] = paretoFront.slice(idx)(1);\n}","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"### 4. Plotting"},{"metadata":{},"cell_type":"markdown","source":"Recall that previously, we appended -ve sign to the returns objective to convert it to minimization problem."},{"metadata":{"trusted":true},"cell_type":"code","source":"plt::figure_size(800, 800);\nplt::plot(frontX, frontY);\nplt::xlabel(\"Returns Objective\");\nplt::ylabel(\"Volatility Objective\");\n\nplt::title(\"The Pareto Front\");\nplt::legend();\n\nplt::save(\"./plot.png\");\nauto im = xw::image_from_file(\"plot.png\").finalize();\nim","execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":"### 5. Final Thoughts"},{"metadata":{},"cell_type":"markdown","source":"In this notebook, we've seen how a MultiObjective Optimization algorithm can help in investing in stocks. We specified custom stocks and seen in our algorithm optimize the returns vs volatility trade-off in live. 
Feel free to play around by selecting various stocks and see how the outcomes plays off. "}],"metadata":{"kernelspec":{"name":"xcpp14","display_name":"C++14","language":"C++14"},"language_info":{"codemirror_mode":"text/x-c++src","file_extension":".cpp","mimetype":"text/x-c++src","name":"c++","version":"14"}},"nbformat":4,"nbformat_minor":5} \ No newline at end of file +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "/**\n", + " * @file portfolio-optimization-nsga2-cpp.ipynb\n", + " *\n", + " * A simple practical application of Non Dominated Sorting Genetic Algorithm-2\n", + " * (NSGA2) in portfolio optimization. This example allows user to freely choose \n", + " * multiple stocks of their choice, which upon request, generates csv automagically \n", + " * via a helper function.\n", + " *\n", + " * The algorithm will try and optimize the trade-off between the returns and\n", + " * volatility of the requested stocks.\n", + " *\n", + " * Data from Pandas Datareader library (https://pandas-datareader.readthedocs.io/en/latest/).\n", + " */" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#define ARMA_DONT_USE_WRAPPER" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#include \n", + "\n", + "#include \n", + "#include \"../utils/portfolio.hpp\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "// Header files to create and show the plot.\n", + "#define WITHOUT_NUMPY 1\n", + "#include \"matplotlibcpp.h\"\n", + "#include \"xwidgets/ximage.hpp\"\n", + "\n", + "namespace plt = matplotlibcpp;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "using namespace ens;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "using namespace ens::test;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. Set the Model Parameters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this section, we will select the parameters for the optimizer. Parameters include name of the stocks, starting date, ending date and Finance API Source." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "//! Declare user specified data.\n", + "std::string stocks, startDate, endDate, dataSource;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stocks = \"FB,TSLA\";\n", + "\n", + "//! Uncomment to set custom stocks.\n", + "// std::cout << \"Type the name of symbol of the stocks via comma separated values (no spaces)\" << std::endl;\n", + "// std::cin >> stocks;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We're setting the data source to Yahoo Finance API by default. For custom data-source, refer pandas-datareader documentation to get the exhaustive list of available API sources." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataSource = \"yahoo\";\n", + "\n", + "//! Uncomment to set custom data-source.\n", + "//std::cin >> dataSource;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "startDate = \"15/01/2020\";\n", + "\n", + "//! Uncomment to set custom start-date.\n", + "// std::cout << \"Starting Date (YYYY/MM/DD or DD/MM/YYYY)\" << std::endl;\n", + "// std::cin >> startDate;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "endDate = \"15/01/2021\";\n", + "\n", + "//! 
Uncomment to set custom end-date.\n", + "// std::cout << \"End Date (YYYY/MM/DD or DD/MM/YYYY)\" << std::endl;\n", + "// std::cin >> endDate;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Loading the Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this section, we will create a helper class which will generate the CSV file for us based on the parameters provided in previous section. This class would also define the objective functions in question, namely: Return and Volatility. Ideally, we would want to maximize the returns and reduce the volatility. Since our implementation of algorithm works on minimization of all objectives, we have appended negative sign to the returns objective which converts it into a minimization problem." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class PortfolioFunction\n", + "{\n", + " public:\n", + " PortfolioFunction(const std::string& stocks,\n", + " const std::string& dataSource,\n", + " const std::string& startDate,\n", + " const std::string& endDate)\n", + " {\n", + " //! Generate the requested csv file.\n", + " Portfolio(stocks, dataSource, startDate, endDate,\"portfolio.csv\");\n", + " returns.load(\"portfolio.csv\", arma::csv_ascii);\n", + " returns.shed_col(0);\n", + "\n", + " assets = returns.n_cols;\n", + " }\n", + "\n", + " //! Get the starting point.\n", + " arma::mat GetInitialPoint()\n", + " {\n", + " return arma::Col(assets, 1, arma::fill::zeros);\n", + " }\n", + "\n", + " struct ReturnsObjective\n", + " {\n", + " ReturnsObjective(const arma::mat& returns) : returns(returns) {}\n", + "\n", + " double Evaluate(const arma::mat& coords)\n", + " {\n", + " const double portfolioReturns = arma::accu(arma::mean(returns) %\n", + " coords.t()) * 252;\n", + " \n", + " //! 
Negative sign appended to convert to minimization problem.\n", + " return -portfolioReturns;\n", + " }\n", + "\n", + " arma::mat returns;\n", + " };\n", + "\n", + " struct VolatilityObjective\n", + " {\n", + " VolatilityObjective(const arma::mat& returns) : returns(returns) {}\n", + "\n", + " double Evaluate(const arma::mat& coords)\n", + " {\n", + " const double portfolioVolatility = arma::as_scalar(arma::sqrt(\n", + " coords.t() * arma::cov(returns) * 252 * coords));\n", + " return portfolioVolatility;\n", + " }\n", + "\n", + " arma::mat returns;\n", + " };\n", + "\n", + " //! Get objective functions.\n", + " std::tuple GetObjectives()\n", + " {\n", + " return std::make_tuple(ReturnsObjective(returns), VolatilityObjective(returns));\n", + " }\n", + "\n", + " arma::mat returns;\n", + " size_t assets;\n", + "};\n", + "\n", + "\n", + "//! The constructor will generate the csv file.\n", + "PortfolioFunction pf(stocks, dataSource, startDate, endDate);\n", + "\n", + "const double lowerBound = 0;\n", + "const double upperBound = 1;\n", + "\n", + "ens::NSGA2 opt(50, // population size: The number of candidates in the population.\n", + " 300, // max generations: The maximum number of generations allowed.\n", + " 0.5, // crossover probability: The probability that the elites reproduce.\n", + " 0.5, // mutation probability: The probability of mutation among the elite.\n", + " 1e-3, // mutation strength: The strength of the mutation.\n", + " 1e-6, // epsilon: The minimum difference required to distinguish between two solutions.\n", + " lowerBound, // lowerBound: Lower bound of the coordinates of the initial population\n", + " upperBound // upperBound: Upper bound of the coordinates of the initial population\n", + " );\n", + "\n", + "arma::mat coords = pf.GetInitialPoint();\n", + "auto objectives = pf.GetObjectives();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. 
Optimization " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The NSGA2 is a genetic algorithm which works by assigning fitness to each population member based on its collective performance on all the objectives. A member \"dominates\" another if it's assigned fitness is better than other, this results in an \"elite\" population. The elite population reproduce among themselves to produce superior off-springs. This process is done iteratively to arrive at an optimal set of solution known as the \"Pareto Front\". " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Begin Optimization! (This will take a fair amount of time)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "opt.Optimize(objectives, coords);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's collect the results and inspect our first set of solution." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "arma::cube paretoFront = opt.ParetoFront();\n", + "\n", + "std::cout << paretoFront.slice(0) << std::endl;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Convert to neccessary data structure." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "size_t populationSize = paretoFront.n_slices;\n", + "\n", + "//! Store the X, Y coordinates of the Pareto Front\n", + "std::vector frontX(populationSize, 0.);\n", + "std::vector frontY(populationSize, 0.);\n", + "\n", + "for (size_t idx = 0; idx < populationSize; ++idx)\n", + "{\n", + " // Append negative again to restore the original \n", + " // maximization objective.\n", + " frontX[idx] = -paretoFront.slice(idx)(0);\n", + " frontY[idx] = paretoFront.slice(idx)(1);\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4. 
Plotting" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As said before, we desire higher returns and lower volatility. The Pareto Front generated gives an optimal set of solutions such that, higher volatility is traded-off with higher returns and vice-versa. Hence, all the solutions are \"optimal\". Based on user's preference, he/she can choose their solution from the generated front." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt::figure_size(800, 800);\n", + "plt::scatter(frontX, frontY, 50);\n", + "\n", + "plt::xlabel(\"Returns Objective\");\n", + "plt::ylabel(\"Volatility Objective\");\n", + "\n", + "plt::title(\"The Pareto Front\");\n", + "plt::legend();\n", + "\n", + "plt::save(\"./plot.png\");\n", + "auto im = xw::image_from_file(\"plot.png\").finalize();\n", + "im" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5. Final Thoughts" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this notebook, we've seen how a MultiObjective Optimization algorithm can help in investing in stocks. We specified our stocks and witnessed our algorithm optimize the returns vs volatility trade-off in live. Feel free to play around by selecting various stocks, start-date, end-date and see how the outcomes plays off. 
" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "C++14", + "language": "C++14", + "name": "xcpp14" + }, + "language_info": { + "codemirror_mode": "text/x-c++src", + "file_extension": ".cpp", + "mimetype": "text/x-c++src", + "name": "c++", + "version": "14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 705b131374831959cc18f7c27068d5fded74e82e Mon Sep 17 00:00:00 2001 From: Nanubala Gnana Sai <45007169+jonpsy@users.noreply.github.com> Date: Wed, 26 May 2021 03:19:38 +0530 Subject: [PATCH 3/6] added zoq channel and ensmallen dep --- binder/environment.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/binder/environment.yml b/binder/environment.yml index a87415d1..ba35f7a7 100644 --- a/binder/environment.yml +++ b/binder/environment.yml @@ -1,5 +1,6 @@ channels: - conda-forge +- zoq dependencies: # Jupyter - jupyterlab=3 @@ -49,3 +50,4 @@ dependencies: - pandas - gxx_linux-64 - mlpack +- ensmallen From 34b84ddae2e5f9be8fe0ff64c49d6fb1d3eba3e6 Mon Sep 17 00:00:00 2001 From: Nanubala Gnana Sai <45007169+jonpsy@users.noreply.github.com> Date: Wed, 26 May 2021 19:55:02 +0530 Subject: [PATCH 4/6] Update portfolio_optimization_nsga2/portfolio-optimization-nsga2-cpp.ipynb Co-authored-by: Marcus Edel --- .../portfolio-optimization-nsga2-cpp.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/portfolio_optimization_nsga2/portfolio-optimization-nsga2-cpp.ipynb b/portfolio_optimization_nsga2/portfolio-optimization-nsga2-cpp.ipynb index abb52d88..768ee40e 100644 --- a/portfolio_optimization_nsga2/portfolio-optimization-nsga2-cpp.ipynb +++ b/portfolio_optimization_nsga2/portfolio-optimization-nsga2-cpp.ipynb @@ -322,7 +322,7 @@ "source": [ "size_t populationSize = paretoFront.n_slices;\n", "\n", - "//! Store the X, Y coordinates of the Pareto Front\n", + "//! 
Store the X, Y coordinates of the Pareto Front.\n", "std::vector frontX(populationSize, 0.);\n", "std::vector frontY(populationSize, 0.);\n", "\n", From f18b6119e474321fbd743f5bd340dd9b67bcd54f Mon Sep 17 00:00:00 2001 From: Nanubala Gnana Sai <45007169+jonpsy@users.noreply.github.com> Date: Thu, 27 May 2021 15:54:57 +0530 Subject: [PATCH 5/6] Revert "Added callback" This reverts commit af0085e2884af9c5a69af0f8746944d57d5593ba. Added callback --- .../portfolio-optimization-nsga2-cpp.ipynb | 71 ++++++++++++++++++- 1 file changed, 68 insertions(+), 3 deletions(-) diff --git a/portfolio_optimization_nsga2/portfolio-optimization-nsga2-cpp.ipynb b/portfolio_optimization_nsga2/portfolio-optimization-nsga2-cpp.ipynb index abb52d88..a76c6bdc 100644 --- a/portfolio_optimization_nsga2/portfolio-optimization-nsga2-cpp.ipynb +++ b/portfolio_optimization_nsga2/portfolio-optimization-nsga2-cpp.ipynb @@ -245,7 +245,7 @@ "const double lowerBound = 0;\n", "const double upperBound = 1;\n", "\n", - "ens::NSGA2 opt(50, // population size: The number of candidates in the population.\n", + "ens::NSGA2 opt(20, // population size: The number of candidates in the population.\n", " 300, // max generations: The maximum number of generations allowed.\n", " 0.5, // crossover probability: The probability that the elites reproduce.\n", " 0.5, // mutation probability: The probability of mutation among the elite.\n", @@ -259,6 +259,71 @@ "auto objectives = pf.GetObjectives();" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. Building Custom Callback" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We would like to query our optimizer after a certain number of generations " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class QueryFront\n", + "{\n", + " public:\n", + " //! 
Setup the Callback with user defined queryRate value and paretoFrontArray.\n", + " QueryFront(const size_t queryRate, std::vector& paretoFrontArray) : \n", + " queryRate(queryRate), \n", + " paretoFrontArray(paretoFrontArray), \n", + " genCounter(0)\n", + " { /* Nothing to do here */ }\n", + " \n", + " template\n", + " void StepTaken(OptimizerType& opt,\n", + " FunctionType& /* function */,\n", + " MatType& /* coordinates */)\n", + " {\n", + " if(genCounter % queryRate == 0)\n", + " paretoFrontArray.push_back(opt.ParetoFront());\n", + " \n", + " ++genCounter;\n", + " }\n", + " \n", + " private:\n", + " //! A reference to the array of pareto fronts.\n", + " std::vector& paretoFrontArray;\n", + " //! The rate of query.\n", + " size_t queryRate;\n", + " //! A counter for the current generation.\n", + " size_t genCounter;\n", + "};" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "//! We will store the array of pareto fronts in this data structure.\n", + "std::vector paretoFrontArray;\n", + "size_t queryRate = 20;\n", + "\n", + "//! Uncomment to set custom query rate.\n", + "// std::cin >> queryRate;" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -286,7 +351,7 @@ "metadata": {}, "outputs": [], "source": [ - "opt.Optimize(objectives, coords);" + "opt.Optimize(objectives, coords, QueryFront(20, paretoFrontArray));" ] }, { @@ -322,7 +387,7 @@ "source": [ "size_t populationSize = paretoFront.n_slices;\n", "\n", - "//! Store the X, Y coordinates of the Pareto Front\n", + "//! Store the X, Y coordinates of the Pareto Front.\n", "std::vector frontX(populationSize, 0.);\n", "std::vector frontY(populationSize, 0.);\n", "\n", From f8bedd4e2199d787660a7db0ce0109a948b96947 Mon Sep 17 00:00:00 2001 From: Nanubala Gnana Sai <45007169+jonpsy@users.noreply.github.com> Date: Thu, 27 May 2021 21:55:48 +0530 Subject: [PATCH 6/6] - remove #ARMA_DONT_USE_WRAPPER. - Add dominance relation in Optimize section. 
- Explain X and Y-Axis in Plotting section. - Use the parameters from the blog. --- .../portfolio-optimization-nsga2-cpp.ipynb | 140 +++++------------- 1 file changed, 41 insertions(+), 99 deletions(-) diff --git a/portfolio_optimization_nsga2/portfolio-optimization-nsga2-cpp.ipynb b/portfolio_optimization_nsga2/portfolio-optimization-nsga2-cpp.ipynb index a76c6bdc..073f8f6a 100644 --- a/portfolio_optimization_nsga2/portfolio-optimization-nsga2-cpp.ipynb +++ b/portfolio_optimization_nsga2/portfolio-optimization-nsga2-cpp.ipynb @@ -21,15 +21,6 @@ " */" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#define ARMA_DONT_USE_WRAPPER" - ] - }, { "cell_type": "code", "execution_count": null, @@ -104,7 +95,7 @@ "metadata": {}, "outputs": [], "source": [ - "stocks = \"FB,TSLA\";\n", + "stocks = \"AAPL,NKE,GOOGL,AMZN\";\n", "\n", "//! Uncomment to set custom stocks.\n", "// std::cout << \"Type the name of symbol of the stocks via comma separated values (no spaces)\" << std::endl;\n", @@ -136,7 +127,7 @@ "metadata": {}, "outputs": [], "source": [ - "startDate = \"15/01/2020\";\n", + "startDate = \"01/01/2015\";\n", "\n", "//! Uncomment to set custom start-date.\n", "// std::cout << \"Starting Date (YYYY/MM/DD or DD/MM/YYYY)\" << std::endl;\n", @@ -149,7 +140,7 @@ "metadata": {}, "outputs": [], "source": [ - "endDate = \"15/01/2021\";\n", + "endDate = \"31/12/2019\";\n", "\n", "//! 
Uncomment to set custom end-date.\n", "// std::cout << \"End Date (YYYY/MM/DD or DD/MM/YYYY)\" << std::endl;\n", @@ -197,41 +188,42 @@ " {\n", " return arma::Col(assets, 1, arma::fill::zeros);\n", " }\n", - "\n", - " struct ReturnsObjective\n", + " \n", + " struct VolatilityObjective\n", " {\n", - " ReturnsObjective(const arma::mat& returns) : returns(returns) {}\n", + " VolatilityObjective(const arma::mat& returns) : returns(returns) {}\n", "\n", " double Evaluate(const arma::mat& coords)\n", " {\n", - " const double portfolioReturns = arma::accu(arma::mean(returns) %\n", - " coords.t()) * 252;\n", - " \n", - " //! Negative sign appended to convert to minimization problem.\n", - " return -portfolioReturns;\n", + " const double portfolioVolatility = arma::as_scalar(arma::sqrt(\n", + " coords.t() * arma::cov(returns) * 252 * coords));\n", + " return portfolioVolatility;\n", " }\n", "\n", " arma::mat returns;\n", " };\n", "\n", - " struct VolatilityObjective\n", + " struct ReturnsObjective\n", " {\n", - " VolatilityObjective(const arma::mat& returns) : returns(returns) {}\n", + " ReturnsObjective(const arma::mat& returns) : returns(returns) {}\n", "\n", " double Evaluate(const arma::mat& coords)\n", " {\n", - " const double portfolioVolatility = arma::as_scalar(arma::sqrt(\n", - " coords.t() * arma::cov(returns) * 252 * coords));\n", - " return portfolioVolatility;\n", + " const double portfolioReturns = arma::accu(arma::mean(returns) %\n", + " coords.t()) * 252;\n", + " \n", + " //! Negative sign appended to convert to minimization problem.\n", + " return -portfolioReturns;\n", " }\n", "\n", " arma::mat returns;\n", " };\n", "\n", + "\n", " //! 
Get objective functions.\n", - " std::tuple GetObjectives()\n", + " std::tuple GetObjectives()\n", " {\n", - " return std::make_tuple(ReturnsObjective(returns), VolatilityObjective(returns));\n", + " return std::make_tuple(VolatilityObjective(returns), ReturnsObjective(returns));\n", " }\n", "\n", " arma::mat returns;\n", @@ -259,71 +251,6 @@ "auto objectives = pf.GetObjectives();" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Building Custom Callback" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We would like to query our optimizer after a certain number of generations " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class QueryFront\n", - "{\n", - " public:\n", - " //! Setup the Callback with user defined queryRate value and paretoFrontArray.\n", - " QueryFront(const size_t queryRate, std::vector& paretoFrontArray) : \n", - " queryRate(queryRate), \n", - " paretoFrontArray(paretoFrontArray), \n", - " genCounter(0)\n", - " { /* Nothing to do here */ }\n", - " \n", - " template\n", - " void StepTaken(OptimizerType& opt,\n", - " FunctionType& /* function */,\n", - " MatType& /* coordinates */)\n", - " {\n", - " if(genCounter % queryRate == 0)\n", - " paretoFrontArray.push_back(opt.ParetoFront());\n", - " \n", - " ++genCounter;\n", - " }\n", - " \n", - " private:\n", - " //! A reference to the array of pareto fronts.\n", - " std::vector& paretoFrontArray;\n", - " //! The rate of query.\n", - " size_t queryRate;\n", - " //! A counter for the current generation.\n", - " size_t genCounter;\n", - "};" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "//! We will store the array of pareto fronts in this data structure.\n", - "std::vector paretoFrontArray;\n", - "size_t queryRate = 20;\n", - "\n", - "//! 
Uncomment to set custom query rate.\n", - "// std::cin >> queryRate;" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -335,7 +262,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The NSGA2 is a genetic algorithm which works by assigning fitness to each population member based on its collective performance on all the objectives. A member \"dominates\" another if it's assigned fitness is better than other, this results in an \"elite\" population. The elite population reproduce among themselves to produce superior off-springs. This process is done iteratively to arrive at an optimal set of solution known as the \"Pareto Front\". " + "The NSGA2 is a genetic algorithm which works by assigning fitness to each population member based on its collective performance on all the objectives. A member \"dominates\" another if its assigned fitness is better than the other's; this results in an \"elite\" population. The elite population reproduce among themselves to produce superior offspring. This process is done iteratively to arrive at an optimal set of solutions known as the \"Pareto Front\". \n", + "\n", + "The dominance relation is as follows:\n", + "\n", + "$$x_1\\ \\preceq_{D}\\ x_2$$ if $x_1$ is better than or equivalent to $x_2$ in all the specified objectives.\n", + "\n", + "where $\\preceq_{D}$ is the dominance symbol. 
" ] }, { @@ -351,7 +284,7 @@ "metadata": {}, "outputs": [], "source": [ - "opt.Optimize(objectives, coords, QueryFront(20, paretoFrontArray));" + "opt.Optimize(objectives, coords);" ] }, { @@ -393,10 +326,11 @@ "\n", "for (size_t idx = 0; idx < populationSize; ++idx)\n", "{\n", + "\n", + " frontX[idx] = paretoFront.slice(idx)(0);\n", " // Append negative again to restore the original \n", " // maximization objective.\n", - " frontX[idx] = -paretoFront.slice(idx)(0);\n", - " frontY[idx] = paretoFront.slice(idx)(1);\n", + " frontY[idx] = -paretoFront.slice(idx)(1);\n", "}" ] }, @@ -411,7 +345,15 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "As said before, we desire higher returns and lower volatility. The Pareto Front generated gives an optimal set of solutions such that, higher volatility is traded-off with higher returns and vice-versa. Hence, all the solutions are \"optimal\". Based on user's preference, he/she can choose their solution from the generated front." + "As said before, we desire higher returns and lower volatility. The Pareto Front generated gives an optimal set of solutions such that, higher volatility is traded-off with higher returns and vice-versa. Hence, all the solutions are \"optimal\". Based on user's preference, he/she can choose their solution from the generated front.\n", + "\n", + "The Axis Labels are as follows:\n", + "\n", + "X-Axis: Volatility\n", + "\n", + "Y-Axis: Returns\n", + "\n", + "We expect an increase in volatility with increase in returns." ] }, { @@ -423,8 +365,8 @@ "plt::figure_size(800, 800);\n", "plt::scatter(frontX, frontY, 50);\n", "\n", - "plt::xlabel(\"Returns Objective\");\n", - "plt::ylabel(\"Volatility Objective\");\n", + "plt::xlabel(\"Volatility\");\n", + "plt::ylabel(\"Returns\");\n", "\n", "plt::title(\"The Pareto Front\");\n", "plt::legend();\n",