diff --git a/AutoOfflineRL/CondaPkg.toml b/AutoOfflineRL/CondaPkg.toml new file mode 100644 index 0000000..1373550 --- /dev/null +++ b/AutoOfflineRL/CondaPkg.toml @@ -0,0 +1,10 @@ +channels = ["mkl", "scikit-learn"] + +[deps] +scikit-learn = "" +pandas = "" +numpy = "" +python = "" + +[pip.deps] +d3rlpy = "" diff --git a/AutoOfflineRL/LICENSE b/AutoOfflineRL/LICENSE new file mode 100644 index 0000000..554ba21 --- /dev/null +++ b/AutoOfflineRL/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Paulito Palmes, PhD and contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/AutoOfflineRL/Project.toml b/AutoOfflineRL/Project.toml new file mode 100644 index 0000000..f462f01 --- /dev/null +++ b/AutoOfflineRL/Project.toml @@ -0,0 +1,26 @@ +name = "AutoOfflineRL" +uuid = "4680bba7-2b59-4a6e-a544-0ebac8b8cdd3" +authors = ["Paulito Palmes, PhD "] +version = "0.1.0" + +[deps] +AMLPipelineBase = "e3c3008a-8869-4d53-9f34-c96f99c8a2b6" +CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" +CondaPkg = "992eb4ea-22a4-4c89-a5bb-47a3300528ab" +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" +Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" +MicroMamba = "0b3b1443-0f03-428d-bdfb-f27f9c1191ea" +Parquet = "626c502c-15b0-58ad-a749-f091afb673ae" +PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + +[compat] +julia = "1" + +[extras] +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[targets] +test = ["Test"] diff --git a/AutoOfflineRL/README.md b/AutoOfflineRL/README.md new file mode 100644 index 0000000..e69de29 diff --git a/AutoOfflineRL/data/smalldata.parquet b/AutoOfflineRL/data/smalldata.parquet new file mode 100644 index 0000000..d3eee0b Binary files /dev/null and b/AutoOfflineRL/data/smalldata.parquet differ diff --git a/AutoOfflineRL/examples/Project.toml b/AutoOfflineRL/examples/Project.toml new file mode 100644 index 0000000..9180c1f --- /dev/null +++ b/AutoOfflineRL/examples/Project.toml @@ -0,0 +1,9 @@ +[deps] +AutoMLPipeline = "08437348-eef5-4817-bc1b-d4e9459680d6" +AutoOfflineRL = "4680bba7-2b59-4a6e-a544-0ebac8b8cdd3" +CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" +IJulia = "7073ff75-c697-5162-941a-fcdaad2a7d2a" +Parquet = "626c502c-15b0-58ad-a749-f091afb673ae" +PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d" diff --git a/AutoOfflineRL/examples/demo.jl 
b/AutoOfflineRL/examples/demo.jl new file mode 100644 index 0000000..787c139 --- /dev/null +++ b/AutoOfflineRL/examples/demo.jl @@ -0,0 +1,90 @@
+using Distributed
+# Start local worker processes only when none have been added yet.
+nprocs() == 1 && addprocs()
+
+@everywhere begin
+    using AutoOfflineRL
+    using AutoMLPipeline
+    using Parquet
+    using DataFrames
+end
+
+@everywhere begin
+    # Pipeline elements are defined on every process so code run on workers can use them.
+    #### Scalers
+    rb = SKPreprocessor("RobustScaler")
+    pt = SKPreprocessor("PowerTransformer")
+    norm = SKPreprocessor("Normalizer")
+    mx = SKPreprocessor("MinMaxScaler")
+    std = SKPreprocessor("StandardScaler")
+    #### Column selectors
+    catf = CatFeatureSelector()
+    numf = NumFeatureSelector()
+    #### Feature extractors (decomposition)
+    ## auto-component variants, currently disabled:
+    #apca = SKPreprocessor("PCA",Dict(:autocomponent=>true,:name=>"autoPCA"));
+    #afa = SKPreprocessor("FactorAnalysis",Dict(:autocomponent=>true,:name=>"autoFA"));
+    #aica = SKPreprocessor("FastICA",Dict(:autocomponent=>true,:name=>"autoICA"));
+    pca = SKPreprocessor("PCA")
+    fa = SKPreprocessor("FactorAnalysis")
+    ica = SKPreprocessor("FastICA")
+    noop = Identity(Dict(:name => "Noop"))
+end
+
+# Load the dataset shipped with the package and drop rows that contain missing values.
+path = pkgdir(AutoOfflineRL)
+dataset = joinpath(path, "data", "smalldata.parquet")
+df = Parquet.read_parquet(dataset) |> DataFrame |> dropmissing
+
+#df = df[:,["day", "hour", "minute", "dow"]]
+#df.sensor1 = rand(1:500,srow)
+#df.sensor2 = rand(1:200,srow)
+#df.sensor3 = rand(1:100,srow)
+#df.action = rand([10,50,100],srow)
+#df.reward = rand(srow)
+# Split the table into the observation frame and the action/reward/terminal frames.
+srow = nrow(df)
+observation = df[:, ["day", "hour", "minute", "dow", "sensor1", "sensor2", "sensor3"]]
+reward = df[:, ["reward"]]  # df[:, cols] already returns a fresh DataFrame
+action = df[:, ["action"]]
+_terminals = zeros(Int, srow)
+_terminals[100:1000:min(9000, srow)] .= 1  # clamped: no BoundsError on short tables
+_terminals[end] = 1  # the last row always closes the final episode
+dterminal = DataFrame(terminal=_terminals)
+action_reward_terminal = DataFrame[action, reward, dterminal]
+
+agent = DiscreteRLOffline("NFQ")
+pipe = (numf |> mx |> pca) |> agent
+crossvalidateRL(pipe, observation, action_reward_terminal)
+
+"""
+    pipelinesearch()
+
+Cross-validate every agent × scaler × extractor pipeline on the worker processes and
+return a `DataFrame` with one (`pipeline`, `td_error`) row per run with a non-NaN result.
+"""
+function pipelinesearch()
+    agentnames = ["DiscreteCQL", "NFQ", "DoubleDQN", "DiscreteSAC", "DiscreteBCQ", "DiscreteBC", "DQN"]
+    scalers = [rb, pt, norm, std, mx, noop]
+    extractors = [pca, ica, fa, noop]
+    # One flat work list for a single @distributed loop (no loops nested on the workers).
+    combos = vec(collect(Iterators.product(agentnames, scalers, extractors)))
+    dfresults = @distributed (vcat) for combo in combos
+        agentname, sc, xt = combo
+        try
+            rlagent = DiscreteRLOffline(agentname, Dict(:runtime_args => Dict(:n_epochs => 1)))
+            rlpipeline = (numf |> sc |> xt) |> rlagent
+            res = crossvalidateRL(rlpipeline, observation, action_reward_terminal)
+            # Drop the last 4 characters of each name (presumably a generated suffix — TODO confirm).
+            pname = join((chop(c.name; tail=4) for c in (sc, xt, rlagent)), " |> ")
+            isnan(res) ? DataFrame() : DataFrame(pipeline=pname, td_error=res)
+        catch e
+            println("error in $agentname ($(sc.name) |> $(xt.name)): ", sprint(showerror, e))
+            DataFrame()
+        end
+    end
+    return dfresults
+end
+dftable = pipelinesearch()
+isempty(dftable) || sort!(dftable, :td_error)  # no td_error column when every run failed
+show(dftable, allcols=true, allrows=true, truncate=0)
diff --git a/AutoOfflineRL/examples/offlinerl.png b/AutoOfflineRL/examples/offlinerl.png new file mode 100644 index 0000000..526a9ca Binary files /dev/null and b/AutoOfflineRL/examples/offlinerl.png differ diff --git a/AutoOfflineRL/examples/presentation.ipynb b/AutoOfflineRL/examples/presentation.ipynb new file mode 100644 index 0000000..5dba41c --- /dev/null +++ b/AutoOfflineRL/examples/presentation.ipynb @@ -0,0 +1,3164 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "80737302", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "# Wrapping Up Offline RL as part of AutoMLPipeline Workflow\n", + "\n", + "- Paulito Palmes\n", + "- IBM Research Europe\n", + "- Dublin Research Lab" + ] + }, + { + "cell_type": "markdown", + "id": "36560868", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Preliminaries\n" + ] + }, + { + "cell_type": "markdown", + "id": "5a4b43d3", + "metadata": { +
"slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "### Online RL vs Offline RL" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "7010ee90", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "### Online RL\n", + "\n", + "\"online\n", + "\n", + "- maximize return (the accumulated sum of discounted current and future rewards)\n", + "- each observation belongs to a sequence that forms a trajectory\n", + "- each action influences future observations and accumulated rewards\n", + "- unlike a typical ML problem, where the objective is to make a one-time prediction of the action to take, RL makes a series of predictions dynamically as it receives observations and optimizes the accumulation of the corresponding rewards\n", + "\n", + "ref: https://rail.eecs.berkeley.edu/deeprlcourse/" + ] + }, + { + "cell_type": "markdown", + "id": "01fa7d38", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "### Offline RL\n", + "\n", + "\"offline\n", + "\n", + "- in online RL, new data is collected for policy update\n", + "- in off-policy RL, some old data are retained together with new data for policy update\n", + "- in offline RL, all data is collected in advance to train an RL agent for an optimal policy by sampling episodes\n", + "\n", + "ref: https://rail.eecs.berkeley.edu/deeprlcourse/" + ] + }, + { + "cell_type": "markdown", + "id": "be0e2f21", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "### Why Offline RL\n", + "\n", + "- Cost: it can be too expensive to interact repeatedly with and explore certain environments, such as workload or resource management in the cloud, but cheaper to collect logs and statistics for offline RL learning\n", + "- Risk: it can be risky to train an agent in autonomous driving and robotic operations\n", + "- Technological advancement in batch learning with deep learning architectures that can scale up effectively for large datasets" + ] +
}, + { + "cell_type": "markdown", + "id": "97842e27", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Major Objective\n", + "\n", + "Given a dataset containing trajectories, create an AutoMLPipeline wrapper function for offline RL to make it trivial to search for the best data processing pipeline for offline RL application." + ] + }, + { + "cell_type": "markdown", + "id": "293d109d", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Load packages " + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "id": "f827b577-0f46-40e7-ab51-ebba5ed919c4", + "metadata": { + "scrolled": true, + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "using Distributed\n", + "\n", + "nprocs() == 1 && addprocs() \n", + "\n", + "@everywhere begin\n", + " using AutoOfflineRL\n", + " using AutoMLPipeline\n", + " using Parquet\n", + " using DataFrames\n", + "end" + ] + }, + { + "cell_type": "markdown", + "id": "fa381a6e", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Load Preprocessing Elements" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "id": "6d7d36e9-07c4-4f69-8c18-b90af19dc6e1", + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "@everywhere begin\n", + " #### Scaler\n", + " rb = SKPreprocessor(\"RobustScaler\");\n", + " pt = SKPreprocessor(\"PowerTransformer\");\n", + " norm = SKPreprocessor(\"Normalizer\");\n", + " mx = SKPreprocessor(\"MinMaxScaler\");\n", + " std = SKPreprocessor(\"StandardScaler\")\n", + " ##### Column selector\n", + " catf = CatFeatureSelector();\n", + " numf = NumFeatureSelector();\n", + " #### feature extractors\n", + " pca = SKPreprocessor(\"PCA\");\n", + " fa = SKPreprocessor(\"FactorAnalysis\");\n", + " ica = SKPreprocessor(\"FastICA\");\n", + " noop = Identity(Dict(:name => \"Noop\"));\n", + " #### ML/RL agents\n", + " rf = 
RandomForest()\n", + " tree = PrunedTree()\n", + " dqn = DiscreteRLOffline(\"DQN\")\n", + " sac = DiscreteRLOffline(\"DiscreteSAC\")\n", + "end" + ] + }, + { + "cell_type": "markdown", + "id": "f27f579c", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Load Offline Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "id": "65e72008-c3d6-44f8-80fa-32d9b2cc81ce", + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
10×9 DataFrame
Rowdayhourminutedowsensor1sensor2sensor3actionreward
Int64Int64Int64Int64Int64Int64Int64Int64Float64
11002711025100.838122
210024445056100.639387
310029138611000.416196
410023655126100.384344
5100231212942100.37681
6100229514158100.641975
71012921846100.225288
8101277234500.335901
9101213161131000.993489
1010124731991000.402383
" + ], + "text/latex": [ + "\\begin{tabular}{r|ccccccccc}\n", + "\t& day & hour & minute & dow & sensor1 & sensor2 & sensor3 & action & reward\\\\\n", + "\t\\hline\n", + "\t& Int64 & Int64 & Int64 & Int64 & Int64 & Int64 & Int64 & Int64 & Float64\\\\\n", + "\t\\hline\n", + "\t1 & 1 & 0 & 0 & 2 & 7 & 110 & 25 & 10 & 0.838122 \\\\\n", + "\t2 & 1 & 0 & 0 & 2 & 444 & 50 & 56 & 10 & 0.639387 \\\\\n", + "\t3 & 1 & 0 & 0 & 2 & 9 & 138 & 61 & 100 & 0.416196 \\\\\n", + "\t4 & 1 & 0 & 0 & 2 & 365 & 51 & 26 & 10 & 0.384344 \\\\\n", + "\t5 & 1 & 0 & 0 & 2 & 312 & 129 & 42 & 10 & 0.37681 \\\\\n", + "\t6 & 1 & 0 & 0 & 2 & 295 & 141 & 58 & 10 & 0.641975 \\\\\n", + "\t7 & 1 & 0 & 1 & 2 & 92 & 18 & 46 & 10 & 0.225288 \\\\\n", + "\t8 & 1 & 0 & 1 & 2 & 77 & 23 & 4 & 50 & 0.335901 \\\\\n", + "\t9 & 1 & 0 & 1 & 2 & 131 & 61 & 13 & 100 & 0.993489 \\\\\n", + "\t10 & 1 & 0 & 1 & 2 & 473 & 1 & 99 & 100 & 0.402383 \\\\\n", + "\\end{tabular}\n" + ], + "text/plain": [ + "\u001b[1m10×9 DataFrame\u001b[0m\n", + "\u001b[1m Row \u001b[0m│\u001b[1m day \u001b[0m\u001b[1m hour \u001b[0m\u001b[1m minute \u001b[0m\u001b[1m dow \u001b[0m\u001b[1m sensor1 \u001b[0m\u001b[1m sensor2 \u001b[0m\u001b[1m sensor3 \u001b[0m\u001b[1m action \u001b[0m\u001b[1m reward \u001b[0m ⋯\n", + " │\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Float64\u001b[0m ⋯\n", + "─────┼──────────────────────────────────────────────────────────────────────────\n", + " 1 │ 1 0 0 2 7 110 25 10 0.83812 ⋯\n", + " 2 │ 1 0 0 2 444 50 56 10 0.63938\n", + " 3 │ 1 0 0 2 9 138 61 100 0.41619\n", + " 4 │ 1 0 0 2 365 51 26 10 0.38434\n", + " 5 │ 1 0 0 2 312 129 42 10 0.37681 ⋯\n", + " 6 │ 1 0 0 2 295 141 58 10 0.64197\n", + " 7 │ 1 0 1 2 92 18 46 10 0.22528\n", + " 8 │ 1 0 1 2 77 23 4 50 0.33590\n", + " 9 │ 1 0 1 2 131 61 13 100 0.99348 ⋯\n", + " 10 │ 1 0 1 2 473 1 99 100 
0.40238\n", + "\u001b[36m 1 column omitted\u001b[0m" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "path = pkgdir(AutoOfflineRL)\n", + "dataset = \"$path/data/smalldata.parquet\"\n", + "df = Parquet.read_parquet(dataset) |> DataFrame |> dropmissing\n", + "first(df,10)" + ] + }, + { + "cell_type": "markdown", + "id": "0a33d803", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Convert dataframe to MDP dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "id": "3f64fca5-4adf-4030-909d-49f74bf98bb4", + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "srow,_ = size(df)\n", + "\n", + "reward = df[:,[\"reward\"]] |> deepcopy |> DataFrame\n", + "action = df[:,[\"action\"]] |> deepcopy |> DataFrame\n", + "_terminals = zeros(Int,srow)\n", + "_terminals[collect(100:1000:9000)] .= 1\n", + "_terminals[end] = 1\n", + "terminaldf = DataFrame(terminal=_terminals)\n", + "\n", + "observation = df[:, [\"day\", \"hour\", \"minute\", \"dow\", \"sensor1\", \"sensor2\", \"sensor3\"]]\n", + "action_reward_terminal = DataFrame[action, reward, terminaldf];" + ] + }, + { + "cell_type": "markdown", + "id": "501cd92c", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Recalling the AutoML Pipeline Workflow" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "id": "5b038b45", + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
3×7 DataFrame
Rowx1x2x3x4x5x6x7
Float64Float64Float64Float64Float64Float64Float64
1-0.396769-1.38433-1.69854-0.396769-1.689990.172168-0.878338
2-0.396769-1.38433-1.69854-0.3967691.34805-0.868080.200461
3-0.396769-1.38433-1.69854-0.396769-1.676090.6576170.374461
" + ], + "text/latex": [ + "\\begin{tabular}{r|ccccccc}\n", + "\t& x1 & x2 & x3 & x4 & x5 & x6 & x7\\\\\n", + "\t\\hline\n", + "\t& Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64\\\\\n", + "\t\\hline\n", + "\t1 & -0.396769 & -1.38433 & -1.69854 & -0.396769 & -1.68999 & 0.172168 & -0.878338 \\\\\n", + "\t2 & -0.396769 & -1.38433 & -1.69854 & -0.396769 & 1.34805 & -0.86808 & 0.200461 \\\\\n", + "\t3 & -0.396769 & -1.38433 & -1.69854 & -0.396769 & -1.67609 & 0.657617 & 0.374461 \\\\\n", + "\\end{tabular}\n" + ], + "text/plain": [ + "\u001b[1m3×7 DataFrame\u001b[0m\n", + "\u001b[1m Row \u001b[0m│\u001b[1m x1 \u001b[0m\u001b[1m x2 \u001b[0m\u001b[1m x3 \u001b[0m\u001b[1m x4 \u001b[0m\u001b[1m x5 \u001b[0m\u001b[1m x6 \u001b[0m\u001b[1m x7 \u001b[0m ⋯\n", + " │\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m ⋯\n", + "─────┼──────────────────────────────────────────────────────────────────────────\n", + " 1 │ -0.396769 -1.38433 -1.69854 -0.396769 -1.68999 0.172168 -0.87833 ⋯\n", + " 2 │ -0.396769 -1.38433 -1.69854 -0.396769 1.34805 -0.86808 0.20046\n", + " 3 │ -0.396769 -1.38433 -1.69854 -0.396769 -1.67609 0.657617 0.37446\n", + "\u001b[36m 1 column omitted\u001b[0m" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mypipeline = numf |> std\n", + "tr = fit_transform!(mypipeline,observation)\n", + "first(tr,3)" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "id": "7f4ea5c1", + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
3×7 DataFrame
Rowx1x2x3x4x5x6x7
Float64Float64Float64Float64Float64Float64Float64
10.1620330.2835710.320088-0.7601662.386471.51367-4.96579e-17
20.158823-1.504361.51579-0.755993-0.229611.58652-3.16875e-17
30.1638280.1513780.7543640.5027952.336431.47229-4.6244e-17
" + ], + "text/latex": [ + "\\begin{tabular}{r|ccccccc}\n", + "\t& x1 & x2 & x3 & x4 & x5 & x6 & x7\\\\\n", + "\t\\hline\n", + "\t& Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64\\\\\n", + "\t\\hline\n", + "\t1 & 0.162033 & 0.283571 & 0.320088 & -0.760166 & 2.38647 & 1.51367 & -4.96579e-17 \\\\\n", + "\t2 & 0.158823 & -1.50436 & 1.51579 & -0.755993 & -0.22961 & 1.58652 & -3.16875e-17 \\\\\n", + "\t3 & 0.163828 & 0.151378 & 0.754364 & 0.502795 & 2.33643 & 1.47229 & -4.6244e-17 \\\\\n", + "\\end{tabular}\n" + ], + "text/plain": [ + "\u001b[1m3×7 DataFrame\u001b[0m\n", + "\u001b[1m Row \u001b[0m│\u001b[1m x1 \u001b[0m\u001b[1m x2 \u001b[0m\u001b[1m x3 \u001b[0m\u001b[1m x4 \u001b[0m\u001b[1m x5 \u001b[0m\u001b[1m x6 \u001b[0m\u001b[1m x7 \u001b[0m ⋯\n", + " │\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m ⋯\n", + "─────┼──────────────────────────────────────────────────────────────────────────\n", + " 1 │ 0.162033 0.283571 0.320088 -0.760166 2.38647 1.51367 -4.96579e- ⋯\n", + " 2 │ 0.158823 -1.50436 1.51579 -0.755993 -0.22961 1.58652 -3.16875e-\n", + " 3 │ 0.163828 0.151378 0.754364 0.502795 2.33643 1.47229 -4.6244e-1\n", + "\u001b[36m 1 column omitted\u001b[0m" + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mypipeline = numf |> std |> pca\n", + "tr = fit_transform!(mypipeline,observation)\n", + "first(tr,3)" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "id": "86345f9f", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "fold: 1, 33.800000000000004\n", + "fold: 2, 31.2\n", + "fold: 3, 31.2\n", + "fold: 4, 31.5\n", + "fold: 5, 33.5\n", + "fold: 6, 33.033033033033036\n", + "fold: 7, 34.0\n", + "fold: 8, 
32.300000000000004\n", + "fold: 9, 34.2\n", + "fold: 10, 34.9\n", + "errors: 0\n" + ] + }, + { + "data": { + "text/plain": [ + "(mean = 32.9633033033033, std = 1.3394702396340163, folds = 10, errors = 0)" + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mypipeline = numf |> std |> pca |> rf\n", + "perf = crossvalidate(mypipeline,observation,action.action)" + ] + }, + { + "cell_type": "markdown", + "id": "d61ab853", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Check crossvalidation performance of an NFQ agent" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "id": "bc198fc3-95a9-4c4e-a833-4127ed98f4c7", + "metadata": { + "collapsed": true, + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "Epoch 1/3: 0%| | 0/218 [00:00 mx |> pca) |> nfq\n", + "tderror=crossvalidateRL(pipe,observation,action_reward_terminal)" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "id": "55fcbbb9", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "4.7952334405060536e23" + ] + }, + "execution_count": 109, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tderror" + ] + }, + { + "cell_type": "markdown", + "id": "8f81a1c3", + "metadata": {}, + "source": [ + "The TD error is the difference between the agent's current estimate and target value" + ] + }, + { + "cell_type": "markdown", + "id": "1dd8555b", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Check validation performance of DQN agent" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "id": "2f28232e", + "metadata": { + "collapsed": true, + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ 
+ "/Users/ppalmes/phome/julia/AutoMLPipeline.jl/AutoOfflineRL/examples/.CondaPkg/env/lib/python3.11/site-packages/sklearn/decomposition/_fastica.py:542: FutureWarning: Starting in v1.3, whiten='unit-variance' will be used by default.\n", + " warnings.warn(\n", + "/Users/ppalmes/phome/julia/AutoMLPipeline.jl/AutoOfflineRL/examples/.CondaPkg/env/lib/python3.11/site-packages/sklearn/decomposition/_fastica.py:123: ConvergenceWarning: FastICA did not converge. Consider increasing tolerance or the maximum number of iterations.\n", + " warnings.warn(\n", + "\r", + "Epoch 1/3: 0%| | 0/218 [00:00 std |> ica) |> dqn\n", + "tderror=crossvalidateRL(pipe,observation,action_reward_terminal)" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "id": "57dfacef", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1.2257244226676562" + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tderror" + ] + }, + { + "cell_type": "markdown", + "id": "b40ec59d", + "metadata": {}, + "source": [ + "The TD error is the difference between the agent's current estimate and target value" + ] + }, + { + "cell_type": "markdown", + "id": "6a9ec189", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Find optimal OfflineRL pipeline in parallel" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "633319f7", + "metadata": { + "collapsed": true, + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " From worker 2:\t2023-06-21 09:17:39 [debug ] RoundIterator is selected.\n", + " From worker 2:\t2023-06-21 09:17:39 [info ] Directory is created at d3rlpy_logs/DiscreteCQL_20230621091739\n", + " From worker 2:\t2023-06-21 09:17:39 [debug ] Fitting scaler... 
scaler=min_max\n", + " From worker 2:\t2023-06-21 09:17:39 [debug ] Building models...\n", + " From worker 2:\t2023-06-21 09:17:39 [debug ] Models have been built.\n", + " From worker 2:\t2023-06-21 09:17:39 [info ] Parameters are saved to d3rlpy_logs/DiscreteCQL_20230621091739/params.json params={'action_scaler': None, 'alpha': 1.0, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[0.77796113, 1.3371367 , 1.6107398 , 1.4524562 , 1.7445312 ,\n", + " From worker 2:\t 0.6457107 , 0.5352664 ]], dtype=float32), 'minimum': array([[-1.0338004 , -1.4315552 , -1.647037 , -1.5634333 , -1.7121451 ,\n", + " From worker 2:\t -0.6054753 , -0.31666145]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DiscreteCQL', 'observation_shape': (7,), 'action_size': 101}\n", + " From worker 8:\t2023-06-21 09:17:39 [debug ] RoundIterator is selected.\n", + " From worker 8:\t2023-06-21 09:17:39 [info ] Directory is created at d3rlpy_logs/DQN_20230621091739\n", + " From worker 8:\t2023-06-21 09:17:39 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 3:\t2023-06-21 09:17:39 [debug ] RoundIterator is selected.\n", + " From worker 3:\t2023-06-21 09:17:39 [info ] Directory is created at d3rlpy_logs/NFQ_20230621091739\n", + " From worker 3:\t2023-06-21 09:17:39 [debug ] Fitting scaler... 
scaler=min_max\n", + " From worker 8:\t2023-06-21 09:17:39 [debug ] Building models...\n", + " From worker 3:\t2023-06-21 09:17:39 [debug ] Building models...\n", + " From worker 8:\t2023-06-21 09:17:39 [debug ] Models have been built.\n", + " From worker 3:\t2023-06-21 09:17:39 [debug ] Models have been built.\n", + " From worker 8:\t2023-06-21 09:17:39 [info ] Parameters are saved to d3rlpy_logs/DQN_20230621091739/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[0.7926923, 1.3371367, 1.6107398, 1.399293 , 1.7445312, 0.6543074,\n", + " From worker 8:\t 0.5231041]], dtype=float32), 'minimum': array([[-0.9144198, -1.320497 , -1.6457819, -1.4033899, -1.7121451,\n", + " From worker 8:\t -0.5812454, -0.2982593]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (7,), 'action_size': 101}\n", + " From worker 3:\t2023-06-21 09:17:39 [info ] Parameters are saved to d3rlpy_logs/NFQ_20230621091739/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 
'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[1.7241911 , 1.5049787 , 1.7117655 , 1.6942327 , 1.9783719 ,\n", + " From worker 3:\t 0.6704674 , 0.49065414]], dtype=float32), 'minimum': array([[-1.2627224, -1.320497 , -1.8827811, -1.7183032, -1.675721 ,\n", + " From worker 3:\t -0.6269234, -1.5822366]], dtype=float32)}}, 'use_gpu': None, 'algorithm': 'NFQ', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|█████████████████| 218/218 [00:00<00:00, 373.42it/s, loss=3.83]\n", + " From worker 4:\t2023-06-21 09:17:39 [debug ] RoundIterator is selected.\n", + " From worker 4:\t2023-06-21 09:17:39 [info ] Directory is created at d3rlpy_logs/DoubleDQN_20230621091739\n", + " From worker 4:\t2023-06-21 09:17:39 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 4:\t2023-06-21 09:17:39 [debug ] Building models...\n", + " From worker 4:\t2023-06-21 09:17:39 [debug ] Models have been built.\n", + " From worker 4:\t2023-06-21 09:17:39 [info ] Parameters are saved to d3rlpy_logs/DoubleDQN_20230621091739/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[1.7241911, 1.5049787, 1.7000638, 1.5026734, 1.7445312, 0.6457107,\n", + " From worker 4:\t 0.9709042]], dtype=float32), 'minimum': array([[-1.2627224 , -1.4646739 , -1.9264166 , -1.5791949 , -1.7102567 ,\n", + " From worker 4:\t -0.84071213, -1.0527257 ]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 
'DoubleDQN', 'observation_shape': (7,), 'action_size': 101}\n", + " From worker 5:\t2023-06-21 09:17:39 [debug ] RoundIterator is selected.\n", + " From worker 5:\t2023-06-21 09:17:39 [info ] Directory is created at d3rlpy_logs/DiscreteSAC_20230621091739\n", + " From worker 5:\t2023-06-21 09:17:39 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 5:\t2023-06-21 09:17:39 [debug ] Building models...\n", + " From worker 5:\t2023-06-21 09:17:39 [debug ] Models have been built.\n", + " From worker 5:\t2023-06-21 09:17:39 [info ] Parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091739/params.json params={'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 64, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'gamma': 0.99, 'generated_maxlen': 100000, 'initial_temperature': 1.0, 'n_critics': 2, 'n_frames': 1, 'n_steps': 1, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[7.9269230e-01, 1.3371367e+00, 1.6107398e+00, 1.5551701e+00,\n", + " From worker 5:\t 1.7445312e+00, 4.1851148e-01, 3.9809948e-16]], dtype=float32), 'minimum': array([[-9.1441977e-01, -1.3695900e+00, -1.6457819e+00, -1.4034406e+00,\n", + " From worker 5:\t -1.6697341e+00, -6.5430743e-01, 5.7550873e-17]], dtype=float32)}}, 'target_update_interval': 8000, 'temp_learning_rate': 0.0003, 'temp_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 
0, 'amsgrad': False}, 'use_gpu': None, 'algorithm': 'DiscreteSAC', 'observation_shape': (7,), 'action_size': 101}\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " From worker 6:\t2023-06-21 09:17:39 [debug ] RoundIterator is selected.\n", + " From worker 6:\t2023-06-21 09:17:39 [info ] Directory is created at d3rlpy_logs/DiscreteBCQ_20230621091739\n", + " From worker 6:\t2023-06-21 09:17:39 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 7:\t2023-06-21 09:17:39 [debug ] RoundIterator is selected.\n", + " From worker 7:\t2023-06-21 09:17:39 [info ] Directory is created at d3rlpy_logs/DiscreteBC_20230621091739\n", + " From worker 7:\t2023-06-21 09:17:39 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 6:\t2023-06-21 09:17:39 [debug ] Building models...\n", + " From worker 7:\t2023-06-21 09:17:39 [debug ] Building models...\n", + " From worker 6:\t2023-06-21 09:17:39 [debug ] Models have been built.\n", + " From worker 7:\t2023-06-21 09:17:39 [debug ] Models have been built.\n", + " From worker 6:\t2023-06-21 09:17:39 [info ] Parameters are saved to d3rlpy_logs/DiscreteBCQ_20230621091739/params.json params={'action_flexibility': 0.3, 'action_scaler': None, 'batch_size': 32, 'beta': 0.5, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[1.7241911, 1.5049787, 1.6107398, 1.5380808, 1.7445312, 0.8475338,\n", + " From worker 6:\t 0.9709042]], dtype=float32), 'minimum': array([[-1.2627224, -1.4646739, -1.9264166, -1.5126448, -1.7102567,\n", + " From 
worker 6:\t -0.6457107, -1.0527257]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DiscreteBCQ', 'observation_shape': (7,), 'action_size': 101}\n", + " From worker 7:\t2023-06-21 09:17:39 [info ] Parameters are saved to d3rlpy_logs/DiscreteBC_20230621091739/params.json params={'action_scaler': None, 'batch_size': 100, 'beta': 0.5, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 1.0, 'generated_maxlen': 100000, 'learning_rate': 0.001, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[1.7241911, 1.4646739, 1.920096 , 1.5126448, 1.9783719, 0.6543074,\n", + " From worker 7:\t 1.1772403]], dtype=float32), 'minimum': array([[-1.2627224 , -1.5049787 , -1.6457819 , -1.5148648 , -1.7121451 ,\n", + " From worker 7:\t -0.81132424, -0.9709042 ]], dtype=float32)}}, 'use_gpu': None, 'algorithm': 'DiscreteBC', 'observation_shape': (7,), 'action_size': 101}\n", + " From worker 2:\t2023-06-21 09:17:39 [info ] DiscreteCQL_20230621091739: epoch=1 step=218 epoch=1 metrics={'time_sample_batch': 4.510704530488461e-05, 'time_algorithm_update': 0.00259503093334513, 'loss': 3.8033027670799044, 'time_step': 0.002666063264969292, 'td_error': 0.34984880769006144} step=218\n", + " From worker 2:\t2023-06-21 09:17:39 [info ] Model parameters are saved to d3rlpy_logs/DiscreteCQL_20230621091739/model_218.pt\n", + "Epoch 1/1: 100%|███████████████████| 69/69 [00:00<00:00, 402.88it/s, loss=2.29]\n", + " From worker 7:\terror in DiscreteBC\n", + "Epoch 1/1: 100%|███████████████| 190/190 [00:00<00:00, 446.94it/s, loss=0.0825]\n", + " From worker 2:\t2023-06-21 09:17:40 [debug ] RoundIterator is selected.\n", + " From worker 2:\t2023-06-21 09:17:40 [info ] Directory is created at 
d3rlpy_logs/DiscreteCQL_20230621091740\n", + " From worker 2:\t2023-06-21 09:17:40 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 2:\t2023-06-21 09:17:40 [debug ] Building models...\n", + "Epoch 1/1: 100%|████████████████| 218/218 [00:00<00:00, 435.33it/s, loss=0.197]\n", + " From worker 2:\t2023-06-21 09:17:40 [debug ] Models have been built.\n", + " From worker 2:\t2023-06-21 09:17:40 [info ] Parameters are saved to d3rlpy_logs/DiscreteCQL_20230621091740/params.json params={'action_scaler': None, 'alpha': 1.0, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[0.7926923, 1.3371367, 1.6107398, 1.4524562, 1.7445312, 0.6543074,\n", + " From worker 2:\t 0.5231041]], dtype=float32), 'minimum': array([[-1.0338004 , -1.4315552 , -1.647037 , -1.5634333 , -1.7121451 ,\n", + " From worker 2:\t -0.5812454 , -0.31666145]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DiscreteCQL', 'observation_shape': (7,), 'action_size': 101}\n", + " From worker 8:\t2023-06-21 09:17:40 [info ] DQN_20230621091739: epoch=1 step=190 epoch=1 metrics={'time_sample_batch': 4.936017488178454e-05, 'time_algorithm_update': 0.0021454158582185443, 'loss': 0.08067038490584022, 'time_step': 0.002224294762862356, 'td_error': 0.349530275497165} step=190\n", + " From worker 8:\t2023-06-21 09:17:40 [info ] Model parameters are saved to d3rlpy_logs/DQN_20230621091739/model_190.pt\n", + "Epoch 1/1: 100%|███████████████| 246/246 [00:00<00:00, 462.45it/s, loss=0.0699]\n", + " 
From worker 3:\t2023-06-21 09:17:40 [info ] NFQ_20230621091739: epoch=1 step=218 epoch=1 metrics={'time_sample_batch': 5.499157336873746e-05, 'time_algorithm_update': 0.0022020449332141, 'loss': 0.19771507529353877, 'time_step': 0.0022861465401605727, 'td_error': 0.4070726314259975} step=218\n", + " From worker 3:\t2023-06-21 09:17:40 [info ] Model parameters are saved to d3rlpy_logs/NFQ_20230621091739/model_218.pt\n", + " From worker 4:\t2023-06-21 09:17:40 [info ] DoubleDQN_20230621091739: epoch=1 step=246 epoch=1 metrics={'time_sample_batch': 5.0197771894253363e-05, 'time_algorithm_update': 0.002068161964416504, 'loss': 0.06939889395200624, 'time_step': 0.0021510085439294334, 'td_error': 0.3295037820149992} step=246\n", + " From worker 4:\t2023-06-21 09:17:40 [info ] Model parameters are saved to d3rlpy_logs/DoubleDQN_20230621091739/model_246.pt\n", + "Epoch 1/1: 100%|█| 95/95 [00:00<00:00, 141.16it/s, temp_loss=-4.58, temp=1.01, \n", + " From worker 8:\t2023-06-21 09:17:40 [debug ] RoundIterator is selected.\n", + " From worker 8:\t2023-06-21 09:17:40 [info ] Directory is created at d3rlpy_logs/DQN_20230621091740\n", + " From worker 8:\t2023-06-21 09:17:40 [debug ] Fitting scaler... 
scaler=min_max\n", + " From worker 8:\t2023-06-21 09:17:40 [debug ] Building models...\n", + " From worker 8:\t2023-06-21 09:17:40 [debug ] Models have been built.\n", + " From worker 8:\t2023-06-21 09:17:40 [info ] Parameters are saved to d3rlpy_logs/DQN_20230621091740/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[1.7241911, 1.5049787, 2.2819278, 1.5126448, 1.7445312, 0.6543074,\n", + " From worker 8:\t 1.0527257]], dtype=float32), 'minimum': array([[-1.2627224 , -1.4646739 , -1.6457819 , -1.5634333 , -1.7121451 ,\n", + " From worker 8:\t -0.94803315, -0.9709042 ]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (7,), 'action_size': 101}\n", + " From worker 5:\t2023-06-21 09:17:40 [info ] DiscreteSAC_20230621091739: epoch=1 step=95 epoch=1 metrics={'time_sample_batch': 8.008103621633429e-05, 'time_algorithm_update': 0.006943519491898386, 'temp_loss': -4.5871779692800425, 'temp': 1.0145784817243877, 'critic_loss': 16517171348490.78, 'actor_loss': -15285661198961.18, 'time_step': 0.007063471643548262, 'td_error': 9.846342275558801e+26} step=95\n", + " From worker 5:\t2023-06-21 09:17:40 [info ] Model parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091739/model_95.pt\n", + " From worker 3:\t2023-06-21 09:17:40 [debug ] RoundIterator is selected.\n", + " From worker 3:\t2023-06-21 09:17:40 [info ] Directory is created at d3rlpy_logs/NFQ_20230621091740\n", + " From worker 
3:\t2023-06-21 09:17:40 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 3:\t2023-06-21 09:17:40 [debug ] Building models...\n", + " From worker 3:\t2023-06-21 09:17:40 [debug ] Models have been built.\n", + " From worker 3:\t2023-06-21 09:17:40 [info ] Parameters are saved to d3rlpy_logs/NFQ_20230621091740/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[7.9787636e-01, 1.3371367e+00, 1.6107398e+00, 1.5502471e+00,\n", + " From worker 3:\t 1.7445312e+00, 6.5430743e-01, 3.9809948e-16]], dtype=float32), 'minimum': array([[-9.1441977e-01, -1.3088119e+00, -1.5569713e+00, -1.6544964e+00,\n", + " From worker 3:\t -1.6697341e+00, -5.2033156e-01, -1.6346980e-15]], dtype=float32)}}, 'use_gpu': None, 'algorithm': 'NFQ', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|█████████████████| 246/246 [00:00<00:00, 288.89it/s, loss=3.51]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " From worker 4:\t2023-06-21 09:17:40 [debug ] RoundIterator is selected.\n", + " From worker 4:\t2023-06-21 09:17:40 [info ] Directory is created at d3rlpy_logs/DoubleDQN_20230621091740\n", + " From worker 4:\t2023-06-21 09:17:40 [debug ] Fitting scaler... 
scaler=min_max\n", + " From worker 4:\t2023-06-21 09:17:40 [debug ] Building models...\n", + " From worker 4:\t2023-06-21 09:17:40 [debug ] Models have been built.\n", + " From worker 4:\t2023-06-21 09:17:40 [info ] Parameters are saved to d3rlpy_logs/DoubleDQN_20230621091740/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[1.6571574, 1.3371367, 1.7000638, 1.5026734, 1.7445312, 0.6543074,\n", + " From worker 4:\t 0.5446731]], dtype=float32), 'minimum': array([[-1.0338004 , -1.4315552 , -1.647037 , -1.4524562 , -1.7102567 ,\n", + " From worker 4:\t -0.57227874, -0.27350825]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DoubleDQN', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|██████████████████| 190/190 [00:00<00:00, 358.94it/s, loss=3.9]\n", + " From worker 2:\t2023-06-21 09:17:40 [info ] DiscreteCQL_20230621091740: epoch=1 step=190 epoch=1 metrics={'time_sample_batch': 4.539113295705695e-05, 'time_algorithm_update': 0.0027033605073627674, 'loss': 3.856140062683507, 'time_step': 0.0027752098284269636, 'td_error': 0.34433310867649836} step=190\n", + " From worker 2:\t2023-06-21 09:17:40 [info ] Model parameters are saved to d3rlpy_logs/DiscreteCQL_20230621091740/model_190.pt\n", + " From worker 2:\t2023-06-21 09:17:40 [debug ] RoundIterator is selected.\n", + " From worker 6:\t2023-06-21 09:17:40 [info ] DiscreteBCQ_20230621091739: epoch=1 step=246 epoch=1 
metrics={'time_sample_batch': 5.255385143000905e-05, 'time_algorithm_update': 0.003364869249545462, 'loss': 3.489266607819534, 'time_step': 0.003448172313411061, 'td_error': 0.33191796381841476} step=246\n", + " From worker 6:\t2023-06-21 09:17:40 [info ] Model parameters are saved to d3rlpy_logs/DiscreteBCQ_20230621091739/model_246.pt\n", + " From worker 5:\t2023-06-21 09:17:41 [debug ] RoundIterator is selected.\n", + " From worker 5:\t2023-06-21 09:17:41 [info ] Directory is created at d3rlpy_logs/DiscreteSAC_20230621091741\n", + " From worker 5:\t2023-06-21 09:17:41 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 5:\t2023-06-21 09:17:41 [debug ] Building models...\n", + " From worker 5:\t2023-06-21 09:17:41 [debug ] Models have been built.\n", + " From worker 5:\t2023-06-21 09:17:41 [info ] Parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091741/params.json params={'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 64, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'gamma': 0.99, 'generated_maxlen': 100000, 'initial_temperature': 1.0, 'n_critics': 2, 'n_frames': 1, 'n_steps': 1, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[7.9269230e-01, 1.3371367e+00, 1.6107398e+00, 1.6721407e+00,\n", + " From worker 5:\t 1.7445312e+00, 5.2033156e-01, 3.9809948e-16]], dtype=float32), 'minimum': array([[-1.0338004e+00, -1.3695900e+00, 
-1.6457819e+00, -1.4369258e+00,\n", + " From worker 5:\t -1.6697341e+00, -6.5430743e-01, -1.6346980e-15]], dtype=float32)}}, 'target_update_interval': 8000, 'temp_learning_rate': 0.0003, 'temp_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'use_gpu': None, 'algorithm': 'DiscreteSAC', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|████████████████| 218/218 [00:00<00:00, 534.99it/s, loss=0.086]\n", + " From worker 8:\t2023-06-21 09:17:41 [info ] DQN_20230621091740: epoch=1 step=218 epoch=1 metrics={'time_sample_batch': 4.157998146267112e-05, 'time_algorithm_update': 0.0017916532831454496, 'loss': 0.08464137135808347, 'time_step': 0.0018595686746299815, 'td_error': 0.3356154156937909} step=218\n", + "Epoch 1/1: 100%|██████████████| 190/190 [00:00<00:00, 511.98it/s, loss=6.3e+12]\n", + " From worker 8:\t2023-06-21 09:17:41 [info ] Model parameters are saved to d3rlpy_logs/DQN_20230621091740/model_218.pt\n", + " From worker 8:\t2023-06-21 09:17:41 [debug ] RoundIterator is selected.\n", + " From worker 8:\t2023-06-21 09:17:41 [info ] Directory is created at d3rlpy_logs/DQN_20230621091741\n", + " From worker 8:\t2023-06-21 09:17:41 [debug ] Fitting scaler... 
scaler=min_max\n", + " From worker 8:\t2023-06-21 09:17:41 [debug ] Building models...\n", + " From worker 8:\t2023-06-21 09:17:41 [debug ] Models have been built.\n", + " From worker 8:\t2023-06-21 09:17:41 [info ] Parameters are saved to d3rlpy_logs/DQN_20230621091741/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[1.7241911, 1.5049787, 2.2819278, 1.5126448, 1.7445312, 0.6543074,\n", + " From worker 8:\t 1.0527257]], dtype=float32), 'minimum': array([[-1.2627224 , -1.4646739 , -1.6457819 , -1.5634333 , -1.7121451 ,\n", + " From worker 8:\t -0.94803315, -0.9709042 ]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (7,), 'action_size': 101}\n", + " From worker 3:\t2023-06-21 09:17:41 [info ] NFQ_20230621091740: epoch=1 step=190 epoch=1 metrics={'time_sample_batch': 4.245858443410773e-05, 'time_algorithm_update': 0.0018767607839483963, 'loss': 6301005974905.263, 'time_step': 0.0019441830484490646, 'td_error': 1.069057827735549e+26} step=190\n", + " From worker 3:\t2023-06-21 09:17:41 [info ] Model parameters are saved to d3rlpy_logs/NFQ_20230621091740/model_190.pt\n", + " From worker 3:\t2023-06-21 09:17:41 [debug ] RoundIterator is selected.\n", + " From worker 3:\t2023-06-21 09:17:41 [info ] Directory is created at d3rlpy_logs/NFQ_20230621091741\n", + " From worker 3:\t2023-06-21 09:17:41 [debug ] Fitting scaler... 
scaler=min_max\n", + " From worker 3:\t2023-06-21 09:17:41 [debug ] Building models...\n", + " From worker 3:\t2023-06-21 09:17:41 [debug ] Models have been built.\n", + " From worker 3:\t2023-06-21 09:17:41 [info ] Parameters are saved to d3rlpy_logs/NFQ_20230621091741/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[1.7241911, 1.5049787, 1.7117655, 1.5502471, 1.9783719, 0.6704674,\n", + " From worker 3:\t 0.4620808]], dtype=float32), 'minimum': array([[-1.2627224, -1.320497 , -1.8827811, -1.7183032, -1.675721 ,\n", + " From worker 3:\t -0.6269234, -1.5822366]], dtype=float32)}}, 'use_gpu': None, 'algorithm': 'NFQ', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|███████████████| 190/190 [00:00<00:00, 558.95it/s, loss=0.0826]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " From worker 4:\t2023-06-21 09:17:41 [info ] DoubleDQN_20230621091740: epoch=1 step=190 epoch=1 metrics={'time_sample_batch': 4.182489294754831e-05, 'time_algorithm_update': 0.001713250812731291, 'loss': 0.08083449318808944, 'time_step': 0.0017797846543161492, 'td_error': 0.3419192071373942} step=190\n", + " From worker 4:\t2023-06-21 09:17:41 [info ] Model parameters are saved to d3rlpy_logs/DoubleDQN_20230621091740/model_190.pt\n", + " From worker 4:\t2023-06-21 09:17:41 [debug ] RoundIterator is selected.\n", + " From worker 4:\t2023-06-21 09:17:41 [info ] Directory is created at 
d3rlpy_logs/DoubleDQN_20230621091741\n", + " From worker 4:\t2023-06-21 09:17:41 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 6:\t2023-06-21 09:17:41 [debug ] RoundIterator is selected.\n", + " From worker 6:\t2023-06-21 09:17:41 [info ] Directory is created at d3rlpy_logs/DiscreteBCQ_20230621091741\n", + " From worker 6:\t2023-06-21 09:17:41 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 4:\t2023-06-21 09:17:41 [debug ] Building models...\n", + " From worker 6:\t2023-06-21 09:17:41 [debug ] Building models...\n", + " From worker 4:\t2023-06-21 09:17:41 [debug ] Models have been built.\n", + " From worker 4:\t2023-06-21 09:17:41 [info ] Parameters are saved to d3rlpy_logs/DoubleDQN_20230621091741/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[1.7241911, 1.5049787, 1.7000638, 1.5026734, 1.7445312, 0.6457107,\n", + " From worker 4:\t 0.9709042]], dtype=float32), 'minimum': array([[-1.2627224 , -1.4646739 , -1.9264166 , -1.5791949 , -1.7102567 ,\n", + " From worker 4:\t -0.84071213, -1.0527257 ]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DoubleDQN', 'observation_shape': (7,), 'action_size': 101}\n", + " From worker 6:\t2023-06-21 09:17:41 [debug ] Models have been built.\n", + " From worker 6:\t2023-06-21 09:17:41 [info ] Parameters are saved to d3rlpy_logs/DiscreteBCQ_20230621091741/params.json params={'action_flexibility': 0.3, 'action_scaler': None, 'batch_size': 32, 
'beta': 0.5, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[0.7926923 , 1.3023561 , 1.6044478 , 1.4033899 , 1.7445312 ,\n", + " From worker 6:\t 0.57624644, 0.5446731 ]], dtype=float32), 'minimum': array([[-1.0338004 , -1.4315552 , -1.647037 , -1.4524562 , -1.7102567 ,\n", + " From worker 6:\t -0.6543074 , -0.27350825]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DiscreteBCQ', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|███████████████| 218/218 [00:00<00:00, 536.47it/s, loss=0.0956]\n", + " From worker 8:\t2023-06-21 09:17:41 [info ] DQN_20230621091741: epoch=1 step=218 epoch=1 metrics={'time_sample_batch': 4.1442179898603244e-05, 'time_algorithm_update': 0.0017878757704288587, 'loss': 0.09385682608282894, 'time_step': 0.0018546964050432959, 'td_error': 0.35432347418705135} step=218\n", + " From worker 8:\t2023-06-21 09:17:41 [info ] Model parameters are saved to d3rlpy_logs/DQN_20230621091741/model_218.pt\n", + "Epoch 1/1: 100%|████████████████| 218/218 [00:00<00:00, 516.61it/s, loss=0.223]\n", + " From worker 3:\t2023-06-21 09:17:41 [info ] NFQ_20230621091741: epoch=1 step=218 epoch=1 metrics={'time_sample_batch': 4.212025108687375e-05, 'time_algorithm_update': 0.0018612518223053817, 'loss': 0.22377619776157065, 'time_step': 0.0019271406558675503, 'td_error': 0.49007505557494097} step=218\n", + " From worker 3:\t2023-06-21 09:17:41 [info ] Model parameters are saved to d3rlpy_logs/NFQ_20230621091741/model_218.pt\n", + "Epoch 1/1: 
100%|█| 95/95 [00:00<00:00, 160.76it/s, temp_loss=-4.58, temp=1.01, \n", + "Epoch 1/1: 100%|███████████████| 246/246 [00:00<00:00, 546.30it/s, loss=0.0707]\n", + " From worker 5:\t2023-06-21 09:17:41 [info ] DiscreteSAC_20230621091741: epoch=1 step=95 epoch=1 metrics={'time_sample_batch': 6.808732685289885e-05, 'time_algorithm_update': 0.006103771611263877, 'temp_loss': -4.5871779692800425, 'temp': 1.0145784817243877, 'critic_loss': 3907441664086.2314, 'actor_loss': -4449662659185.179, 'time_step': 0.00620519989415219, 'td_error': 2.3705497272879246e+25} step=95\n", + " From worker 4:\t2023-06-21 09:17:41 [info ] DoubleDQN_20230621091741: epoch=1 step=246 epoch=1 metrics={'time_sample_batch': 4.2355157495514165e-05, 'time_algorithm_update': 0.001753175161718353, 'loss': 0.07013292414537532, 'time_step': 0.001821445255744748, 'td_error': 0.3307183182128285} step=246\n", + " From worker 4:\t2023-06-21 09:17:41 [info ] Model parameters are saved to d3rlpy_logs/DoubleDQN_20230621091741/model_246.pt\n", + " From worker 5:\t2023-06-21 09:17:41 [info ] Model parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091741/model_95.pt\n", + " From worker 5:\t2023-06-21 09:17:41 [debug ] RoundIterator is selected.\n", + " From worker 3:\t2023-06-21 09:17:41 [debug ] RoundIterator is selected.\n", + " From worker 3:\t2023-06-21 09:17:41 [info ] Directory is created at d3rlpy_logs/DiscreteBC_20230621091741\n", + " From worker 3:\t2023-06-21 09:17:41 [debug ] Fitting scaler... 
scaler=min_max\n", + " From worker 3:\t2023-06-21 09:17:41 [debug ] Building models...\n", + " From worker 3:\t2023-06-21 09:17:41 [debug ] Models have been built.\n", + " From worker 3:\t2023-06-21 09:17:41 [info ] Parameters are saved to d3rlpy_logs/DiscreteBC_20230621091741/params.json params={'action_scaler': None, 'batch_size': 100, 'beta': 0.5, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 1.0, 'generated_maxlen': 100000, 'learning_rate': 0.001, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[0.9381961 , 1.1401778 , 0.7587837 , 0.4004507 , 0.4045163 ,\n", + " From worker 3:\t 0.22754474, 0.77696633]], dtype=float32), 'minimum': array([[-0.46346295, -0.37498793, -0.32135844, -0.3676697 , -0.50954545,\n", + " From worker 3:\t -0.48041272, -0.51314104]], dtype=float32)}}, 'use_gpu': None, 'algorithm': 'DiscreteBC', 'observation_shape': (7,), 'action_size': 101}\n", + " From worker 4:\t2023-06-21 09:17:41 [debug ] RoundIterator is selected.\n", + "Epoch 1/1: 100%|███████████████████| 69/69 [00:00<00:00, 609.74it/s, loss=2.29]\n", + "Epoch 1/1: 100%|█████████████████| 190/190 [00:00<00:00, 304.40it/s, loss=3.91]\n", + " From worker 3:\terror in DiscreteBC\n", + " From worker 6:\t2023-06-21 09:17:41 [info ] DiscreteBCQ_20230621091741: epoch=1 step=190 epoch=1 metrics={'time_sample_batch': 4.2665632147538036e-05, 'time_algorithm_update': 0.0032078454368992854, 'loss': 3.859865134640744, 'time_step': 0.003275139708268015, 'td_error': 0.3572662849078008} step=190\n", + " From worker 6:\t2023-06-21 09:17:41 [info ] Model parameters are saved to d3rlpy_logs/DiscreteBCQ_20230621091741/model_190.pt\n", + " From worker 6:\t2023-06-21 09:17:41 [debug ] RoundIterator is selected.\n", + " From worker 
2:\t2023-06-21 09:17:41 [info ] Directory is created at d3rlpy_logs/DiscreteCQL_20230621091741\n", + " From worker 2:\t2023-06-21 09:17:41 [debug ] Fitting scaler... scaler=min_max\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " From worker 2:\t2023-06-21 09:17:41 [debug ] Building models...\n", + " From worker 2:\t2023-06-21 09:17:41 [debug ] Models have been built.\n", + " From worker 3:\t2023-06-21 09:17:41 [debug ] RoundIterator is selected.\n", + " From worker 2:\t2023-06-21 09:17:41 [info ] Parameters are saved to d3rlpy_logs/DiscreteCQL_20230621091741/params.json params={'action_scaler': None, 'alpha': 1.0, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[1.7241911, 1.5049787, 1.6107398, 1.5126448, 1.7445312, 0.6543074,\n", + " From worker 2:\t 0.9709042]], dtype=float32), 'minimum': array([[-1.2627224, -1.4646739, -1.9264166, -1.5380808, -1.7121451,\n", + " From worker 2:\t -0.8488413, -1.0527257]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DiscreteCQL', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|█████████████████| 218/218 [00:00<00:00, 581.26it/s, loss=3.76]\n", + " From worker 2:\t2023-06-21 09:17:42 [info ] DiscreteCQL_20230621091741: epoch=1 step=218 epoch=1 metrics={'time_sample_batch': 2.6314630420929793e-05, 'time_algorithm_update': 0.0016727053791011145, 'loss': 3.727580152520346, 'time_step': 0.0017157727425251533, 'td_error': 0.3510580793225154} step=218\n", + " From 
worker 2:\t2023-06-21 09:17:42 [info ] Model parameters are saved to d3rlpy_logs/DiscreteCQL_20230621091741/model_218.pt\n", + " From worker 2:\t/Users/ppalmes/phome/julia/AutoMLPipeline.jl/AutoOfflineRL/examples/.CondaPkg/env/lib/python3.11/site-packages/sklearn/decomposition/_fastica.py:542: FutureWarning: Starting in v1.3, whiten='unit-variance' will be used by default.\n", + " From worker 2:\t warnings.warn(\n", + " From worker 5:\t2023-06-21 09:17:42 [info ] Directory is created at d3rlpy_logs/DiscreteSAC_20230621091742\n", + " From worker 5:\t2023-06-21 09:17:42 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 5:\t2023-06-21 09:17:42 [debug ] Building models...\n", + " From worker 5:\t2023-06-21 09:17:42 [debug ] Models have been built.\n", + " From worker 5:\t2023-06-21 09:17:42 [info ] Parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091742/params.json params={'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 64, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'gamma': 0.99, 'generated_maxlen': 100000, 'initial_temperature': 1.0, 'n_critics': 2, 'n_frames': 1, 'n_steps': 1, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[1.7241911, 1.5049787, 1.6107398, 1.5791949, 1.7445312, 0.8238525,\n", + " From worker 5:\t 0.9709042]], dtype=float32), 'minimum': array([[-1.2627224, -1.4646739, -1.9264166, -1.4369258, -1.675721 ,\n", + " 
From worker 5:\t -0.6543074, -1.0527257]], dtype=float32)}}, 'target_update_interval': 8000, 'temp_learning_rate': 0.0003, 'temp_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'use_gpu': None, 'algorithm': 'DiscreteSAC', 'observation_shape': (7,), 'action_size': 101}\n", + " From worker 4:\t2023-06-21 09:17:42 [info ] Directory is created at d3rlpy_logs/DiscreteBC_20230621091742\n", + " From worker 4:\t2023-06-21 09:17:42 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 4:\t2023-06-21 09:17:42 [debug ] Building models...\n", + " From worker 4:\t2023-06-21 09:17:42 [debug ] Models have been built.\n", + " From worker 4:\t2023-06-21 09:17:42 [info ] Parameters are saved to d3rlpy_logs/DiscreteBC_20230621091742/params.json params={'action_scaler': None, 'batch_size': 100, 'beta': 0.5, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 1.0, 'generated_maxlen': 100000, 'learning_rate': 0.001, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[1. , 0.9285714, 1. , 1. , 1.0121458, 1. ,\n", + " From worker 4:\t 1. ]], dtype=float32), 'minimum': array([[ 0. , -0.71428573, -0.96666664, 0. , -1.0080972 ,\n", + " From worker 4:\t -0.99 , -0.98 ]], dtype=float32)}}, 'use_gpu': None, 'algorithm': 'DiscreteBC', 'observation_shape': (7,), 'action_size': 101}\n", + " From worker 2:\t/Users/ppalmes/phome/julia/AutoMLPipeline.jl/AutoOfflineRL/examples/.CondaPkg/env/lib/python3.11/site-packages/sklearn/decomposition/_fastica.py:123: ConvergenceWarning: FastICA did not converge. 
Consider increasing tolerance or the maximum number of iterations.\n", + " From worker 2:\t warnings.warn(\n", + " From worker 2:\t2023-06-21 09:17:42 [debug ] RoundIterator is selected.\n", + " From worker 2:\t2023-06-21 09:17:42 [info ] Directory is created at d3rlpy_logs/DQN_20230621091742\n", + " From worker 2:\t2023-06-21 09:17:42 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 2:\t2023-06-21 09:17:42 [debug ] Building models...\n", + " From worker 2:\t2023-06-21 09:17:42 [debug ] Models have been built.\n", + " From worker 2:\t2023-06-21 09:17:42 [info ] Parameters are saved to d3rlpy_logs/DQN_20230621091742/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[3.1024223e+12, 7.7798225e+11, 3.9527328e+12, 2.1446488e+12,\n", + " From worker 2:\t 6.3687953e+12, 2.6677488e+12, 4.8132826e+12]], dtype=float32), 'minimum': array([[-3.1024223e+12, -7.7798225e+11, -3.9527328e+12, -2.1446488e+12,\n", + " From worker 2:\t -7.4621214e+12, -2.8364922e+12, -3.2294689e+12]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (7,), 'action_size': 101}\n", + " From worker 6:\t2023-06-21 09:17:42 [info ] Directory is created at d3rlpy_logs/DiscreteBCQ_20230621091742\n", + " From worker 6:\t2023-06-21 09:17:42 [debug ] Fitting scaler... 
scaler=min_max\n", + " From worker 6:\t2023-06-21 09:17:42 [debug ] Building models...\n", + " From worker 6:\t2023-06-21 09:17:42 [debug ] Models have been built.\n", + " From worker 6:\t2023-06-21 09:17:42 [info ] Parameters are saved to d3rlpy_logs/DiscreteBCQ_20230621091742/params.json params={'action_flexibility': 0.3, 'action_scaler': None, 'batch_size': 32, 'beta': 0.5, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[1.7241911, 1.5049787, 1.6107398, 1.5380808, 1.7445312, 0.8475338,\n", + " From worker 6:\t 0.9709042]], dtype=float32), 'minimum': array([[-1.2627224, -1.4646739, -1.9264166, -1.5126448, -1.7102567,\n", + " From worker 6:\t -0.6457107, -1.0527257]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DiscreteBCQ', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|███████████████████| 69/69 [00:00<00:00, 784.02it/s, loss=2.33]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " From worker 4:\terror in DiscreteBC\n", + " From worker 4:\t2023-06-21 09:17:43 [debug ] RoundIterator is selected.\n", + " From worker 4:\t2023-06-21 09:17:43 [info ] Directory is created at d3rlpy_logs/DiscreteBC_20230621091743\n", + " From worker 4:\t2023-06-21 09:17:43 [debug ] Fitting scaler... 
scaler=min_max\n", + " From worker 4:\t2023-06-21 09:17:43 [debug ] Building models...\n", + " From worker 4:\t2023-06-21 09:17:43 [debug ] Models have been built.\n", + " From worker 4:\t2023-06-21 09:17:43 [info ] Parameters are saved to d3rlpy_logs/DiscreteBC_20230621091743/params.json params={'action_scaler': None, 'batch_size': 100, 'beta': 0.5, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 1.0, 'generated_maxlen': 100000, 'learning_rate': 0.001, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[3.7828405, 2.7306168, 2.8765562, 2.9279127, 4.3325047, 1.8230203,\n", + " From worker 4:\t 2.4710565]], dtype=float32), 'minimum': array([[-1.703448 , -2.7389183, -4.2741866, -4.1235604, -2.9592102,\n", + " From worker 4:\t -2.8719223, -2.5508852]], dtype=float32)}}, 'use_gpu': None, 'algorithm': 'DiscreteBC', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|███████████████████| 69/69 [00:00<00:00, 632.32it/s, loss=2.32]\n", + " From worker 4:\terror in DiscreteBC\n", + " From worker 4:\t2023-06-21 09:17:43 [debug ] RoundIterator is selected.\n", + " From worker 4:\t2023-06-21 09:17:43 [info ] Directory is created at d3rlpy_logs/DQN_20230621091743\n", + " From worker 4:\t2023-06-21 09:17:43 [debug ] Fitting scaler... 
scaler=min_max\n", + " From worker 4:\t2023-06-21 09:17:43 [debug ] Building models...\n", + " From worker 4:\t2023-06-21 09:17:43 [debug ] Models have been built.\n", + "Epoch 1/1: 100%|████████████████| 246/246 [00:00<00:00, 619.05it/s, loss=0.069]\n", + " From worker 4:\t2023-06-21 09:17:43 [info ] Parameters are saved to d3rlpy_logs/DQN_20230621091743/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[0. , 0.85714287, 1. , 0. , 1.0121458 ,\n", + " From worker 4:\t 1. , 1. ]], dtype=float32), 'minimum': array([[ 0. , -0.71428573, -0.96666664, 0. 
, -1.0080972 ,\n", + " From worker 4:\t -0.99 , -0.98 ]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (7,), 'action_size': 101}\n", + " From worker 2:\t2023-06-21 09:17:43 [info ] DQN_20230621091742: epoch=1 step=246 epoch=1 metrics={'time_sample_batch': 4.376337780215876e-05, 'time_algorithm_update': 0.0015390084041812557, 'loss': 0.06846761285531812, 'time_step': 0.001607822208869748, 'td_error': 0.3295860414898109} step=246\n", + " From worker 2:\t2023-06-21 09:17:43 [info ] Model parameters are saved to d3rlpy_logs/DQN_20230621091742/model_246.pt\n", + " From worker 2:\t2023-06-21 09:17:43 [debug ] RoundIterator is selected.\n", + "Epoch 1/1: 100%|█| 109/109 [00:00<00:00, 175.98it/s, temp_loss=-.55, temp=0.993\n", + " From worker 5:\t2023-06-21 09:17:43 [info ] DiscreteSAC_20230621091742: epoch=1 step=109 epoch=1 metrics={'time_sample_batch': 6.348277450701512e-05, 'time_algorithm_update': 0.005577789534122572, 'temp_loss': -0.5449240399917604, 'temp': 0.9939265469892309, 'critic_loss': 4.354956706729504, 'actor_loss': -4.907961683535794, 'time_step': 0.005669665992806811, 'td_error': 0.3095081326876401} step=109\n", + " From worker 5:\t2023-06-21 09:17:43 [info ] Model parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091742/model_109.pt\n", + " From worker 5:\t2023-06-21 09:17:43 [debug ] RoundIterator is selected.\n", + "Epoch 1/1: 100%|██████████████████| 190/190 [00:00<00:00, 720.48it/s, loss=nan]\n", + "Epoch 1/1: 100%|█████████████████| 246/246 [00:00<00:00, 365.47it/s, loss=3.53]\n", + " From worker 4:\t2023-06-21 09:17:43 [info ] DQN_20230621091743: epoch=1 step=190 epoch=1 metrics={'time_sample_batch': 3.513787922106291e-05, 'time_algorithm_update': 0.0013250714854190224, 'loss': nan, 'time_step': 0.0013815679048237047, 'td_error': nan} step=190\n", + " From worker 4:\t2023-06-21 09:17:43 [info ] Model parameters are saved to d3rlpy_logs/DQN_20230621091743/model_190.pt\n", + " From 
worker 4:\t2023-06-21 09:17:43 [debug ] RoundIterator is selected.\n", + " From worker 6:\t2023-06-21 09:17:43 [info ] DiscreteBCQ_20230621091742: epoch=1 step=246 epoch=1 metrics={'time_sample_batch': 3.9584268399370396e-05, 'time_algorithm_update': 0.0026645340570589393, 'loss': 3.5020599316775316, 'time_step': 0.002727894279045787, 'td_error': 0.33054662400169754} step=246\n", + " From worker 6:\t2023-06-21 09:17:43 [info ] Model parameters are saved to d3rlpy_logs/DiscreteBCQ_20230621091742/model_246.pt\n", + " From worker 6:\t2023-06-21 09:17:43 [debug ] RoundIterator is selected.\n", + " From worker 2:\t2023-06-21 09:17:44 [info ] Directory is created at d3rlpy_logs/DQN_20230621091744\n", + " From worker 2:\t2023-06-21 09:17:44 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 2:\t2023-06-21 09:17:44 [debug ] Building models...\n", + " From worker 2:\t2023-06-21 09:17:44 [debug ] Models have been built.\n", + " From worker 2:\t2023-06-21 09:17:44 [info ] Parameters are saved to d3rlpy_logs/DQN_20230621091744/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[3.1024223e+12, 7.7798225e+11, 3.9527328e+12, 2.1446488e+12,\n", + " From worker 2:\t 6.3067469e+12, 2.6677488e+12, 4.8132826e+12]], dtype=float32), 'minimum': array([[-3.1024223e+12, -7.7798225e+11, -3.9527328e+12, -2.1446488e+12,\n", + " From worker 2:\t -7.4621214e+12, -2.8364922e+12, -3.2294689e+12]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 
'algorithm': 'DQN', 'observation_shape': (7,), 'action_size': 101}\n", + " From worker 5:\t2023-06-21 09:17:44 [info ] Directory is created at d3rlpy_logs/DiscreteBC_20230621091744\n", + " From worker 5:\t2023-06-21 09:17:44 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 5:\t2023-06-21 09:17:44 [debug ] Building models...\n", + " From worker 5:\t2023-06-21 09:17:44 [debug ] Models have been built.\n", + " From worker 5:\t2023-06-21 09:17:44 [info ] Parameters are saved to d3rlpy_logs/DiscreteBC_20230621091744/params.json params={'action_scaler': None, 'batch_size': 100, 'beta': 0.5, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 1.0, 'generated_maxlen': 100000, 'learning_rate': 0.001, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[1.3023418 , 0.7144378 , 0.85504866, 0.8611191 , 0.7454196 ,\n", + " From worker 5:\t 0.5472575 , 0.28775597]], dtype=float32), 'minimum': array([[-0.60449946, -0.7369663 , -1.4143645 , -1.3660418 , -0.81751764,\n", + " From worker 5:\t -0.5384156 , -0.20355542]], dtype=float32)}}, 'use_gpu': None, 'algorithm': 'DiscreteBC', 'observation_shape': (7,), 'action_size': 101}\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/1: 100%|███████████████████| 78/78 [00:00<00:00, 838.04it/s, loss=2.26]\n", + "Epoch 1/1: 100%|███████████████| 218/218 [00:00<00:00, 799.78it/s, loss=0.0928]\n", + " From worker 2:\t2023-06-21 09:17:44 [info ] DQN_20230621091744: epoch=1 step=218 epoch=1 metrics={'time_sample_batch': 3.022557004876093e-05, 'time_algorithm_update': 0.001195727138344301, 'loss': 0.09131083575102987, 'time_step': 0.0012449113600844638, 'td_error': 0.33717236146601487} step=218\n", + " From worker 2:\t2023-06-21 
09:17:44 [info ] Model parameters are saved to d3rlpy_logs/DQN_20230621091744/model_218.pt\n", + " From worker 2:\t2023-06-21 09:17:44 [debug ] RoundIterator is selected.\n", + " From worker 5:\terror in DiscreteBC\n", + " From worker 5:\t2023-06-21 09:17:44 [debug ] RoundIterator is selected.\n", + " From worker 4:\t2023-06-21 09:17:45 [info ] Directory is created at d3rlpy_logs/DQN_20230621091745\n", + " From worker 4:\t2023-06-21 09:17:45 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 4:\t2023-06-21 09:17:45 [debug ] Building models...\n", + " From worker 4:\t2023-06-21 09:17:45 [debug ] Models have been built.\n", + " From worker 4:\t2023-06-21 09:17:45 [info ] Parameters are saved to d3rlpy_logs/DQN_20230621091745/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[1. , 0.9285714, 1. , 1. , 1.0121458, 1. ,\n", + " From worker 4:\t 1. ]], dtype=float32), 'minimum': array([[ 0. , -0.71428573, -0.96666664, 0. , -1.0080972 ,\n", + " From worker 4:\t -0.99 , -0.98 ]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (7,), 'action_size': 101}\n", + " From worker 6:\t2023-06-21 09:17:45 [info ] Directory is created at d3rlpy_logs/DiscreteBC_20230621091745\n", + " From worker 6:\t2023-06-21 09:17:45 [debug ] Fitting scaler... 
scaler=min_max\n", + " From worker 6:\t2023-06-21 09:17:45 [debug ] Building models...\n", + " From worker 6:\t2023-06-21 09:17:45 [debug ] Models have been built.\n", + " From worker 6:\t2023-06-21 09:17:45 [info ] Parameters are saved to d3rlpy_logs/DiscreteBC_20230621091745/params.json params={'action_scaler': None, 'batch_size': 100, 'beta': 0.5, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 1.0, 'generated_maxlen': 100000, 'learning_rate': 0.001, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[250.33273 , 260.5288 , 50.23776 , 29.988697, 10.523216,\n", + " From worker 6:\t 99.29697 , 73.98687 ]], dtype=float32), 'minimum': array([[-249.43936 , -274.04565 , -49.83971 , -30.540922, -12.57127 ,\n", + " From worker 6:\t -100.146736, -233.99266 ]], dtype=float32)}}, 'use_gpu': None, 'algorithm': 'DiscreteBC', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|███████████████████| 69/69 [00:00<00:00, 838.29it/s, loss=2.22]\n", + "Epoch 1/1: 100%|███████████████| 218/218 [00:00<00:00, 784.27it/s, loss=0.0959]\n", + " From worker 6:\terror in DiscreteBC\n", + " From worker 4:\t2023-06-21 09:17:45 [info ] DQN_20230621091745: epoch=1 step=218 epoch=1 metrics={'time_sample_batch': 2.9512501637870018e-05, 'time_algorithm_update': 0.001221386664504305, 'loss': 0.09440876843424839, 'time_step': 0.0012693536390952014, 'td_error': 0.34156056917367106} step=218\n", + " From worker 4:\t2023-06-21 09:17:45 [info ] Model parameters are saved to d3rlpy_logs/DQN_20230621091745/model_218.pt\n", + " From worker 4:\t2023-06-21 09:17:45 [debug ] RoundIterator is selected.\n", + " From worker 6:\t2023-06-21 09:17:45 [debug ] RoundIterator is selected.\n", + " From worker 2:\t2023-06-21 
09:17:46 [info ] Directory is created at d3rlpy_logs/DQN_20230621091746\n", + " From worker 2:\t2023-06-21 09:17:46 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 2:\t2023-06-21 09:17:46 [debug ] Building models...\n", + " From worker 2:\t2023-06-21 09:17:46 [debug ] Models have been built.\n", + " From worker 2:\t2023-06-21 09:17:46 [info ] Parameters are saved to d3rlpy_logs/DQN_20230621091746/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[3.1024223e+12, 7.7798225e+11, 3.9527328e+12, 2.1446488e+12,\n", + " From worker 2:\t 5.5217808e+12, 2.6677488e+12, 4.8132826e+12]], dtype=float32), 'minimum': array([[-3.1024223e+12, -7.7798225e+11, -3.9527328e+12, -2.1446488e+12,\n", + " From worker 2:\t -7.4621214e+12, -2.8364922e+12, -3.2294689e+12]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (7,), 'action_size': 101}\n", + " From worker 3:\t2023-06-21 09:17:46 [info ] Directory is created at d3rlpy_logs/DiscreteBC_20230621091746\n", + " From worker 3:\t2023-06-21 09:17:46 [debug ] Fitting scaler... 
scaler=min_max\n", + " From worker 3:\t2023-06-21 09:17:46 [debug ] Building models...\n", + " From worker 3:\t2023-06-21 09:17:46 [debug ] Models have been built.\n", + " From worker 3:\t2023-06-21 09:17:46 [info ] Parameters are saved to d3rlpy_logs/DiscreteBC_20230621091746/params.json params={'action_scaler': None, 'batch_size': 100, 'beta': 0.5, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 1.0, 'generated_maxlen': 100000, 'learning_rate': 0.001, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[0., 0., 0., 0., 0., 0., 0.]], dtype=float32), 'minimum': array([[0., 0., 0., 0., 0., 0., 0.]], dtype=float32)}}, 'use_gpu': None, 'algorithm': 'DiscreteBC', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|███████████████| 246/246 [00:00<00:00, 838.96it/s, loss=0.0849]\n", + " From worker 2:\t2023-06-21 09:17:46 [info ] DQN_20230621091746: epoch=1 step=246 epoch=1 metrics={'time_sample_batch': 2.669609659086398e-05, 'time_algorithm_update': 0.0011430755863344765, 'loss': 0.0840105459139478, 'time_step': 0.0011873080478451115, 'td_error': 0.33263165771910613} step=246\n", + " From worker 2:\t2023-06-21 09:17:46 [info ] Model parameters are saved to d3rlpy_logs/DQN_20230621091746/model_246.pt\n", + "Epoch 1/1: 100%|████████████████████| 78/78 [00:00<00:00, 936.22it/s, loss=nan]\n", + " From worker 3:\terror in DiscreteBC\n", + " From worker 2:\t2023-06-21 09:17:47 [debug ] RoundIterator is selected.\n", + " From worker 2:\t2023-06-21 09:17:47 [info ] Directory is created at d3rlpy_logs/DQN_20230621091747\n", + " From worker 2:\t2023-06-21 09:17:47 [debug ] Fitting scaler... 
scaler=min_max\n", + " From worker 2:\t2023-06-21 09:17:47 [debug ] Building models...\n", + " From worker 2:\t2023-06-21 09:17:47 [debug ] Models have been built.\n", + " From worker 2:\t2023-06-21 09:17:47 [info ] Parameters are saved to d3rlpy_logs/DQN_20230621091747/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[3.2017066 , 3.8068576 , 3.1260934 , 4.35434 , 2.6527042 ,\n", + " From worker 2:\t 1.3774092 , 0.32658172]], dtype=float32), 'minimum': array([[-2.1025314 , -2.8379962 , -2.7936583 , -2.3866355 , -3.2439196 ,\n", + " From worker 2:\t -2.464134 , -0.32029364]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (7,), 'action_size': 101}\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " From worker 3:\t2023-06-21 09:17:47 [debug ] RoundIterator is selected.\n", + "Epoch 1/1: 100%|███████████████| 246/246 [00:00<00:00, 895.66it/s, loss=0.0896]\n", + " From worker 2:\t2023-06-21 09:17:47 [info ] DQN_20230621091747: epoch=1 step=246 epoch=1 metrics={'time_sample_batch': 2.6294855567497934e-05, 'time_algorithm_update': 0.0010709723805993553, 'loss': 0.0887927300319439, 'time_step': 0.0011126607414183578, 'td_error': 0.3539537419557423} step=246\n", + " From worker 2:\t2023-06-21 09:17:47 [info ] Model parameters are saved to d3rlpy_logs/DQN_20230621091747/model_246.pt\n", + " From worker 2:\t2023-06-21 09:17:47 [debug ] RoundIterator is 
selected.\n", + " From worker 3:\t2023-06-21 09:17:48 [info ] Directory is created at d3rlpy_logs/DQN_20230621091748\n", + " From worker 3:\t2023-06-21 09:17:48 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 3:\t2023-06-21 09:17:48 [debug ] Building models...\n", + " From worker 3:\t2023-06-21 09:17:48 [debug ] Models have been built.\n", + " From worker 3:\t2023-06-21 09:17:48 [info ] Parameters are saved to d3rlpy_logs/DQN_20230621091748/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[0., 0., 0., 0., 0., 0., 0.]], dtype=float32), 'minimum': array([[0., 0., 0., 0., 0., 0., 0.]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|██████████████████| 218/218 [00:00<00:00, 848.79it/s, loss=nan]\n", + " From worker 3:\t2023-06-21 09:17:48 [info ] DQN_20230621091748: epoch=1 step=218 epoch=1 metrics={'time_sample_batch': 2.6303693788860918e-05, 'time_algorithm_update': 0.0011304857534006102, 'loss': nan, 'time_step': 0.0011736056126585793, 'td_error': nan} step=218\n", + " From worker 3:\t2023-06-21 09:17:48 [info ] Model parameters are saved to d3rlpy_logs/DQN_20230621091748/model_218.pt\n", + " From worker 3:\t2023-06-21 09:17:48 [debug ] RoundIterator is selected.\n", + " From worker 2:\t2023-06-21 09:17:49 [info ] Directory is created at d3rlpy_logs/DQN_20230621091749\n", + " From worker 2:\t2023-06-21 09:17:49 [debug ] 
Fitting scaler... scaler=min_max\n", + " From worker 2:\t2023-06-21 09:17:49 [debug ] Building models...\n", + " From worker 2:\t2023-06-21 09:17:49 [debug ] Models have been built.\n", + " From worker 2:\t2023-06-21 09:17:49 [info ] Parameters are saved to d3rlpy_logs/DQN_20230621091749/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[3.2017066 , 3.8068576 , 3.1260934 , 4.35434 , 2.5534215 ,\n", + " From worker 2:\t 1.2042079 , 0.32398468]], dtype=float32), 'minimum': array([[-2.1025314 , -2.9232402 , -2.7936583 , -2.3866355 , -3.2439196 ,\n", + " From worker 2:\t -1.6064517 , -0.23013951]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|████████████████| 218/218 [00:00<00:00, 840.38it/s, loss=0.118]\n", + " From worker 2:\t2023-06-21 09:17:49 [info ] DQN_20230621091749: epoch=1 step=218 epoch=1 metrics={'time_sample_batch': 2.6543206031169366e-05, 'time_algorithm_update': 0.0011419735917257607, 'loss': 0.1153688499310968, 'time_step': 0.001184966586051731, 'td_error': 0.34403533425650623} step=218\n", + " From worker 2:\t2023-06-21 09:17:49 [info ] Model parameters are saved to d3rlpy_logs/DQN_20230621091749/model_218.pt\n", + " From worker 2:\t2023-06-21 09:17:49 [debug ] RoundIterator is selected.\n", + " From worker 3:\t2023-06-21 09:17:50 [info ] Directory is created at d3rlpy_logs/DQN_20230621091750\n", + " From worker 
3:\t2023-06-21 09:17:50 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 3:\t2023-06-21 09:17:50 [debug ] Building models...\n", + " From worker 3:\t2023-06-21 09:17:50 [debug ] Models have been built.\n", + " From worker 3:\t2023-06-21 09:17:50 [info ] Parameters are saved to d3rlpy_logs/DQN_20230621091750/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[0., 0., 0., 0., 0., 0., 0.]], dtype=float32), 'minimum': array([[0., 0., 0., 0., 0., 0., 0.]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|██████████████████| 190/190 [00:00<00:00, 840.56it/s, loss=nan]\n", + " From worker 3:\t2023-06-21 09:17:50 [info ] DQN_20230621091750: epoch=1 step=190 epoch=1 metrics={'time_sample_batch': 2.566764229222348e-05, 'time_algorithm_update': 0.001141886962087531, 'loss': nan, 'time_step': 0.0011843317433407432, 'td_error': nan} step=190\n", + " From worker 3:\t2023-06-21 09:17:50 [info ] Model parameters are saved to d3rlpy_logs/DQN_20230621091750/model_190.pt\n", + " From worker 3:\t2023-06-21 09:17:50 [debug ] RoundIterator is selected.\n", + " From worker 5:\t2023-06-21 09:17:51 [info ] Directory is created at d3rlpy_logs/DQN_20230621091751\n", + " From worker 5:\t2023-06-21 09:17:51 [debug ] Fitting scaler... 
scaler=min_max\n", + " From worker 5:\t2023-06-21 09:17:51 [debug ] Building models...\n", + " From worker 5:\t2023-06-21 09:17:51 [debug ] Models have been built.\n", + " From worker 5:\t2023-06-21 09:17:51 [info ] Parameters are saved to d3rlpy_logs/DQN_20230621091751/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[1.315809 , 0.7300549 , 0.8098967 , 1.2654152 , 0.7491175 ,\n", + " From worker 5:\t 0.5436357 , 0.20047979]], dtype=float32), 'minimum': array([[-0.5729552 , -0.7144378 , -1.327884 , -0.81232125, -0.81751764,\n", + " From worker 5:\t -0.5665671 , -0.28309873]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|███████████████| 218/218 [00:00<00:00, 852.55it/s, loss=0.0784]\n", + " From worker 5:\t2023-06-21 09:17:52 [info ] DQN_20230621091751: epoch=1 step=218 epoch=1 metrics={'time_sample_batch': 2.64316523840668e-05, 'time_algorithm_update': 0.0011246554348446906, 'loss': 0.0772749390623985, 'time_step': 0.0011681132360335884, 'td_error': 0.3391844592173055} step=218\n", + " From worker 5:\t2023-06-21 09:17:52 [info ] Model parameters are saved to d3rlpy_logs/DQN_20230621091751/model_218.pt\n", + " From worker 5:\t2023-06-21 09:17:52 [debug ] RoundIterator is selected.\n", + " From worker 5:\t2023-06-21 09:17:52 [info ] Directory is created at d3rlpy_logs/DQN_20230621091752\n", + " From worker 5:\t2023-06-21 09:17:52 
[debug ] Fitting scaler... scaler=min_max\n", + " From worker 5:\t2023-06-21 09:17:52 [debug ] Building models...\n", + " From worker 5:\t2023-06-21 09:17:52 [debug ] Models have been built.\n", + " From worker 5:\t2023-06-21 09:17:52 [info ] Parameters are saved to d3rlpy_logs/DQN_20230621091752/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[1.315809 , 0.7300549 , 0.8098967 , 1.2654152 , 0.7321317 ,\n", + " From worker 5:\t 0.5388433 , 0.20047979]], dtype=float32), 'minimum': array([[-0.5729552 , -0.7144378 , -1.327884 , -0.81232125, -0.81751764,\n", + " From worker 5:\t -0.5665671 , -0.28309873]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (7,), 'action_size': 101}\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/1: 100%|███████████████| 218/218 [00:00<00:00, 914.30it/s, loss=0.0887]\n", + " From worker 5:\t2023-06-21 09:17:52 [info ] DQN_20230621091752: epoch=1 step=218 epoch=1 metrics={'time_sample_batch': 2.3355177783091135e-05, 'time_algorithm_update': 0.0010528805059030516, 'loss': 0.08727689919600246, 'time_step': 0.0010901317683928605, 'td_error': 0.33895740038788924} step=218\n", + " From worker 5:\t2023-06-21 09:17:52 [info ] Model parameters are saved to d3rlpy_logs/DQN_20230621091752/model_218.pt\n", + " From worker 5:\t2023-06-21 09:17:52 [debug ] RoundIterator is selected.\n", + " From worker 5:\t2023-06-21 09:17:53 [info 
] Directory is created at d3rlpy_logs/DQN_20230621091753\n", + " From worker 5:\t2023-06-21 09:17:53 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 5:\t2023-06-21 09:17:53 [debug ] Building models...\n", + " From worker 5:\t2023-06-21 09:17:53 [debug ] Models have been built.\n", + " From worker 5:\t2023-06-21 09:17:53 [info ] Parameters are saved to d3rlpy_logs/DQN_20230621091753/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[1.315809 , 0.7300549 , 0.8098967 , 1.2654152 , 0.7491175 ,\n", + " From worker 5:\t 0.5436357 , 0.20047979]], dtype=float32), 'minimum': array([[-0.5729552 , -0.7144378 , -1.327884 , -0.81232125, -0.81751764,\n", + " From worker 5:\t -0.5665671 , -0.28309873]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|███████████████| 218/218 [00:00<00:00, 809.84it/s, loss=0.0817]\n", + " From worker 5:\t2023-06-21 09:17:53 [info ] DQN_20230621091753: epoch=1 step=218 epoch=1 metrics={'time_sample_batch': 2.8666006315738784e-05, 'time_algorithm_update': 0.001183026427522712, 'loss': 0.08032481709854045, 'time_step': 0.001229493989856965, 'td_error': 0.3359769608276147} step=218\n", + " From worker 5:\t2023-06-21 09:17:53 [info ] Model parameters are saved to d3rlpy_logs/DQN_20230621091753/model_218.pt\n", + " From worker 7:\t2023-06-21 09:17:53 [debug ] RoundIterator is selected.\n", + " From worker 
7:\t2023-06-21 09:17:53 [info ] Directory is created at d3rlpy_logs/DiscreteSAC_20230621091753\n", + " From worker 7:\t2023-06-21 09:17:53 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 7:\t2023-06-21 09:17:53 [debug ] Building models...\n", + " From worker 7:\t2023-06-21 09:17:53 [debug ] Models have been built.\n", + " From worker 7:\t2023-06-21 09:17:53 [info ] Parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091753/params.json params={'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 64, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'gamma': 0.99, 'generated_maxlen': 100000, 'initial_temperature': 1.0, 'n_critics': 2, 'n_frames': 1, 'n_steps': 1, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[249.11354 , 268.5594 , 98.88546 , 30.186954, 10.7589 ,\n", + " From worker 7:\t 100.21267 , 29.41048 ]], dtype=float32), 'minimum': array([[-250.33273 , -273.6624 , -50.02649 , -29.866972, -12.57127 ,\n", + " From worker 7:\t -98.42987 , -29.84362 ]], dtype=float32)}}, 'target_update_interval': 8000, 'temp_learning_rate': 0.0003, 'temp_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'use_gpu': None, 'algorithm': 'DiscreteSAC', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|█| 123/123 [00:00<00:00, 242.14it/s, temp_loss=-.601, temp=1, c\n", + " 
From worker 7:\t2023-06-21 09:17:54 [info ] DiscreteSAC_20230621091753: epoch=1 step=123 epoch=1 metrics={'time_sample_batch': 5.082773968456237e-05, 'time_algorithm_update': 0.00404701581815394, 'temp_loss': -0.6160907208828664, 'temp': 1.004140782162426, 'critic_loss': 4.586637864267923, 'actor_loss': -4.934140949714474, 'time_step': 0.004121117475556164, 'td_error': 2.400220012705727} step=123\n", + " From worker 7:\t2023-06-21 09:17:54 [info ] Model parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091753/model_123.pt\n", + " From worker 7:\t2023-06-21 09:17:54 [debug ] RoundIterator is selected.\n", + " From worker 7:\t2023-06-21 09:17:54 [info ] Directory is created at d3rlpy_logs/DiscreteSAC_20230621091754\n", + " From worker 7:\t2023-06-21 09:17:54 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 7:\t2023-06-21 09:17:54 [debug ] Building models...\n", + " From worker 7:\t2023-06-21 09:17:54 [debug ] Models have been built.\n", + " From worker 7:\t2023-06-21 09:17:54 [info ] Parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091754/params.json params={'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 64, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'gamma': 0.99, 'generated_maxlen': 100000, 'initial_temperature': 1.0, 'n_critics': 2, 'n_frames': 1, 'n_steps': 1, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': 
array([[249.43936 , 270.23843 , 50.170696, 30.186954, 10.7589 ,\n", + " From worker 7:\t 100.21267 , 29.744823]], dtype=float32), 'minimum': array([[-250.33273 , -274.04565 , -49.614845, -29.988697, -9.468852,\n", + " From worker 7:\t -99.13402 , -30.205732]], dtype=float32)}}, 'target_update_interval': 8000, 'temp_learning_rate': 0.0003, 'temp_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'use_gpu': None, 'algorithm': 'DiscreteSAC', 'observation_shape': (7,), 'action_size': 101}\n", + " From worker 2:\t2023-06-21 09:17:54 [info ] Directory is created at d3rlpy_logs/DQN_20230621091754\n", + " From worker 2:\t2023-06-21 09:17:54 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 2:\t2023-06-21 09:17:54 [debug ] Building models...\n", + " From worker 2:\t2023-06-21 09:17:54 [debug ] Models have been built.\n", + " From worker 2:\t2023-06-21 09:17:54 [info ] Parameters are saved to d3rlpy_logs/DQN_20230621091754/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[3.2017066 , 3.8068576 , 3.1260934 , 4.35434 , 2.5534215 ,\n", + " From worker 2:\t 1.3774092 , 0.32658172]], dtype=float32), 'minimum': array([[-2.1025314 , -2.9232402 , -2.7936583 , -2.3866355 , -3.2439196 ,\n", + " From worker 2:\t -1.4053072 , -0.32029364]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (7,), 'action_size': 101}\n" + ] + }, + { 
+ "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/1: 100%|█| 95/95 [00:00<00:00, 231.40it/s, temp_loss=-.711, temp=1, cri\n", + " From worker 7:\t2023-06-21 09:17:55 [info ] DiscreteSAC_20230621091754: epoch=1 step=95 epoch=1 metrics={'time_sample_batch': 5.0424274645353616e-05, 'time_algorithm_update': 0.0042391827231959296, 'temp_loss': -0.7389284298853263, 'temp': 1.0043758367237292, 'critic_loss': 4.360287598559731, 'actor_loss': -4.9657077889693415, 'time_step': 0.004313064876355623, 'td_error': 0.6025613924128718} step=95\n", + " From worker 7:\t2023-06-21 09:17:55 [info ] Model parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091754/model_95.pt\n", + " From worker 7:\t2023-06-21 09:17:55 [debug ] RoundIterator is selected.\n", + " From worker 7:\t2023-06-21 09:17:55 [info ] Directory is created at d3rlpy_logs/DiscreteSAC_20230621091755\n", + " From worker 7:\t2023-06-21 09:17:55 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 7:\t2023-06-21 09:17:55 [debug ] Building models...\n", + " From worker 7:\t2023-06-21 09:17:55 [debug ] Models have been built.\n", + " From worker 7:\t2023-06-21 09:17:55 [info ] Parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091755/params.json params={'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 64, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'gamma': 0.99, 'generated_maxlen': 100000, 'initial_temperature': 1.0, 'n_critics': 2, 'n_frames': 1, 'n_steps': 1, 'q_func_factory': {'type': 
'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[249.11354 , 268.5594 , 98.88546 , 30.186954, 10.7589 ,\n", + " From worker 7:\t 100.21267 , 29.41048 ]], dtype=float32), 'minimum': array([[-250.33273 , -273.6624 , -50.02649 , -29.866972, -12.57127 ,\n", + " From worker 7:\t -98.42987 , -29.84362 ]], dtype=float32)}}, 'target_update_interval': 8000, 'temp_learning_rate': 0.0003, 'temp_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'use_gpu': None, 'algorithm': 'DiscreteSAC', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|███████████████| 246/246 [00:00<00:00, 791.98it/s, loss=0.0905]\n", + " From worker 2:\t2023-06-21 09:17:55 [info ] DQN_20230621091754: epoch=1 step=246 epoch=1 metrics={'time_sample_batch': 2.9921531677246094e-05, 'time_algorithm_update': 0.0012090118919930806, 'loss': 0.08958895225077868, 'time_step': 0.0012574554458866275, 'td_error': 0.3250261947255372} step=246\n", + " From worker 2:\t2023-06-21 09:17:55 [info ] Model parameters are saved to d3rlpy_logs/DQN_20230621091754/model_246.pt\n", + " From worker 2:\t/Users/ppalmes/phome/julia/AutoMLPipeline.jl/AutoOfflineRL/examples/.CondaPkg/env/lib/python3.11/site-packages/sklearn/decomposition/_fastica.py:542: FutureWarning: Starting in v1.3, whiten='unit-variance' will be used by default.\n", + " From worker 2:\t warnings.warn(\n", + " From worker 2:\t/Users/ppalmes/phome/julia/AutoMLPipeline.jl/AutoOfflineRL/examples/.CondaPkg/env/lib/python3.11/site-packages/sklearn/decomposition/_fastica.py:123: ConvergenceWarning: FastICA did not converge. 
Consider increasing tolerance or the maximum number of iterations.\n", + " From worker 2:\t warnings.warn(\n", + " From worker 2:\t2023-06-21 09:17:55 [debug ] RoundIterator is selected.\n", + " From worker 2:\t2023-06-21 09:17:55 [info ] Directory is created at d3rlpy_logs/DiscreteBC_20230621091755\n", + " From worker 2:\t2023-06-21 09:17:55 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 2:\t2023-06-21 09:17:55 [debug ] Building models...\n", + " From worker 2:\t2023-06-21 09:17:55 [debug ] Models have been built.\n", + " From worker 2:\t2023-06-21 09:17:55 [info ] Parameters are saved to d3rlpy_logs/DiscreteBC_20230621091755/params.json params={'action_scaler': None, 'batch_size': 100, 'beta': 0.5, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 1.0, 'generated_maxlen': 100000, 'learning_rate': 0.001, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[1.72402688e+11, 2.24971407e+11, 1.02857015e+12, 3.55687046e+12,\n", + " From worker 2:\t 5.37091244e+11, 6.58568276e+12, 1.70976110e+12]], dtype=float32), 'minimum': array([[-2.3231450e-02, -4.6398390e+11, -3.0836313e+11, -7.4163159e+11,\n", + " From worker 2:\t -1.2675402e+12, -2.5034414e+12, -5.7268870e+12]], dtype=float32)}}, 'use_gpu': None, 'algorithm': 'DiscreteBC', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|███████████████████| 78/78 [00:00<00:00, 866.27it/s, loss=2.27]\n", + " From worker 2:\terror in DiscreteBC2023-06-21 09:17:55 [debug ] RoundIterator is selected.\n", + "Epoch 1/1: 100%|█| 123/123 [00:00<00:00, 217.74it/s, temp_loss=-.676, temp=1.01\n", + " From worker 7:\t2023-06-21 09:17:55 [info ] DiscreteSAC_20230621091755: epoch=1 step=123 epoch=1 metrics={'time_sample_batch': 
5.268469089415015e-05, 'time_algorithm_update': 0.004506025857072536, 'temp_loss': -0.684950694621624, 'temp': 1.0064082373448504, 'critic_loss': 4.238984786398042, 'actor_loss': -4.9216519371280825, 'time_step': 0.004583397531897072, 'td_error': 2.007497995643337} step=123\n", + " From worker 7:\t2023-06-21 09:17:55 [info ] Model parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091755/model_123.pt\n", + " From worker 3:\t2023-06-21 09:17:55 [info ] Directory is created at d3rlpy_logs/DQN_20230621091755\n", + " From worker 3:\t2023-06-21 09:17:55 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 3:\t2023-06-21 09:17:55 [debug ] Building models...\n", + " From worker 3:\t2023-06-21 09:17:55 [debug ] Models have been built.\n", + " From worker 3:\t2023-06-21 09:17:55 [info ] Parameters are saved to d3rlpy_logs/DQN_20230621091755/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[0., 0., 0., 0., 0., 0., 0.]], dtype=float32), 'minimum': array([[0., 0., 0., 0., 0., 0., 0.]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|██████████████████| 218/218 [00:00<00:00, 843.91it/s, loss=nan]\n", + " From worker 3:\t2023-06-21 09:17:56 [info ] DQN_20230621091755: epoch=1 step=218 epoch=1 metrics={'time_sample_batch': 2.6251197954930297e-05, 'time_algorithm_update': 0.0011368268126741461, 'loss': nan, 'time_step': 
0.001179718096321876, 'td_error': nan} step=218\n", + " From worker 3:\t2023-06-21 09:17:56 [info ] Model parameters are saved to d3rlpy_logs/DQN_20230621091755/model_218.pt\n", + " From worker 3:\t2023-06-21 09:17:56 [debug ] RoundIterator is selected.\n", + " From worker 3:\t2023-06-21 09:17:56 [info ] Directory is created at d3rlpy_logs/DQN_20230621091756\n", + " From worker 3:\t2023-06-21 09:17:56 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 3:\t2023-06-21 09:17:56 [debug ] Building models...\n", + " From worker 3:\t2023-06-21 09:17:56 [debug ] Models have been built.\n", + " From worker 3:\t2023-06-21 09:17:56 [info ] Parameters are saved to d3rlpy_logs/DQN_20230621091756/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[0.9306842 , 1.1401778 , 0.726072 , 0.33919048, 0.40193763,\n", + " From worker 3:\t 0.2281029 , 0.7055056 ]], dtype=float32), 'minimum': array([[-0.46761814, -0.36881566, -0.31744727, -0.32251614, -0.45945558,\n", + " From worker 3:\t -0.46688473, -0.53166074]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (7,), 'action_size': 101}\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/1: 100%|███████████████| 218/218 [00:00<00:00, 877.64it/s, loss=0.0968]\n", + " From worker 3:\t2023-06-21 09:17:56 [info ] DQN_20230621091756: epoch=1 step=218 epoch=1 metrics={'time_sample_batch': 2.6084961147483336e-05, 
'time_algorithm_update': 0.001093139342211802, 'loss': 0.0951381272918314, 'time_step': 0.0011351053867865047, 'td_error': 0.3471383628173835} step=218\n", + " From worker 3:\t2023-06-21 09:17:56 [info ] Model parameters are saved to d3rlpy_logs/DQN_20230621091756/model_218.pt\n", + " From worker 3:\t2023-06-21 09:17:56 [debug ] RoundIterator is selected.\n", + " From worker 2:\t2023-06-21 09:17:56 [info ] Directory is created at d3rlpy_logs/DiscreteBC_20230621091756\n", + " From worker 2:\t2023-06-21 09:17:56 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 2:\t2023-06-21 09:17:56 [debug ] Building models...\n", + " From worker 2:\t2023-06-21 09:17:56 [debug ] Models have been built.\n", + " From worker 2:\t2023-06-21 09:17:56 [info ] Parameters are saved to d3rlpy_logs/DiscreteBC_20230621091756/params.json params={'action_scaler': None, 'batch_size': 100, 'beta': 0.5, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 1.0, 'generated_maxlen': 100000, 'learning_rate': 0.001, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[3.2017066, 2.9570794, 2.6705673, 3.0438037, 3.5316846, 1.699613 ,\n", + " From worker 2:\t 1.0649518]], dtype=float32), 'minimum': array([[-1.6466842, -4.368038 , -3.1260934, -3.867478 , -2.722415 ,\n", + " From worker 2:\t -2.443797 , -2.5203583]], dtype=float32)}}, 'use_gpu': None, 'algorithm': 'DiscreteBC', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|███████████████████| 78/78 [00:00<00:00, 939.07it/s, loss=2.21]\n", + " From worker 2:\t\n", + " From worker 2:\terror in DiscreteBC\n", + " From worker 2:\t2023-06-21 09:17:56 [debug ] RoundIterator is selected.\n", + " From worker 2:\t2023-06-21 09:17:56 [info ] Directory is created at 
d3rlpy_logs/DiscreteSAC_20230621091756\n", + " From worker 2:\t2023-06-21 09:17:56 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 2:\t2023-06-21 09:17:56 [debug ] Building models...\n", + " From worker 2:\t2023-06-21 09:17:56 [debug ] Models have been built.\n", + " From worker 2:\t2023-06-21 09:17:56 [info ] Parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091756/params.json params={'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 64, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'gamma': 0.99, 'generated_maxlen': 100000, 'initial_temperature': 1.0, 'n_critics': 2, 'n_frames': 1, 'n_steps': 1, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[3.2017066, 4.312921 , 2.968088 , 4.3629656, 3.9302883, 2.4414213,\n", + " From worker 2:\t 1.0649518]], dtype=float32), 'minimum': array([[-2.1025314, -2.7067137, -2.7774625, -2.4111278, -3.2439196,\n", + " From worker 2:\t -1.6182172, -2.5203583]], dtype=float32)}}, 'target_update_interval': 8000, 'temp_learning_rate': 0.0003, 'temp_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'use_gpu': None, 'algorithm': 'DiscreteSAC', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|█| 109/109 [00:00<00:00, 256.12it/s, temp_loss=-.589, temp=0.99\n", + " From worker 2:\t2023-06-21 09:17:57 [info ] 
DiscreteSAC_20230621091756: epoch=1 step=109 epoch=1 metrics={'time_sample_batch': 4.952325733429795e-05, 'time_algorithm_update': 0.0038246167909114733, 'temp_loss': -0.5941504532729762, 'temp': 0.995188945477162, 'critic_loss': 4.329673581167099, 'actor_loss': -4.933959532221523, 'time_step': 0.003896135802662701, 'td_error': 0.46266440524758845} step=109\n", + " From worker 2:\t2023-06-21 09:17:57 [info ] Model parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091756/model_109.pt\n", + " From worker 2:\t2023-06-21 09:17:57 [debug ] RoundIterator is selected.\n", + " From worker 2:\t2023-06-21 09:17:57 [info ] Directory is created at d3rlpy_logs/DiscreteSAC_20230621091757\n", + " From worker 2:\t2023-06-21 09:17:57 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 2:\t2023-06-21 09:17:57 [debug ] Building models...\n", + " From worker 2:\t2023-06-21 09:17:57 [debug ] Models have been built.\n", + " From worker 2:\t2023-06-21 09:17:57 [info ] Parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091757/params.json params={'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 64, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'gamma': 0.99, 'generated_maxlen': 100000, 'initial_temperature': 1.0, 'n_critics': 2, 'n_frames': 1, 'n_steps': 1, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[3.2017066, 4.312921 , 2.968088 , 4.3629656, 
3.9302883, 2.4414213,\n", + " From worker 2:\t 1.0649518]], dtype=float32), 'minimum': array([[-2.1025314, -2.7067137, -2.7729053, -2.4111278, -3.2439196,\n", + " From worker 2:\t -1.6182172, -2.5203583]], dtype=float32)}}, 'target_update_interval': 8000, 'temp_learning_rate': 0.0003, 'temp_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'use_gpu': None, 'algorithm': 'DiscreteSAC', 'observation_shape': (7,), 'action_size': 101}\n", + " From worker 3:\t2023-06-21 09:17:57 [info ] Directory is created at d3rlpy_logs/DQN_20230621091757\n", + " From worker 3:\t2023-06-21 09:17:57 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 3:\t2023-06-21 09:17:57 [debug ] Building models...\n", + " From worker 3:\t2023-06-21 09:17:57 [debug ] Models have been built.\n", + " From worker 3:\t2023-06-21 09:17:57 [info ] Parameters are saved to d3rlpy_logs/DQN_20230621091757/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[0.9306842 , 1.0696301 , 0.726072 , 0.32656038, 0.40193763,\n", + " From worker 3:\t 0.2281029 , 0.7055056 ]], dtype=float32), 'minimum': array([[-0.46761814, -0.36881566, -0.31744727, -0.32251614, -0.45945558,\n", + " From worker 3:\t -0.46688473, -0.53166074]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (7,), 'action_size': 101}\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "Epoch 1/1: 100%|███████████████| 218/218 [00:00<00:00, 698.20it/s, loss=0.0834]\n", + "Epoch 1/1: 100%|█| 109/109 [00:00<00:00, 213.08it/s, temp_loss=-.588, temp=0.99\n", + " From worker 3:\t2023-06-21 09:17:57 [info ] DQN_20230621091757: epoch=1 step=218 epoch=1 metrics={'time_sample_batch': 3.408729483228211e-05, 'time_algorithm_update': 0.0013705765435455043, 'loss': 0.082053680357378, 'time_step': 0.0014254740618784493, 'td_error': 0.3401573140539434} step=218\n", + " From worker 3:\t2023-06-21 09:17:57 [info ] Model parameters are saved to d3rlpy_logs/DQN_20230621091757/model_218.pt\n", + " From worker 3:\t2023-06-21 09:17:57 [debug ] RoundIterator is selected.\n", + " From worker 2:\t2023-06-21 09:17:57 [info ] DiscreteSAC_20230621091757: epoch=1 step=109 epoch=1 metrics={'time_sample_batch': 5.6034928068108514e-05, 'time_algorithm_update': 0.004601797926316567, 'temp_loss': -0.5967188704034758, 'temp': 0.9934305532262959, 'critic_loss': 4.554059888244769, 'actor_loss': -4.885921320783982, 'time_step': 0.004683912347216125, 'td_error': 0.41095896724406156} step=109\n", + " From worker 2:\t2023-06-21 09:17:57 [info ] Model parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091757/model_109.pt\n", + " From worker 2:\t2023-06-21 09:17:57 [debug ] RoundIterator is selected.\n", + " From worker 3:\t2023-06-21 09:17:58 [info ] Directory is created at d3rlpy_logs/DQN_20230621091758\n", + " From worker 3:\t2023-06-21 09:17:58 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 2:\t2023-06-21 09:17:58 [info ] Directory is created at d3rlpy_logs/DiscreteSAC_20230621091758\n", + " From worker 2:\t2023-06-21 09:17:58 [debug ] Fitting scaler... 
scaler=min_max\n", + " From worker 3:\t2023-06-21 09:17:58 [debug ] Building models...\n", + " From worker 3:\t2023-06-21 09:17:58 [debug ] Models have been built.\n", + " From worker 3:\t2023-06-21 09:17:58 [info ] Parameters are saved to d3rlpy_logs/DQN_20230621091758/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[0.9306842 , 1.1401778 , 0.726072 , 0.33919048, 0.39953673,\n", + " From worker 3:\t 0.22750928, 0.8964623 ]], dtype=float32), 'minimum': array([[-0.46761814, -0.37498793, -0.32135844, -0.2982687 , -0.50954545,\n", + " From worker 3:\t -0.45328155, -0.5060656 ]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (7,), 'action_size': 101}\n", + " From worker 2:\t2023-06-21 09:17:58 [debug ] Building models...\n", + " From worker 2:\t2023-06-21 09:17:58 [debug ] Models have been built.\n", + " From worker 2:\t2023-06-21 09:17:58 [info ] Parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091758/params.json params={'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 64, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 
'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'gamma': 0.99, 'generated_maxlen': 100000, 'initial_temperature': 1.0, 'n_critics': 2, 'n_frames': 1, 'n_steps': 1, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[3.2017066, 4.312921 , 3.0104208, 4.3629656, 3.9302883, 2.4414213,\n", + " From worker 2:\t 1.0649518]], dtype=float32), 'minimum': array([[-2.1025314, -2.7067137, -2.941255 , -2.3866355, -3.2439196,\n", + " From worker 2:\t -1.6358759, -2.5203583]], dtype=float32)}}, 'target_update_interval': 8000, 'temp_learning_rate': 0.0003, 'temp_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'use_gpu': None, 'algorithm': 'DiscreteSAC', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|███████████████| 218/218 [00:00<00:00, 712.20it/s, loss=0.0949]\n", + " From worker 3:\t2023-06-21 09:17:59 [info ] DQN_20230621091758: epoch=1 step=218 epoch=1 metrics={'time_sample_batch': 3.299909994142865e-05, 'time_algorithm_update': 0.00134372929914282, 'loss': 0.09326398391447482, 'time_step': 0.0013967500914127454, 'td_error': 0.3534912199521699} step=218\n", + " From worker 3:\t2023-06-21 09:17:59 [info ] Model parameters are saved to d3rlpy_logs/DQN_20230621091758/model_218.pt\n", + " From worker 3:\t2023-06-21 09:17:59 [debug ] RoundIterator is selected.\n", + " From worker 3:\t2023-06-21 09:17:59 [info ] Directory is created at d3rlpy_logs/DiscreteBC_20230621091759\n", + " From worker 3:\t2023-06-21 09:17:59 [debug ] Fitting scaler... 
scaler=min_max\n", + " From worker 3:\t2023-06-21 09:17:59 [debug ] Building models...\n", + " From worker 3:\t2023-06-21 09:17:59 [debug ] Models have been built.\n", + " From worker 3:\t2023-06-21 09:17:59 [info ] Parameters are saved to d3rlpy_logs/DiscreteBC_20230621091759/params.json params={'action_scaler': None, 'batch_size': 100, 'beta': 0.5, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 1.0, 'generated_maxlen': 100000, 'learning_rate': 0.001, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[1.8103775, 0. , 0. , 0. , 1.6644697, 0. ,\n", + " From worker 3:\t 0.2135467]], dtype=float32), 'minimum': array([[-0.52864933, 0. , 0. , 0. , -0.9911892 ,\n", + " From worker 3:\t 0. , -0.21503155]], dtype=float32)}}, 'use_gpu': None, 'algorithm': 'DiscreteBC', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|████████████████████| 70/70 [00:00<00:00, 841.43it/s, loss=nan]\n", + " From worker 3:\terror in DiscreteBC2023-06-21 09:17:59 [debug ] RoundIterator is selected.\n", + " From worker 3:\t2023-06-21 09:17:59 [info ] Directory is created at d3rlpy_logs/DiscreteSAC_20230621091759\n", + " From worker 3:\t2023-06-21 09:17:59 [debug ] Fitting scaler... 
scaler=min_max\n", + " From worker 3:\t2023-06-21 09:17:59 [debug ] Building models...\n", + "Epoch 1/1: 100%|█| 123/123 [00:00<00:00, 222.98it/s, temp_loss=-.578, temp=0.99\n", + " From worker 3:\t2023-06-21 09:17:59 [debug ] Models have been built.\n", + " From worker 3:\t2023-06-21 09:17:59 [info ] Parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091759/params.json params={'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 64, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'gamma': 0.99, 'generated_maxlen': 100000, 'initial_temperature': 1.0, 'n_critics': 2, 'n_frames': 1, 'n_steps': 1, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[0.9306842 , 1.1384733 , 0.7587837 , 0.3478039 , 0.40335074,\n", + " From worker 3:\t 0.2281029 , 0.7128955 ]], dtype=float32), 'minimum': array([[-0.46346295, -0.37549204, -0.45391178, -0.4061369 , -0.50954545,\n", + " From worker 3:\t -0.47077656, -0.5478712 ]], dtype=float32)}}, 'target_update_interval': 8000, 'temp_learning_rate': 0.0003, 'temp_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'use_gpu': None, 'algorithm': 'DiscreteSAC', 'observation_shape': (7,), 'action_size': 101}\n", + " From worker 2:\t2023-06-21 09:17:59 [info ] DiscreteSAC_20230621091758: epoch=1 step=123 epoch=1 metrics={'time_sample_batch': 5.458428607723577e-05, 
'time_algorithm_update': 0.0043946533668331985, 'temp_loss': -0.5776027046537738, 'temp': 0.9948140082320547, 'critic_loss': 4.265293878753011, 'actor_loss': -4.916958991104995, 'time_step': 0.004474630200765967, 'td_error': 0.3660259082690154} step=123\n", + " From worker 2:\t2023-06-21 09:17:59 [info ] Model parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091758/model_123.pt\n", + " From worker 2:\t/Users/ppalmes/phome/julia/AutoMLPipeline.jl/AutoOfflineRL/examples/.CondaPkg/env/lib/python3.11/site-packages/sklearn/decomposition/_fastica.py:542: FutureWarning: Starting in v1.3, whiten='unit-variance' will be used by default.\n", + " From worker 2:\t warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " From worker 2:\t/Users/ppalmes/phome/julia/AutoMLPipeline.jl/AutoOfflineRL/examples/.CondaPkg/env/lib/python3.11/site-packages/sklearn/decomposition/_fastica.py:123: ConvergenceWarning: FastICA did not converge. Consider increasing tolerance or the maximum number of iterations.\n", + " From worker 2:\t warnings.warn(\n", + " From worker 2:\t2023-06-21 09:17:59 [debug ] RoundIterator is selected.\n", + "Epoch 1/1: 100%|█| 95/95 [00:00<00:00, 226.11it/s, temp_loss=-.558, temp=0.991,\n", + " From worker 3:\t2023-06-21 09:17:59 [info ] DiscreteSAC_20230621091759: epoch=1 step=95 epoch=1 metrics={'time_sample_batch': 5.3493600142629526e-05, 'time_algorithm_update': 0.004334371968319542, 'temp_loss': -0.6238991316726529, 'temp': 0.9914160069666411, 'critic_loss': 4.550972572753304, 'actor_loss': -4.8012980159960295, 'time_step': 0.004412668629696494, 'td_error': 0.3674936336468447} step=95\n", + " From worker 3:\t2023-06-21 09:17:59 [info ] Model parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091759/model_95.pt\n", + " From worker 3:\t2023-06-21 09:17:59 [debug ] RoundIterator is selected.\n", + " From worker 6:\t2023-06-21 09:17:59 [info ] Directory is created at d3rlpy_logs/DQN_20230621091759\n", + " From worker 
6:\t2023-06-21 09:17:59 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 6:\t2023-06-21 09:17:59 [debug ] Building models...\n", + " From worker 6:\t2023-06-21 09:17:59 [debug ] Models have been built.\n", + " From worker 6:\t2023-06-21 09:17:59 [info ] Parameters are saved to d3rlpy_logs/DQN_20230621091759/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[249.77428 , 271.3161 , 50.14434 , 30.42416 , 10.7589 ,\n", + " From worker 6:\t 100.1691 , 30.100527]], dtype=float32), 'minimum': array([[-249.33698 , -268.4478 , -49.885757, -29.988697, -11.494537,\n", + " From worker 6:\t -99.35471 , -29.723509]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|████████████████| 190/190 [00:00<00:00, 883.40it/s, loss=0.124]\n", + " From worker 6:\t2023-06-21 09:18:00 [info ] DQN_20230621091759: epoch=1 step=190 epoch=1 metrics={'time_sample_batch': 2.6126911765650698e-05, 'time_algorithm_update': 0.0010848685314780787, 'loss': 0.12158315730722327, 'time_step': 0.0011276119633724815, 'td_error': 0.4381055625556321} step=190\n", + " From worker 6:\t2023-06-21 09:18:00 [info ] Model parameters are saved to d3rlpy_logs/DQN_20230621091759/model_190.pt\n", + " From worker 6:\t2023-06-21 09:18:00 [debug ] RoundIterator is selected.\n", + " From worker 6:\t2023-06-21 09:18:00 [info ] Directory is created at 
d3rlpy_logs/DQN_20230621091800\n", + " From worker 6:\t2023-06-21 09:18:00 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 6:\t2023-06-21 09:18:00 [debug ] Building models...\n", + " From worker 6:\t2023-06-21 09:18:00 [debug ] Models have been built.\n", + " From worker 6:\t2023-06-21 09:18:00 [info ] Parameters are saved to d3rlpy_logs/DQN_20230621091800/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[250.24196 , 271.3161 , 215.67331 , 30.42416 , 10.7589 ,\n", + " From worker 6:\t 163.16124 , 30.100527]], dtype=float32), 'minimum': array([[-249.33698 , -268.4478 , -49.885757, -29.988697, -11.494537,\n", + " From worker 6:\t -99.36066 , -163.37228 ]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|████████████████| 218/218 [00:00<00:00, 859.26it/s, loss=0.167]\n", + " From worker 6:\t2023-06-21 09:18:00 [info ] DQN_20230621091800: epoch=1 step=218 epoch=1 metrics={'time_sample_batch': 2.437228456549688e-05, 'time_algorithm_update': 0.001121238830986373, 'loss': 0.1643176573337218, 'time_step': 0.0011600376269139282, 'td_error': 0.43984013669077254} step=218\n", + " From worker 6:\t2023-06-21 09:18:00 [info ] Model parameters are saved to d3rlpy_logs/DQN_20230621091800/model_218.pt\n", + " From worker 6:\t2023-06-21 09:18:00 [debug ] RoundIterator is selected.\n", + " From worker 2:\t2023-06-21 09:18:00 [info ] 
Directory is created at d3rlpy_logs/DiscreteSAC_20230621091800\n", + " From worker 2:\t2023-06-21 09:18:00 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 2:\t2023-06-21 09:18:00 [debug ] Building models...\n", + " From worker 2:\t2023-06-21 09:18:00 [debug ] Models have been built.\n", + " From worker 2:\t2023-06-21 09:18:00 [info ] Parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091800/params.json params={'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 64, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'gamma': 0.99, 'generated_maxlen': 100000, 'initial_temperature': 1.0, 'n_critics': 2, 'n_frames': 1, 'n_steps': 1, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[4.0430723e+12, 1.8960426e+12, 3.1853068e+12, 6.7555387e+11,\n", + " From worker 2:\t 2.3783101e+12, 5.4438857e+12, 3.8419759e+12]], dtype=float32), 'minimum': array([[-6.3648321e+11, -1.1384359e+12, -1.8285029e+12, -4.3468859e+11,\n", + " From worker 2:\t -3.0583730e+12, -5.3568445e+12, -4.1177155e+12]], dtype=float32)}}, 'target_update_interval': 8000, 'temp_learning_rate': 0.0003, 'temp_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'use_gpu': None, 'algorithm': 'DiscreteSAC', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|█| 109/109 [00:00<00:00, 248.32it/s, temp_loss=-.586, 
temp=0.99\n", + " From worker 2:\t2023-06-21 09:18:01 [info ] DiscreteSAC_20230621091800: epoch=1 step=109 epoch=1 metrics={'time_sample_batch': 5.08115925920119e-05, 'time_algorithm_update': 0.003944429782552457, 'temp_loss': -0.6070170115525706, 'temp': 0.9968975908165678, 'critic_loss': 4.642771337010445, 'actor_loss': -4.992712204609442, 'time_step': 0.004018339542073941, 'td_error': 0.3385400824074033} step=109\n", + " From worker 2:\t2023-06-21 09:18:01 [info ] Model parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091800/model_109.pt\n", + " From worker 2:\t2023-06-21 09:18:01 [debug ] RoundIterator is selected.\n", + " From worker 2:\t2023-06-21 09:18:01 [info ] Directory is created at d3rlpy_logs/DiscreteSAC_20230621091801\n", + " From worker 2:\t2023-06-21 09:18:01 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 2:\t2023-06-21 09:18:01 [debug ] Building models...\n", + " From worker 2:\t2023-06-21 09:18:01 [debug ] Models have been built.\n", + " From worker 2:\t2023-06-21 09:18:01 [info ] Parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091801/params.json params={'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 64, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'gamma': 0.99, 'generated_maxlen': 100000, 'initial_temperature': 1.0, 'n_critics': 2, 'n_frames': 1, 'n_steps': 1, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': 
{'maximum': array([[4.0430723e+12, 1.8960426e+12, 3.1853068e+12, 6.7555387e+11,\n", + " From worker 2:\t 2.3783101e+12, 5.4438857e+12, 3.8419759e+12]], dtype=float32), 'minimum': array([[-6.3648321e+11, -1.1384359e+12, -1.1599949e+12, -2.8665627e+11,\n", + " From worker 2:\t -3.0583730e+12, -5.3568445e+12, -4.1177155e+12]], dtype=float32)}}, 'target_update_interval': 8000, 'temp_learning_rate': 0.0003, 'temp_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'use_gpu': None, 'algorithm': 'DiscreteSAC', 'observation_shape': (7,), 'action_size': 101}\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/1: 100%|█| 109/109 [00:00<00:00, 255.19it/s, temp_loss=-.523, temp=0.99\n", + " From worker 6:\t2023-06-21 09:18:01 [info ] Directory is created at d3rlpy_logs/DQN_20230621091801\n", + " From worker 6:\t2023-06-21 09:18:01 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 6:\t2023-06-21 09:18:01 [debug ] Building models...\n", + " From worker 6:\t2023-06-21 09:18:01 [debug ] Models have been built.\n", + " From worker 6:\t2023-06-21 09:18:01 [info ] Parameters are saved to d3rlpy_logs/DQN_20230621091801/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[250.24196 , 271.3161 , 215.67331 , 30.42416 , 10.7589 ,\n", + " From worker 6:\t 163.16124 , 30.100527]], dtype=float32), 'minimum': array([[-249.33698 , -268.4478 , -49.885757, -29.988697, 
-11.494537,\n", + " From worker 6:\t -99.36066 , -163.37228 ]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (7,), 'action_size': 101}\n", + " From worker 2:\t2023-06-21 09:18:01 [info ] DiscreteSAC_20230621091801: epoch=1 step=109 epoch=1 metrics={'time_sample_batch': 4.7020955916938434e-05, 'time_algorithm_update': 0.0038419404161085776, 'temp_loss': -0.5592389937250986, 'temp': 0.9972517807549293, 'critic_loss': 4.588769269645761, 'actor_loss': -4.982084239294769, 'time_step': 0.00391071652053693, 'td_error': 0.36861823891947937} step=109\n", + " From worker 2:\t2023-06-21 09:18:01 [info ] Model parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091801/model_109.pt\n", + " From worker 2:\t2023-06-21 09:18:01 [debug ] RoundIterator is selected.\n", + "Epoch 1/1: 100%|████████████████| 218/218 [00:00<00:00, 897.00it/s, loss=0.121]\n", + " From worker 6:\t2023-06-21 09:18:01 [info ] DQN_20230621091801: epoch=1 step=218 epoch=1 metrics={'time_sample_batch': 2.3999345411948106e-05, 'time_algorithm_update': 0.0010723859891978973, 'loss': 0.1192954094106451, 'time_step': 0.00111099230040104, 'td_error': 0.41862508386107267} step=218\n", + " From worker 6:\t2023-06-21 09:18:01 [info ] Model parameters are saved to d3rlpy_logs/DQN_20230621091801/model_218.pt\n", + " From worker 6:\t2023-06-21 09:18:01 [debug ] RoundIterator is selected.\n", + " From worker 2:\t2023-06-21 09:18:02 [info ] Directory is created at d3rlpy_logs/DiscreteSAC_20230621091802\n", + " From worker 2:\t2023-06-21 09:18:02 [debug ] Fitting scaler... 
scaler=min_max\n", + " From worker 2:\t2023-06-21 09:18:02 [debug ] Building models...\n", + " From worker 2:\t2023-06-21 09:18:02 [debug ] Models have been built.\n", + " From worker 2:\t2023-06-21 09:18:02 [info ] Parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091802/params.json params={'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 64, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'gamma': 0.99, 'generated_maxlen': 100000, 'initial_temperature': 1.0, 'n_critics': 2, 'n_frames': 1, 'n_steps': 1, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[4.0430723e+12, 1.8960426e+12, 3.1853068e+12, 6.7555387e+11,\n", + " From worker 2:\t 2.3783101e+12, 5.4438857e+12, 3.8419759e+12]], dtype=float32), 'minimum': array([[-6.3648321e+11, -1.1384359e+12, -1.1599949e+12, -2.8665627e+11,\n", + " From worker 2:\t -3.0583730e+12, -5.3568445e+12, -4.1177155e+12]], dtype=float32)}}, 'target_update_interval': 8000, 'temp_learning_rate': 0.0003, 'temp_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'use_gpu': None, 'algorithm': 'DiscreteSAC', 'observation_shape': (7,), 'action_size': 101}\n", + " From worker 4:\t2023-06-21 09:18:02 [info ] Directory is created at d3rlpy_logs/DQN_20230621091802\n", + " From worker 4:\t2023-06-21 09:18:02 [debug ] Fitting scaler... 
scaler=min_max\n", + " From worker 4:\t2023-06-21 09:18:02 [debug ] Building models...\n", + " From worker 4:\t2023-06-21 09:18:02 [debug ] Models have been built.\n", + " From worker 4:\t2023-06-21 09:18:02 [info ] Parameters are saved to d3rlpy_logs/DQN_20230621091802/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[1. , 0.9285714, 1. , 1. , 1.0121458, 1. ,\n", + " From worker 4:\t 1. ]], dtype=float32), 'minimum': array([[ 0. , -0.71428573, -0.96666664, 0. , -1.0080972 ,\n", + " From worker 4:\t -0.99 , -0.98 ]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|█| 109/109 [00:00<00:00, 241.09it/s, temp_loss=-.566, temp=0.99\n", + " From worker 2:\t2023-06-21 09:18:03 [info ] DiscreteSAC_20230621091802: epoch=1 step=109 epoch=1 metrics={'time_sample_batch': 4.8385847599134534e-05, 'time_algorithm_update': 0.004068871156884989, 'temp_loss': -0.5941836126682716, 'temp': 0.9984096682399785, 'critic_loss': 4.425358019290714, 'actor_loss': -4.960146169050025, 'time_step': 0.004139005591016297, 'td_error': 0.34059097209100414} step=109\n", + " From worker 2:\t2023-06-21 09:18:03 [info ] Model parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091802/model_109.pt\n", + " From worker 2:\t/Users/ppalmes/phome/julia/AutoMLPipeline.jl/AutoOfflineRL/examples/.CondaPkg/env/lib/python3.11/site-packages/sklearn/decomposition/_fastica.py:542: 
FutureWarning: Starting in v1.3, whiten='unit-variance' will be used by default.\n", + " From worker 2:\t warnings.warn(\n", + "Epoch 1/1: 100%|███████████████| 218/218 [00:00<00:00, 769.17it/s, loss=0.0892]\n", + " From worker 4:\t2023-06-21 09:18:03 [info ] DQN_20230621091802: epoch=1 step=218 epoch=1 metrics={'time_sample_batch': 2.7939813946365218e-05, 'time_algorithm_update': 0.0012496075498948404, 'loss': 0.08782375861095999, 'time_step': 0.0012951640907777559, 'td_error': 0.3477889492142646} step=218\n", + " From worker 4:\t2023-06-21 09:18:03 [info ] Model parameters are saved to d3rlpy_logs/DQN_20230621091802/model_218.pt\n", + " From worker 2:\t/Users/ppalmes/phome/julia/AutoMLPipeline.jl/AutoOfflineRL/examples/.CondaPkg/env/lib/python3.11/site-packages/sklearn/decomposition/_fastica.py:123: ConvergenceWarning: FastICA did not converge. Consider increasing tolerance or the maximum number of iterations.\n", + " From worker 2:\t warnings.warn(\n", + " From worker 2:\t2023-06-21 09:18:03 [debug ] RoundIterator is selected.\n", + " From worker 2:\t2023-06-21 09:18:03 [info ] Directory is created at d3rlpy_logs/DiscreteBC_20230621091803\n", + " From worker 2:\t2023-06-21 09:18:03 [debug ] Fitting scaler... 
scaler=min_max\n", + " From worker 2:\t2023-06-21 09:18:03 [debug ] Building models...\n", + " From worker 2:\t2023-06-21 09:18:03 [debug ] Models have been built.\n", + " From worker 2:\t2023-06-21 09:18:03 [info ] Parameters are saved to d3rlpy_logs/DiscreteBC_20230621091803/params.json params={'action_scaler': None, 'batch_size': 100, 'beta': 0.5, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 1.0, 'generated_maxlen': 100000, 'learning_rate': 0.001, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[2.1615496e+12, 2.6355292e+12, 2.1067614e+12, 1.6548812e+11,\n", + " From worker 2:\t 4.0524309e+12, 2.4732295e+12, 1.3723012e+12]], dtype=float32), 'minimum': array([[-4.5286566e+12, -1.7273379e+12, -4.7074060e+11, -5.8335625e+11,\n", + " From worker 2:\t -3.0358634e+12, -2.8378270e+12, -1.3656961e+12]], dtype=float32)}}, 'use_gpu': None, 'algorithm': 'DiscreteBC', 'observation_shape': (7,), 'action_size': 101}\n", + " From worker 4:\t2023-06-21 09:18:03 [debug ] RoundIterator is selected.\n", + " From worker 4:\t2023-06-21 09:18:03 [info ] Directory is created at d3rlpy_logs/DQN_20230621091803\n", + " From worker 4:\t2023-06-21 09:18:03 [debug ] Fitting scaler... 
scaler=min_max\n", + " From worker 4:\t2023-06-21 09:18:03 [debug ] Building models...\n", + " From worker 4:\t2023-06-21 09:18:03 [debug ] Models have been built.\n", + " From worker 4:\t2023-06-21 09:18:03 [info ] Parameters are saved to d3rlpy_logs/DQN_20230621091803/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[0.9207175, 2.7306168, 2.8907115, 2.5570333, 2.8882327, 1.2662084,\n", + " From worker 4:\t 0.5209782]], dtype=float32), 'minimum': array([[-1.2842066 , -2.7194939 , -2.822502 , -2.5797536 , -2.9841356 ,\n", + " From worker 4:\t -1.2347333 , -0.38340685]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|███████████████████| 69/69 [00:00<00:00, 888.80it/s, loss=2.32]\n", + " From worker 2:\terror in DiscreteBC\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " From worker 2:\t/Users/ppalmes/phome/julia/AutoMLPipeline.jl/AutoOfflineRL/examples/.CondaPkg/env/lib/python3.11/site-packages/sklearn/decomposition/_fastica.py:542: FutureWarning: Starting in v1.3, whiten='unit-variance' will be used by default.\n", + " From worker 2:\t warnings.warn(\n", + "Epoch 1/1: 100%|███████████████| 190/190 [00:00<00:00, 826.76it/s, loss=0.0938]\n", + " From worker 4:\t2023-06-21 09:18:03 [info ] DQN_20230621091803: epoch=1 step=190 epoch=1 metrics={'time_sample_batch': 2.738927540026213e-05, 
'time_algorithm_update': 0.0011602765635440224, 'loss': 0.09130570031702519, 'time_step': 0.0012044906616210938, 'td_error': 0.33305522559426054} step=190\n", + " From worker 4:\t2023-06-21 09:18:03 [info ] Model parameters are saved to d3rlpy_logs/DQN_20230621091803/model_190.pt\n", + " From worker 4:\t2023-06-21 09:18:03 [debug ] RoundIterator is selected.\n", + " From worker 2:\t/Users/ppalmes/phome/julia/AutoMLPipeline.jl/AutoOfflineRL/examples/.CondaPkg/env/lib/python3.11/site-packages/sklearn/decomposition/_fastica.py:123: ConvergenceWarning: FastICA did not converge. Consider increasing tolerance or the maximum number of iterations.\n", + " From worker 2:\t warnings.warn(\n", + " From worker 2:\t2023-06-21 09:18:03 [debug ] RoundIterator is selected.\n", + " From worker 3:\t2023-06-21 09:18:03 [info ] Directory is created at d3rlpy_logs/DiscreteSAC_20230621091803\n", + " From worker 3:\t2023-06-21 09:18:03 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 3:\t2023-06-21 09:18:03 [debug ] Building models...\n", + " From worker 3:\t2023-06-21 09:18:03 [debug ] Models have been built.\n", + " From worker 3:\t2023-06-21 09:18:03 [info ] Parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091803/params.json params={'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 64, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'gamma': 0.99, 'generated_maxlen': 100000, 'initial_temperature': 1.0, 'n_critics': 2, 'n_frames': 1, 'n_steps': 1, 'q_func_factory': {'type': 
'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[0.9306842 , 1.1384733 , 0.7587837 , 0.3478039 , 0.40335074,\n", + " From worker 3:\t 0.2281029 , 0.7128955 ]], dtype=float32), 'minimum': array([[-0.46346295, -0.37549204, -0.45391178, -0.4061369 , -0.50954545,\n", + " From worker 3:\t -0.47077656, -0.5478712 ]], dtype=float32)}}, 'target_update_interval': 8000, 'temp_learning_rate': 0.0003, 'temp_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'use_gpu': None, 'algorithm': 'DiscreteSAC', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|█| 95/95 [00:00<00:00, 233.10it/s, temp_loss=-.606, temp=0.992,\n", + " From worker 3:\t2023-06-21 09:18:04 [info ] DiscreteSAC_20230621091803: epoch=1 step=95 epoch=1 metrics={'time_sample_batch': 4.924222042686061e-05, 'time_algorithm_update': 0.004208978853727642, 'temp_loss': -0.65238654556145, 'temp': 0.992238650196477, 'critic_loss': 4.486884029915458, 'actor_loss': -4.853306855653462, 'time_step': 0.004281633778622276, 'td_error': 0.41911956456499566} step=95\n", + " From worker 3:\t2023-06-21 09:18:04 [info ] Model parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091803/model_95.pt\n", + " From worker 3:\t2023-06-21 09:18:04 [debug ] RoundIterator is selected.\n", + " From worker 3:\t2023-06-21 09:18:04 [info ] Directory is created at d3rlpy_logs/DiscreteSAC_20230621091804\n", + " From worker 3:\t2023-06-21 09:18:04 [debug ] Fitting scaler... 
scaler=min_max\n", + " From worker 3:\t2023-06-21 09:18:04 [debug ] Building models...\n", + " From worker 3:\t2023-06-21 09:18:04 [debug ] Models have been built.\n", + " From worker 3:\t2023-06-21 09:18:04 [info ] Parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091804/params.json params={'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 64, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'gamma': 0.99, 'generated_maxlen': 100000, 'initial_temperature': 1.0, 'n_critics': 2, 'n_frames': 1, 'n_steps': 1, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[0.9306842 , 1.1384733 , 0.7587837 , 0.3510912 , 0.40218955,\n", + " From worker 3:\t 0.29190326, 0.5466196 ]], dtype=float32), 'minimum': array([[-0.46346295, -0.37549204, -0.45391178, -0.3651803 , -0.50954545,\n", + " From worker 3:\t -0.45396373, -0.40756777]], dtype=float32)}}, 'target_update_interval': 8000, 'temp_learning_rate': 0.0003, 'temp_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'use_gpu': None, 'algorithm': 'DiscreteSAC', 'observation_shape': (7,), 'action_size': 101}\n", + " From worker 4:\t2023-06-21 09:18:04 [info ] Directory is created at d3rlpy_logs/DQN_20230621091804\n", + " From worker 4:\t2023-06-21 09:18:04 [debug ] Fitting scaler... 
scaler=min_max\n", + " From worker 4:\t2023-06-21 09:18:04 [debug ] Building models...\n", + " From worker 4:\t2023-06-21 09:18:04 [debug ] Models have been built.\n", + " From worker 4:\t2023-06-21 09:18:04 [info ] Parameters are saved to d3rlpy_logs/DQN_20230621091804/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[0.9207175, 2.7306168, 2.8907115, 2.5570333, 2.8882327, 1.3416436,\n", + " From worker 4:\t 0.6212866]], dtype=float32), 'minimum': array([[-1.2842066 , -2.7194939 , -2.822502 , -2.5797536 , -2.9841356 ,\n", + " From worker 4:\t -1.2347333 , -0.38340685]], dtype=float32)}}, 'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 100%|█| 109/109 [00:00<00:00, 239.23it/s, temp_loss=-.663, temp=0.99\n", + " From worker 3:\t2023-06-21 09:18:04 [info ] DiscreteSAC_20230621091804: epoch=1 step=109 epoch=1 metrics={'time_sample_batch': 4.9464199521126006e-05, 'time_algorithm_update': 0.004099708084666401, 'temp_loss': -0.6689465117721963, 'temp': 0.9930887457427628, 'critic_loss': 4.150376217627744, 'actor_loss': -4.793336282082654, 'time_step': 0.004171787051979555, 'td_error': 0.39833078629579516} step=109\n", + " From worker 3:\t2023-06-21 09:18:04 [info ] Model parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091804/model_109.pt\n", + " From worker 3:\t2023-06-21 09:18:04 [debug ] RoundIterator is selected.\n", + "Epoch 1/1: 100%|███████████████| 
218/218 [00:00<00:00, 792.58it/s, loss=0.0856]\n", + " From worker 4:\t2023-06-21 09:18:04 [info ] DQN_20230621091804: epoch=1 step=218 epoch=1 metrics={'time_sample_batch': 2.8987543298563827e-05, 'time_algorithm_update': 0.0012087023586308191, 'loss': 0.08417961127850987, 'time_step': 0.0012563106116898564, 'td_error': 0.3772716885831778} step=218\n", + " From worker 4:\t2023-06-21 09:18:04 [info ] Model parameters are saved to d3rlpy_logs/DQN_20230621091804/model_218.pt\n", + " From worker 4:\t2023-06-21 09:18:04 [debug ] RoundIterator is selected.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " From worker 2:\t2023-06-21 09:18:05 [info ] Directory is created at d3rlpy_logs/DQN_20230621091805\n", + " From worker 2:\t2023-06-21 09:18:05 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 2:\t2023-06-21 09:18:05 [debug ] Building models...\n", + " From worker 2:\t2023-06-21 09:18:05 [debug ] Models have been built.\n", + " From worker 2:\t2023-06-21 09:18:05 [info ] Parameters are saved to d3rlpy_logs/DQN_20230621091805/params.json params={'action_scaler': None, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[5.7978133e+11, 4.5132997e+12, 2.7877811e+12, 1.9566082e+12,\n", + " From worker 2:\t 1.8710200e+12, 4.4437136e+12, 2.3416296e+12]], dtype=float32), 'minimum': array([[-1.6165357e+12, -1.6694441e+12, -8.4551401e+11, -3.8393081e+12,\n", + " From worker 2:\t -1.8710200e+12, -4.4437136e+12, -1.3556292e+12]], dtype=float32)}}, 
'target_update_interval': 8000, 'use_gpu': None, 'algorithm': 'DQN', 'observation_shape': (7,), 'action_size': 101}\n", + " From worker 3:\t2023-06-21 09:18:05 [info ] Directory is created at d3rlpy_logs/DiscreteSAC_20230621091805\n", + " From worker 3:\t2023-06-21 09:18:05 [debug ] Fitting scaler... scaler=min_max\n", + " From worker 3:\t2023-06-21 09:18:05 [debug ] Building models...\n", + " From worker 3:\t2023-06-21 09:18:05 [debug ] Models have been built.\n", + " From worker 3:\t2023-06-21 09:18:05 [info ] Parameters are saved to d3rlpy_logs/DiscreteSAC_20230621091805/params.json params={'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 64, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'gamma': 0.99, 'generated_maxlen': 100000, 'initial_temperature': 1.0, 'n_critics': 2, 'n_frames': 1, 'n_steps': 1, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'min_max', 'params': {'maximum': array([[0., 0., 0., 0., 0., 0., 0.]], dtype=float32), 'minimum': array([[0., 0., 0., 0., 0., 0., 0.]], dtype=float32)}}, 'target_update_interval': 8000, 'temp_learning_rate': 0.0003, 'temp_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 0.0001, 'weight_decay': 0, 'amsgrad': False}, 'use_gpu': None, 'algorithm': 'DiscreteSAC', 'observation_shape': (7,), 'action_size': 101}\n", + "Epoch 1/1: 0%| | 0/109 [00:00
86×2 DataFrame
61 rows omitted
Rowpipelinetd_error
StringFloat64
1RobustScaler |> PCA |> DiscreteCQL0.348413
2RobustScaler |> FastICA |> DiscreteCQL0.375755
3PowerTransformer |> PCA |> DiscreteCQL0.360337
4PowerTransformer |> FastICA |> DiscreteCQL0.351133
5Normalizer |> PCA |> DiscreteCQL0.358127
6Normalizer |> FastICA |> DiscreteCQL0.386176
7Normalizer |> Noop |> DiscreteCQL0.37901
8StandardScaler |> PCA |> DiscreteCQL0.386691
9StandardScaler |> Noop |> DiscreteCQL0.381613
10MinMaxScaler |> PCA |> DiscreteCQL0.36608
11MinMaxScaler |> FastICA |> DiscreteCQL0.364046
12Noop |> PCA |> DiscreteCQL0.436275
13Noop |> FastICA |> DiscreteCQL0.378103
75PowerTransformer |> PCA |> DQN0.341005
76PowerTransformer |> FastICA |> DQN0.330706
77PowerTransformer |> Noop |> DQN5.34366e21
78Normalizer |> PCA |> DQN0.346929
79Normalizer |> FastICA |> DQN0.332194
80Normalizer |> Noop |> DQN0.340522
81StandardScaler |> PCA |> DQN0.348759
82StandardScaler |> FastICA |> DQN0.329664
83MinMaxScaler |> PCA |> DQN0.33804
84MinMaxScaler |> FastICA |> DQN0.336319
85Noop |> PCA |> DQN0.43219
86Noop |> FactorAnalysis |> DQN2.56554e10
" + ], + "text/latex": [ + "\\begin{tabular}{r|cc}\n", + "\t& pipeline & td\\_error\\\\\n", + "\t\\hline\n", + "\t& String & Float64\\\\\n", + "\t\\hline\n", + "\t1 & RobustScaler |> PCA |> DiscreteCQL & 0.348413 \\\\\n", + "\t2 & RobustScaler |> FastICA |> DiscreteCQL & 0.375755 \\\\\n", + "\t3 & PowerTransformer |> PCA |> DiscreteCQL & 0.360337 \\\\\n", + "\t4 & PowerTransformer |> FastICA |> DiscreteCQL & 0.351133 \\\\\n", + "\t5 & Normalizer |> PCA |> DiscreteCQL & 0.358127 \\\\\n", + "\t6 & Normalizer |> FastICA |> DiscreteCQL & 0.386176 \\\\\n", + "\t7 & Normalizer |> Noop |> DiscreteCQL & 0.37901 \\\\\n", + "\t8 & StandardScaler |> PCA |> DiscreteCQL & 0.386691 \\\\\n", + "\t9 & StandardScaler |> Noop |> DiscreteCQL & 0.381613 \\\\\n", + "\t10 & MinMaxScaler |> PCA |> DiscreteCQL & 0.36608 \\\\\n", + "\t11 & MinMaxScaler |> FastICA |> DiscreteCQL & 0.364046 \\\\\n", + "\t12 & Noop |> PCA |> DiscreteCQL & 0.436275 \\\\\n", + "\t13 & Noop |> FastICA |> DiscreteCQL & 0.378103 \\\\\n", + "\t14 & Noop |> FactorAnalysis |> DiscreteCQL & 13.7154 \\\\\n", + "\t15 & Noop |> Noop |> DiscreteCQL & 0.407878 \\\\\n", + "\t16 & RobustScaler |> PCA |> NFQ & 3.56353e25 \\\\\n", + "\t17 & RobustScaler |> FastICA |> NFQ & 0.378888 \\\\\n", + "\t18 & PowerTransformer |> PCA |> NFQ & 0.348923 \\\\\n", + "\t19 & PowerTransformer |> FastICA |> NFQ & 0.575808 \\\\\n", + "\t20 & Normalizer |> PCA |> NFQ & 0.512982 \\\\\n", + "\t21 & Normalizer |> FastICA |> NFQ & 0.41633 \\\\\n", + "\t22 & Normalizer |> Noop |> NFQ & 0.430991 \\\\\n", + "\t23 & StandardScaler |> PCA |> NFQ & 0.348243 \\\\\n", + "\t24 & StandardScaler |> FastICA |> NFQ & 0.443391 \\\\\n", + "\t25 & MinMaxScaler |> PCA |> NFQ & 0.370624 \\\\\n", + "\t26 & MinMaxScaler |> FastICA |> NFQ & 0.355514 \\\\\n", + "\t27 & Noop |> PCA |> NFQ & 1.16437 \\\\\n", + "\t28 & Noop |> FastICA |> NFQ & 0.548647 \\\\\n", + "\t29 & Noop |> FactorAnalysis |> NFQ & 2.48772e14 \\\\\n", + "\t30 & RobustScaler |> PCA |> DoubleDQN & 
0.334047 \\\\\n", + "\t$\\dots$ & $\\dots$ & $\\dots$ \\\\\n", + "\\end{tabular}\n" + ], + "text/plain": [ + "\u001b[1m86×2 DataFrame\u001b[0m\n", + "\u001b[1m Row \u001b[0m│\u001b[1m pipeline \u001b[0m\u001b[1m td_error \u001b[0m\n", + " │\u001b[90m String \u001b[0m\u001b[90m Float64 \u001b[0m\n", + "─────┼────────────────────────────────────────────────\n", + " 1 │ RobustScaler |> PCA |> DiscreteC… 0.348413\n", + " 2 │ RobustScaler |> FastICA |> Discr… 0.375755\n", + " 3 │ PowerTransformer |> PCA |> Discr… 0.360337\n", + " 4 │ PowerTransformer |> FastICA |> D… 0.351133\n", + " 5 │ Normalizer |> PCA |> DiscreteCQL 0.358127\n", + " 6 │ Normalizer |> FastICA |> Discret… 0.386176\n", + " 7 │ Normalizer |> Noop |> DiscreteCQL 0.37901\n", + " 8 │ StandardScaler |> PCA |> Discret… 0.386691\n", + " 9 │ StandardScaler |> Noop |> Discre… 0.381613\n", + " 10 │ MinMaxScaler |> PCA |> DiscreteC… 0.36608\n", + " 11 │ MinMaxScaler |> FastICA |> Discr… 0.364046\n", + " ⋮ │ ⋮ ⋮\n", + " 77 │ PowerTransformer |> Noop |> DQN 5.34366e21\n", + " 78 │ Normalizer |> PCA |> DQN 0.346929\n", + " 79 │ Normalizer |> FastICA |> DQN 0.332194\n", + " 80 │ Normalizer |> Noop |> DQN 0.340522\n", + " 81 │ StandardScaler |> PCA |> DQN 0.348759\n", + " 82 │ StandardScaler |> FastICA |> DQN 0.329664\n", + " 83 │ MinMaxScaler |> PCA |> DQN 0.33804\n", + " 84 │ MinMaxScaler |> FastICA |> DQN 0.336319\n", + " 85 │ Noop |> PCA |> DQN 0.43219\n", + " 86 │ Noop |> FactorAnalysis |> DQN 2.56554e10\n", + "\u001b[36m 65 rows omitted\u001b[0m" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "function pipelinesearch()\n", + " agentnames = [\"DiscreteCQL\",\"NFQ\",\"DoubleDQN\",\"DiscreteSAC\",\"DiscreteBCQ\",\"DiscreteBC\",\"DQN\"]\n", + " scalers = [rb,pt,norm,std,mx,noop]\n", + " extractors = [pca,ica,fa,noop]\n", + " dfresults = @sync @distributed (vcat) for agentname in agentnames\n", + " @distributed (vcat) for sc in scalers\n", + " 
@distributed (vcat) for xt in extractors\n", + " try\n", + " rlagent = DiscreteRLOffline(agentname,Dict(:runtime_args=>Dict(:n_epochs=>1)))\n", + " rlpipeline = ((numf |> sc |> xt)) |> rlagent \n", + " res = crossvalidateRL(rlpipeline,observation,action_reward_terminal)\n", + " scn = sc.name[1:end - 4]; xtn = xt.name[1:end - 4]; lrn = rlagent.name[1:end - 4]\n", + " pname = \"$scn |> $xtn |> $lrn\"\n", + " if !isnan(res)\n", + " DataFrame(pipeline=pname,td_error=res)\n", + " else\n", + " DataFrame()\n", + " end\n", + " catch e\n", + " println(\"error in $agentname\")\n", + " DataFrame()\n", + " end\n", + " end\n", + " end\n", + " end\n", + " #sort!(dfresults,:percent_action_matches,rev=true)\n", + " return dfresults\n", + "end\n", + "dftable= pipelinesearch()" + ] + }, + { + "cell_type": "markdown", + "id": "1d3e98a8", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Results" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "id": "ed8b1175", + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
86×2 DataFrame
61 rows omitted
Rowpipelinetd_error
StringFloat64
1Noop |> FastICA |> DoubleDQN0.325789
2StandardScaler |> FastICA |> DQN0.329664
3PowerTransformer |> FastICA |> DQN0.330706
4PowerTransformer |> FastICA |> DiscreteSAC0.331318
5Normalizer |> FastICA |> DQN0.332194
6PowerTransformer |> FastICA |> DiscreteBCQ0.333078
7RobustScaler |> FastICA |> DQN0.33313
8StandardScaler |> Noop |> DoubleDQN0.33318
9StandardScaler |> PCA |> DoubleDQN0.333818
10RobustScaler |> PCA |> DoubleDQN0.334047
11Normalizer |> Noop |> DiscreteBCQ0.334562
12Normalizer |> PCA |> DiscreteBCQ0.335437
13RobustScaler |> FastICA |> DoubleDQN0.336118
75Noop |> PCA |> NFQ1.16437
76Noop |> PCA |> DiscreteSAC1.67009
77Noop |> FactorAnalysis |> DiscreteCQL13.7154
78Noop |> FactorAnalysis |> DiscreteSAC2.26795e6
79Noop |> FactorAnalysis |> DQN2.56554e10
80Noop |> FactorAnalysis |> DoubleDQN2.69701e11
81Noop |> FactorAnalysis |> DiscreteBCQ2.61634e13
82Noop |> FactorAnalysis |> NFQ2.48772e14
83PowerTransformer |> Noop |> DQN5.34366e21
84PowerTransformer |> Noop |> DoubleDQN3.26964e22
85RobustScaler |> PCA |> NFQ3.56353e25
86RobustScaler |> PCA |> DiscreteSAC3.36113e26
" + ], + "text/latex": [ + "\\begin{tabular}{r|cc}\n", + "\t& pipeline & td\\_error\\\\\n", + "\t\\hline\n", + "\t& String & Float64\\\\\n", + "\t\\hline\n", + "\t1 & Noop |> FastICA |> DoubleDQN & 0.325789 \\\\\n", + "\t2 & StandardScaler |> FastICA |> DQN & 0.329664 \\\\\n", + "\t3 & PowerTransformer |> FastICA |> DQN & 0.330706 \\\\\n", + "\t4 & PowerTransformer |> FastICA |> DiscreteSAC & 0.331318 \\\\\n", + "\t5 & Normalizer |> FastICA |> DQN & 0.332194 \\\\\n", + "\t6 & PowerTransformer |> FastICA |> DiscreteBCQ & 0.333078 \\\\\n", + "\t7 & RobustScaler |> FastICA |> DQN & 0.33313 \\\\\n", + "\t8 & StandardScaler |> Noop |> DoubleDQN & 0.33318 \\\\\n", + "\t9 & StandardScaler |> PCA |> DoubleDQN & 0.333818 \\\\\n", + "\t10 & RobustScaler |> PCA |> DoubleDQN & 0.334047 \\\\\n", + "\t11 & Normalizer |> Noop |> DiscreteBCQ & 0.334562 \\\\\n", + "\t12 & Normalizer |> PCA |> DiscreteBCQ & 0.335437 \\\\\n", + "\t13 & RobustScaler |> FastICA |> DoubleDQN & 0.336118 \\\\\n", + "\t14 & MinMaxScaler |> FastICA |> DQN & 0.336319 \\\\\n", + "\t15 & StandardScaler |> FastICA |> DiscreteSAC & 0.336651 \\\\\n", + "\t16 & MinMaxScaler |> PCA |> DQN & 0.33804 \\\\\n", + "\t17 & RobustScaler |> Noop |> DoubleDQN & 0.338156 \\\\\n", + "\t18 & Normalizer |> Noop |> DoubleDQN & 0.338465 \\\\\n", + "\t19 & Normalizer |> FastICA |> DiscreteSAC & 0.338521 \\\\\n", + "\t20 & Normalizer |> FastICA |> DoubleDQN & 0.33963 \\\\\n", + "\t21 & RobustScaler |> PCA |> DiscreteBCQ & 0.33991 \\\\\n", + "\t22 & MinMaxScaler |> PCA |> DoubleDQN & 0.339956 \\\\\n", + "\t23 & PowerTransformer |> PCA |> DoubleDQN & 0.340461 \\\\\n", + "\t24 & Normalizer |> Noop |> DQN & 0.340522 \\\\\n", + "\t25 & PowerTransformer |> PCA |> DQN & 0.341005 \\\\\n", + "\t26 & MinMaxScaler |> PCA |> DiscreteBCQ & 0.341485 \\\\\n", + "\t27 & Normalizer |> FastICA |> DiscreteBCQ & 0.343531 \\\\\n", + "\t28 & MinMaxScaler |> FastICA |> DiscreteBCQ & 0.34357 \\\\\n", + "\t29 & MinMaxScaler |> FastICA |> DoubleDQN & 
0.344844 \\\\\n", + "\t30 & MinMaxScaler |> Noop |> DiscreteSAC & 0.345202 \\\\\n", + "\t$\\dots$ & $\\dots$ & $\\dots$ \\\\\n", + "\\end{tabular}\n" + ], + "text/plain": [ + "\u001b[1m86×2 DataFrame\u001b[0m\n", + "\u001b[1m Row \u001b[0m│\u001b[1m pipeline \u001b[0m\u001b[1m td_error \u001b[0m\n", + " │\u001b[90m String \u001b[0m\u001b[90m Float64 \u001b[0m\n", + "─────┼────────────────────────────────────────────────\n", + " 1 │ Noop |> FastICA |> DoubleDQN 0.325789\n", + " 2 │ StandardScaler |> FastICA |> DQN 0.329664\n", + " 3 │ PowerTransformer |> FastICA |> D… 0.330706\n", + " 4 │ PowerTransformer |> FastICA |> D… 0.331318\n", + " 5 │ Normalizer |> FastICA |> DQN 0.332194\n", + " 6 │ PowerTransformer |> FastICA |> D… 0.333078\n", + " 7 │ RobustScaler |> FastICA |> DQN 0.33313\n", + " 8 │ StandardScaler |> Noop |> Double… 0.33318\n", + " 9 │ StandardScaler |> PCA |> DoubleD… 0.333818\n", + " 10 │ RobustScaler |> PCA |> DoubleDQN 0.334047\n", + " 11 │ Normalizer |> Noop |> DiscreteBCQ 0.334562\n", + " ⋮ │ ⋮ ⋮\n", + " 77 │ Noop |> FactorAnalysis |> Discre… 13.7154\n", + " 78 │ Noop |> FactorAnalysis |> Discre… 2.26795e6\n", + " 79 │ Noop |> FactorAnalysis |> DQN 2.56554e10\n", + " 80 │ Noop |> FactorAnalysis |> Double… 2.69701e11\n", + " 81 │ Noop |> FactorAnalysis |> Discre… 2.61634e13\n", + " 82 │ Noop |> FactorAnalysis |> NFQ 2.48772e14\n", + " 83 │ PowerTransformer |> Noop |> DQN 5.34366e21\n", + " 84 │ PowerTransformer |> Noop |> Doub… 3.26964e22\n", + " 85 │ RobustScaler |> PCA |> NFQ 3.56353e25\n", + " 86 │ RobustScaler |> PCA |> DiscreteS… 3.36113e26\n", + "\u001b[36m 65 rows omitted\u001b[0m" + ] + }, + "execution_count": 99, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sort!(dftable,:td_error,rev=false)\n", + "dftable" + ] + }, + { + "cell_type": "markdown", + "id": "1e8d8712", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "##### Top 5 and last 5 results" + ] + }, + { + "cell_type": 
"code", + "execution_count": 100, + "id": "259a9ae2", + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
5×2 DataFrame
Rowpipelinetd_error
StringFloat64
1Noop |> FastICA |> DoubleDQN0.325789
2StandardScaler |> FastICA |> DQN0.329664
3PowerTransformer |> FastICA |> DQN0.330706
4PowerTransformer |> FastICA |> DiscreteSAC0.331318
5Normalizer |> FastICA |> DQN0.332194
" + ], + "text/latex": [ + "\\begin{tabular}{r|cc}\n", + "\t& pipeline & td\\_error\\\\\n", + "\t\\hline\n", + "\t& String & Float64\\\\\n", + "\t\\hline\n", + "\t1 & Noop |> FastICA |> DoubleDQN & 0.325789 \\\\\n", + "\t2 & StandardScaler |> FastICA |> DQN & 0.329664 \\\\\n", + "\t3 & PowerTransformer |> FastICA |> DQN & 0.330706 \\\\\n", + "\t4 & PowerTransformer |> FastICA |> DiscreteSAC & 0.331318 \\\\\n", + "\t5 & Normalizer |> FastICA |> DQN & 0.332194 \\\\\n", + "\\end{tabular}\n" + ], + "text/plain": [ + "\u001b[1m5×2 DataFrame\u001b[0m\n", + "\u001b[1m Row \u001b[0m│\u001b[1m pipeline \u001b[0m\u001b[1m td_error \u001b[0m\n", + " │\u001b[90m String \u001b[0m\u001b[90m Float64 \u001b[0m\n", + "─────┼─────────────────────────────────────────────\n", + " 1 │ Noop |> FastICA |> DoubleDQN 0.325789\n", + " 2 │ StandardScaler |> FastICA |> DQN 0.329664\n", + " 3 │ PowerTransformer |> FastICA |> D… 0.330706\n", + " 4 │ PowerTransformer |> FastICA |> D… 0.331318\n", + " 5 │ Normalizer |> FastICA |> DQN 0.332194" + ] + }, + "execution_count": 100, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "first(dftable,5)" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "id": "55812e1a", + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
5×2 DataFrame
Rowpipelinetd_error
StringFloat64
1Noop |> FactorAnalysis |> NFQ2.48772e14
2PowerTransformer |> Noop |> DQN5.34366e21
3PowerTransformer |> Noop |> DoubleDQN3.26964e22
4RobustScaler |> PCA |> NFQ3.56353e25
5RobustScaler |> PCA |> DiscreteSAC3.36113e26
" + ], + "text/latex": [ + "\\begin{tabular}{r|cc}\n", + "\t& pipeline & td\\_error\\\\\n", + "\t\\hline\n", + "\t& String & Float64\\\\\n", + "\t\\hline\n", + "\t1 & Noop |> FactorAnalysis |> NFQ & 2.48772e14 \\\\\n", + "\t2 & PowerTransformer |> Noop |> DQN & 5.34366e21 \\\\\n", + "\t3 & PowerTransformer |> Noop |> DoubleDQN & 3.26964e22 \\\\\n", + "\t4 & RobustScaler |> PCA |> NFQ & 3.56353e25 \\\\\n", + "\t5 & RobustScaler |> PCA |> DiscreteSAC & 3.36113e26 \\\\\n", + "\\end{tabular}\n" + ], + "text/plain": [ + "\u001b[1m5×2 DataFrame\u001b[0m\n", + "\u001b[1m Row \u001b[0m│\u001b[1m pipeline \u001b[0m\u001b[1m td_error \u001b[0m\n", + " │\u001b[90m String \u001b[0m\u001b[90m Float64 \u001b[0m\n", + "─────┼───────────────────────────────────────────────\n", + " 1 │ Noop |> FactorAnalysis |> NFQ 2.48772e14\n", + " 2 │ PowerTransformer |> Noop |> DQN 5.34366e21\n", + " 3 │ PowerTransformer |> Noop |> Doub… 3.26964e22\n", + " 4 │ RobustScaler |> PCA |> NFQ 3.56353e25\n", + " 5 │ RobustScaler |> PCA |> DiscreteS… 3.36113e26" + ] + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "last(dftable,5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a2f079f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "celltoolbar": "Slideshow", + "kernelspec": { + "display_name": "Julia 1.9.0", + "language": "julia", + "name": "julia-1.9" + }, + "language_info": { + "file_extension": ".jl", + "mimetype": "application/julia", + "name": "julia", + "version": "1.9.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/AutoOfflineRL/examples/presentation.slides.html b/AutoOfflineRL/examples/presentation.slides.html new file mode 100644 index 0000000..b49bd49 --- /dev/null +++ b/AutoOfflineRL/examples/presentation.slides.html @@ -0,0 +1,15548 @@ + + + + + + + + + +presentation slides + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+ + + + + + + + + + + diff --git a/AutoOfflineRL/examples/rl.png b/AutoOfflineRL/examples/rl.png new file mode 100644 index 0000000..bfc136e Binary files /dev/null and b/AutoOfflineRL/examples/rl.png differ diff --git a/AutoOfflineRL/examples/script.jl b/AutoOfflineRL/examples/script.jl new file mode 100644 index 0000000..787c139 --- /dev/null +++ b/AutoOfflineRL/examples/script.jl @@ -0,0 +1,90 @@ +using Distributed + +nprocs() == 1 && addprocs() + +@everywhere begin + using AutoOfflineRL + using AutoMLPipeline + using Parquet + using DataFrames +end + +@everywhere begin + # load preprocessing elements + #### Scaler + rb = SKPreprocessor("RobustScaler"); + pt = SKPreprocessor("PowerTransformer"); + norm = SKPreprocessor("Normalizer"); + mx = SKPreprocessor("MinMaxScaler"); + std = SKPreprocessor("StandardScaler") + ##### Column selector + catf = CatFeatureSelector(); + numf = NumFeatureSelector(); + ## load filters + ##### Decomposition + #apca = SKPreprocessor("PCA",Dict(:autocomponent=>true,:name=>"autoPCA")); + #afa = SKPreprocessor("FactorAnalysis",Dict(:autocomponent=>true,:name=>"autoFA")); + #aica = SKPreprocessor("FastICA",Dict(:autocomponent=>true,:name=>"autoICA")); + pca = SKPreprocessor("PCA"); + fa = SKPreprocessor("FactorAnalysis"); + ica = SKPreprocessor("FastICA"); + noop = Identity(Dict(:name => "Noop")); +end + +# load dataset +path = pkgdir(AutoOfflineRL) +dataset = "$path/data/smalldata.parquet" +df = Parquet.read_parquet(dataset) |> DataFrame |> dropmissing + +#df = df[:,["day", "hour", "minute", "dow"]] +#df.sensor1 = rand(1:500,srow) +#df.sensor2 = rand(1:200,srow) +#df.sensor3 = rand(1:100,srow) +#df.action = rand([10,50,100],srow) +#df.reward = rand(srow) + +srow,_ = size(df) +observation = df[:, ["day", "hour", "minute", "dow", "sensor1", "sensor2", "sensor3"]] +reward = df[:,["reward"]] |> deepcopy |> DataFrame +action = df[:,["action"]] |> deepcopy |> DataFrame +_terminals = zeros(Int,srow) +_terminals[collect(100:1000:9000)] .= 1 
+_terminals[end] = 1 +dterminal = DataFrame(terminal=_terminals) +action_reward_terminal = DataFrame[action, reward, dterminal] + +agent = DiscreteRLOffline("NFQ") +pipe = (numf |> mx |> pca) |> agent +crossvalidateRL(pipe,observation,action_reward_terminal) + +function pipelinesearch() + agentnames = ["DiscreteCQL","NFQ","DoubleDQN","DiscreteSAC","DiscreteBCQ","DiscreteBC","DQN"] + scalers = [rb,pt,norm,std,mx,noop] + extractors = [pca,ica,fa,noop] + dfresults = @sync @distributed (vcat) for agentname in agentnames + @distributed (vcat) for sc in scalers + @distributed (vcat) for xt in extractors + try + rlagent = DiscreteRLOffline(agentname,Dict(:runtime_args=>Dict(:n_epochs=>1))) + rlpipeline = ((numf |> sc |> xt)) |> rlagent + res = crossvalidateRL(rlpipeline,observation,action_reward_terminal) + scn = sc.name[1:end - 4]; xtn = xt.name[1:end - 4]; lrn = rlagent.name[1:end - 4] + pname = "$scn |> $xtn |> $lrn" + if !isnan(res) + DataFrame(pipeline=pname,td_error=res) + else + DataFrame() + end + catch e + println("error in $agentname") + DataFrame() + end + end + end + end + #sort!(dfresults,:percent_action_matches,rev=true) + return dfresults +end +dftable= pipelinesearch() +sort!(dftable,:td_error,rev=false) +show(dftable,allcols=true,allrows=true,truncate=0) diff --git a/AutoOfflineRL/examples/test.py b/AutoOfflineRL/examples/test.py new file mode 100644 index 0000000..5db9a34 --- /dev/null +++ b/AutoOfflineRL/examples/test.py @@ -0,0 +1,30 @@ +from d3rlpy.datasets import get_cartpole +from d3rlpy.algos import DQN +from d3rlpy.metrics.scorer import td_error_scorer +from d3rlpy.metrics.scorer import average_value_estimation_scorer +from d3rlpy.metrics.scorer import evaluate_on_environment +from sklearn.model_selection import train_test_split + +dataset, env = get_cartpole() +train_episodes, test_episodes = train_test_split(dataset) +dqn = DQN() +dqn.fit(train_episodes, + eval_episodes=test_episodes, + n_epochs=1, + scorers={ + 'td_error': td_error_scorer, + 
'value_scale': average_value_estimation_scorer + } + ) + + +from sklearn.model_selection import cross_validate + +scores = cross_validate(dqn, + dataset, + scoring={ + 'td_error': td_error_scorer, + 'value_scale': average_value_estimation_scorer + }, + fit_params={'n_epochs':1} + ) diff --git a/AutoOfflineRL/src/AutoOfflineRL.jl b/AutoOfflineRL/src/AutoOfflineRL.jl new file mode 100644 index 0000000..2ef51c6 --- /dev/null +++ b/AutoOfflineRL/src/AutoOfflineRL.jl @@ -0,0 +1,50 @@ +module AutoOfflineRL + +using DataFrames +using CSV + +export fit, fit!, transform, transform!,fit_transform, fit_transform! +import AMLPipelineBase.AbsTypes: fit, fit!, transform, transform! + +using AMLPipelineBase +using AMLPipelineBase: AbsTypes, Utils, BaselineModels, Pipelines +using AMLPipelineBase: BaseFilters, FeatureSelectors, DecisionTreeLearners +using AMLPipelineBase: EnsembleMethods, CrossValidators +using AMLPipelineBase: NARemovers + +export Machine, Learner, Transformer, Workflow, Computer +export holdout, kfold, score, infer_eltype, nested_dict_to_tuples, + nested_dict_set!, nested_dict_merge, create_transformer, + mergedict, getiris,getprofb, + skipmean,skipmedian,skipstd, + aggregatorclskipmissing +export Baseline, Identity +export Imputer,OneHotEncoder,Wrapper +export PrunedTree,RandomForest,Adaboost +export VoteEnsemble, StackEnsemble, BestLearner +export FeatureSelector, CatFeatureSelector, NumFeatureSelector, CatNumDiscriminator +export crossvalidate +export NARemover +export @pipeline +export @pipelinex +export @pipelinez +export +, |>, *, |, >> +export Pipeline, ComboPipeline + + + +#export RLMachine, RLOffline, WeeklyEpisodes + + +#Base.@kwdef struct WeeklyEpisodes <: RLMachine +# _params::Dict = Dict() +# _model::Dict = Dict() +#end + +include("offlinerls.jl") +using .OfflineRLs +export DiscreteRLOffline, fit! 
+export driver, listdiscreateagents +export crossvalidateRL + +end # module diff --git a/AutoOfflineRL/src/offlinerls.jl b/AutoOfflineRL/src/offlinerls.jl new file mode 100644 index 0000000..83348f5 --- /dev/null +++ b/AutoOfflineRL/src/offlinerls.jl @@ -0,0 +1,280 @@ +module OfflineRLs + +using AutoOfflineRL +using PythonCall +import Statistics +using Parquet +using Distributed +using DataFrames: DataFrame, dropmissing +using Random +using CSV +using Dates + +using ..AbsTypes +using ..Utils +import ..AbsTypes: fit, fit!, transform, transform! + +import PythonCall +const PYC = PythonCall + +using ..Utils: nested_dict_merge + +export DiscreteRLOffline, fit!, transform!, fit, transform +export listdiscreateagents, driver +export crossvalidateRL + +const rl_dict = Dict{String, PYC.Py}() +const metric_dict = Dict{String, PYC.Py}() + +const PYRL = PYC.pynew() +const PYPD = PYC.pynew() +const PYNP = PYC.pynew() +const PYDT = PYC.pynew() +const PYMT = PYC.pynew() +const PYSK = PYC.pynew() + + +function __init__() + PYC.pycopy!(PYRL, PYC.pyimport("d3rlpy.algos")) + PYC.pycopy!(PYDT, PYC.pyimport("d3rlpy.datasets")) + PYC.pycopy!(PYSK, PYC.pyimport("sklearn.model_selection")) + PYC.pycopy!(PYMT, PYC.pyimport("d3rlpy.metrics")) + PYC.pycopy!(PYPD, PYC.pyimport("pandas")) + PYC.pycopy!(PYNP, PYC.pyimport("numpy")) + + # OfflineRLs + metric_dict["cross_validate"] = PYSK.cross_validate + metric_dict["train_test_split"] = PYSK.train_test_split + metric_dict["td_error_scorer"] = PYMT.td_error_scorer + metric_dict["discrete_action_match_scorer"] = PYMT.discrete_action_match_scorer + metric_dict["average_value_estimation_scorer"] = PYMT.average_value_estimation_scorer + metric_dict["get_cartpole"] = PYDT.get_cartpole + + rl_dict["DiscreteBC"] = PYRL + rl_dict["DQN"] = PYRL + rl_dict["NFQ"] = PYRL + rl_dict["DoubleDQN"] = PYRL + rl_dict["DiscreteBCQ"] = PYRL + rl_dict["DiscreteCQL"] = PYRL + rl_dict["DiscreteSAC"] = PYRL + #rl_dict["DiscreteRandomPolicy"] = PYRL +end + +mutable 
struct DiscreteRLOffline <: Learner + name::String + model::Dict{Symbol,Any} + + function DiscreteRLOffline(args=Dict{Symbol,Any}()) + default_args = Dict{Symbol,Any}( + :name => "DQN", + :tag => "RLOffline", + :rlagent => "DQN", + :iterations => 100, + :save_metrics => false, + :rlobjtrained => PYC.PyNULL, + :o_header => ["day", "hour", "minute", "dow", "metric1", "metric2", "metric3", "metric4"], + :a_header => ["action"], + :r_header => ["reward"], + :save_model => false, + :runtime_args => Dict{Symbol, Any}( + :n_epochs => 3, + ), + :impl_args => Dict{Symbol,Any}( + :scaler => "min_max", + :use_gpu => false, + ) + ) + cargs = nested_dict_merge(default_args,args) + #datestring = Dates.format(now(), "yyyy-mm-dd-HH-MM") + cargs[:name] = cargs[:name]*"_"*randstring(3) + rlagent = cargs[:rlagent] + if !(rlagent in keys(rl_dict)) + println("error: $rlagent is not supported.") + println() + discreteagents() + error("Argument keyword error") + end + new(cargs[:name],cargs) + end +end + +function DiscreteRLOffline(rlagent::String, args::Dict) + DiscreteRLOffline(Dict(:rlagent => rlagent, :name => rlagent, args...)) +end + +function DiscreteRLOffline(rlagent::String; args...) + DiscreteRLOffline(Dict( + :rlagent => rlagent, + :name => rlagent, + args... 
+ )) +end + +function listdiscreateagents() + println() + println("RL Discrete Agents:") + agents = keys(rl_dict) |> collect + [println(" ",agent," ") for agent in agents] + println("See d3rlpy python package for details about the agent arguments.") + nothing +end + +function discreteagents() + println() + println("syntax: DiscreteRLOffline(name::String, args::Dict)") + println("and 'args' are the agent's parameters") + println("See d3rlpy python package for details about the agent arguments.") + println("use: listdiscreateagents() to get the available RL agents") +end + +function createmdpdata!(agent::DiscreteRLOffline, df::DataFrame, action_reward_term::Vector) + _observations = df |> Array .|> PYC.float |> x -> PYNP.array(x, dtype = "float32") + _actions = action_reward_term[1] |> Array .|> PYC.float |> x -> PYNP.array(x, dtype = "float32") + _rewards = action_reward_term[2] |> Array .|> PYC.float |> x -> PYNP.array(x, dtype = "float32") + _terminals = action_reward_term[3] |> Array .|> PYC.float |> x -> PYNP.array(x, dtype = "float32") + ## create dataset for RLOffline + mdp_dataset = PYDT.MDPDataset( + observations = _observations, + actions = _actions, + rewards = _rewards, + terminals = _terminals, + ) + ## save params + agent.model[:mdp_dataset] = mdp_dataset + agent.model[:np_observations] = _observations + agent.model[:np_actions] = _actions + agent.model[:np_rewards] = _rewards + return mdp_dataset +end + +function checkheaders(agent::DiscreteRLOffline, df) + o_header = agent.model[:o_header] + a_header = agent.model[:a_header] + r_header = agent.model[:r_header] + dfnames = names(df) + [@assert header in dfnames "\"$header\" is not in data header" + for header in vcat(o_header, a_header, r_header)] +end + +function fit!(agent::DiscreteRLOffline, df::DataFrame, action_reward_term::Vector)::Nothing + # check if headers exist + #checkheaders(agent::DiscreteRLOffline, df) + # create mdp data + nrow, ncol = size(df) + mdp_dataset = createmdpdata!(agent, 
df,action_reward_term) + ## prepare algorithm + runtime_args = agent.model[:runtime_args] + logging = agent.model[:save_metrics] + impl_args = copy(agent.model[:impl_args]) + rlagent = agent.model[:rlagent] + py_rlagent = getproperty(rl_dict[rlagent],rlagent) + pyrlobj = py_rlagent(;impl_args...) + pyrlobj.fit(mdp_dataset; save_metrics = logging, runtime_args... ) + ## save rl to model dictionary + agent.model[:rlobjtrained] = pyrlobj + agent.model[:nrow] = nrow + agent.model[:ncol] = ncol + ## save model to file + if agent.model[:save_model] == true + path = pkgdir(AutoOfflineRL) + agentname = agent.model[:name] + tag = agent.model[:tag] + fnmodel = "$path/model/$(agentname)_$(tag)_model.pt" + fnpolicy = "$path/model/$(agentname)_$(tag)_policy.pt" + pyrlobj.save_model(fnmodel) + pyrlobj.save_policy(fnpolicy) + end + return nothing +end + + +function transform!(agent::DiscreteRLOffline,df::DataFrame=DataFrame())::Vector + pyrlobj = agent.model[:rlobjtrained] + #o_header = agent.model[:o_header] + observations = df |> Array .|> PYC.float |> x -> PYNP.array(x, dtype = "float32") + res = map(observations) do obs + action = pyrlobj.predict(obs) + value = pyrlobj.predict_value([obs],action) + action = PYC.pyconvert.(Float64,action) + value = PYC.pyconvert.(Float64,value) + obs = PYC.pyconvert.(Float64,obs) + (;obs,action,value) + end + return res +end + +function prp_fit_transform(pipe::Machine, instances::DataFrame,actrewterm::Vector) + machines = pipe.model[:machines] + machine_args = pipe.model[:machine_args] + + current_instances = instances + trlength = length(machines) + for t_index in 1:(trlength - 1) + machine = createmachine(machines[t_index], machine_args) + fit!(machine, current_instances, actrewterm) + current_instances = transform!(machine, current_instances) + end + return current_instances +end + + +function driver() + path = pkgdir(AutoOfflineRL) + dataset = "$path/data/smalldata.parquet" + df = Parquet.read_parquet(dataset) |> DataFrame |> dropmissing + 
df_input = df[:, ["day", "hour", "minute", "dow", "metric1", "metric2", "metric3", "metric4"]] + reward = df[:,["reward"]] |> deepcopy |> DataFrame + action = df[:,["action"]] |> deepcopy |> DataFrame + action_reward = DataFrame[action, reward] + agentname="NFQ" + agent = DiscreteRLOffline(agentname) + #fit_transform!(agent,df_input,action_reward) +end + +function traintesteval(agent::DiscreteRLOffline,mdp_dataset::Py) + runtime_args = agent.model[:runtime_args] + logging = agent.model[:save_metrics] + impl_args = copy(agent.model[:impl_args]) + rlagent = agent.model[:rlagent] + py_rlagent = getproperty(rl_dict[rlagent],rlagent) + pyrlobj = py_rlagent(;impl_args...) + py_train_test_split = metric_dict["train_test_split"] + trainepisodes,testepisodes = py_train_test_split(mdp_dataset) + + td_error_scorer = PYMT.td_error_scorer + discrete_action_match_scorer = PYMT.discrete_action_match_scorer + runconfig = Dict(:scorers=>Dict("td_error"=>td_error_scorer)) + #runconfig = Dict(:scorers=>Dict("metric"=>discrete_action_match_scorer)) + score=pyrlobj.fit(trainepisodes; + eval_episodes=testepisodes, + runtime_args...,runconfig...) 
+ vals = pyconvert(Array,score) + mvals = [v[2]["td_error"] for v in vals] |> Statistics.mean + #mvals = [v[2]["metric"] for v in vals] |> Statistics.mean + return mvals +end + +function crossvalidateRL(pp::Machine, dfobs::DataFrame, actrewterm::Vector; cv=3) + pipe = deepcopy(pp) + features = deepcopy(dfobs) + machines = pipe.model[:machines] + agent = machines[end] + + df_input = prp_fit_transform(pipe,features,actrewterm) + mdp_dataset = createmdpdata!(agent,df_input,actrewterm) + + scores= [traintesteval(agent,mdp_dataset) for i in 1:cv] + return Statistics.mean(scores) + + #pyskcrossvalidate = metric_dict["cross_validate"] + #td_error_scorer = metric_dict["td_error_scorer"] + #average_value_estimation_scorer = metric_dict["average_value_estimation_scorer"] + #runconfig = Dict(:scoring=>Dict("td_error"=>td_error_scorer, + # "value_scale"=>average_value_estimation_scorer), + # :fit_params=>Dict("n_epochs"=>1)) + #scores = pyskcrossvalidate(pyrlobj,mdp_dataset; runconfig...) + #return scores +end + + +end diff --git a/AutoOfflineRL/test/runtests.jl b/AutoOfflineRL/test/runtests.jl new file mode 100644 index 0000000..97708f9 --- /dev/null +++ b/AutoOfflineRL/test/runtests.jl @@ -0,0 +1,65 @@ +module TestOfflineRL +using AutoOfflineRL +using Test +using DataFrames +using PythonCall +using Parquet +const PYC=PythonCall + +@testset "Load Agents with Default Params" begin + for agentid in keys(AutoOfflineRL.OfflineRLs.rl_dict) + @info "loading $agentid default params" + rlagent = DiscreteRLOffline(agentid) + @test typeof(rlagent) <: AutoOfflineRL.Learner + end +end + + +@testset "Load Agents with Param Args" begin + println() + for agentid in keys(AutoOfflineRL.OfflineRLs.rl_dict) + @info "loading $agentid with customized params" + rlagent = DiscreteRLOffline(agentid, + Dict(:name=>agentid, + :iterations=>10000, + :epochs=>100) + ) + @test typeof(rlagent) <: AutoOfflineRL.Learner + end +end + +@testset "Test Exceptions" begin + @test_throws ErrorException 
DiscreteRLOffline("dummy") +end + +@testset "Test Agent fit!/transform Runs" begin + println() + path = pkgdir(AutoOfflineRL) + dataset = "$path/data/smalldata.parquet" + df = Parquet.read_parquet(dataset) |> DataFrame |> dropmissing + srow,_ = size(df) + df_input = df[:, ["day", "hour", "minute", "dow", "sensor1", "sensor2", "sensor3"]] + reward = df[:,["reward"]] |> deepcopy |> DataFrame + action = df[:,["action"]] |> deepcopy |> DataFrame + _terminals = zeros(Int,srow) + _terminals[collect(100:1000:9000)] .= 1 + _terminals[end] = 1 + dterminal = DataFrame(terminal=_terminals) + action_reward_terminal = DataFrame[action, reward, dterminal] + for agentid in keys(AutoOfflineRL.OfflineRLs.rl_dict) + @info "training $agentid" + agent = DiscreteRLOffline(agentid; save_model=false,runtime_args=Dict(:n_epochs=>1,:verbose=>false, :show_progress=>true)) + o_header = agent.model[:o_header] + fit!(agent,df_input,action_reward_terminal) + @test agent.model[:rlobjtrained] !== PYC.PyNULL + @info "transform $agentid" + adf = df_input[1:2,:] + if agentid != "DiscreteBC" + res = AutoOfflineRL.transform!(agent,adf) + @test typeof(res[1]) .== NamedTuple{(:obs,:action, :value), Tuple{Vector{Float64},Vector{Float64}, Vector{Float64}}} + end + end +end + + +end