IBM · ppalmes · Jun 22, 2023 · May 31, 2023 · May 31, 2023 · Jun 13, 2023
diff --git a/AutoOfflineRL/CondaPkg.toml b/AutoOfflineRL/CondaPkg.toml
@@ -0,0 +1,10 @@
+channels = ["mkl", "scikit-learn"]
+
+[deps]
+scikit-learn = ""
+pandas = ""
+numpy = ""
+python = ""
+
+[pip.deps]
+d3rlpy = ""
diff --git a/AutoOfflineRL/LICENSE b/AutoOfflineRL/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2022 Paulito Palmes, PhD <ppalmes@gmail.com> and contributors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/AutoOfflineRL/Project.toml b/AutoOfflineRL/Project.toml
@@ -0,0 +1,26 @@
+name = "AutoOfflineRL"
+uuid = "4680bba7-2b59-4a6e-a544-0ebac8b8cdd3"
+authors = ["Paulito Palmes, PhD <ppalmes@gmail.com>"]
+version = "0.1.0"
+
+[deps]
+AMLPipelineBase = "e3c3008a-8869-4d53-9f34-c96f99c8a2b6"
+CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
+CondaPkg = "992eb4ea-22a4-4c89-a5bb-47a3300528ab"
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
+Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
+MicroMamba = "0b3b1443-0f03-428d-bdfb-f27f9c1191ea"
+Parquet = "626c502c-15b0-58ad-a749-f091afb673ae"
+PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+
+[compat]
+julia = "1"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/AutoOfflineRL/README.md b/AutoOfflineRL/README.md
diff --git a/AutoOfflineRL/data/smalldata.parquet b/AutoOfflineRL/data/smalldata.parquet
diff --git a/AutoOfflineRL/examples/Project.toml b/AutoOfflineRL/examples/Project.toml
@@ -0,0 +1,9 @@
+[deps]
+AutoMLPipeline = "08437348-eef5-4817-bc1b-d4e9459680d6"
+AutoOfflineRL = "4680bba7-2b59-4a6e-a544-0ebac8b8cdd3"
+CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
+IJulia = "7073ff75-c697-5162-941a-fcdaad2a7d2a"
+Parquet = "626c502c-15b0-58ad-a749-f091afb673ae"
+PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d"
diff --git a/AutoOfflineRL/examples/demo.jl b/AutoOfflineRL/examples/demo.jl
@@ -0,0 +1,90 @@
+using Distributed
+
+# Add worker processes (default settings) only when the session currently
+# consists of a single process.
+if nprocs() == 1
+   addprocs()
+end
+
+# Make the packages used by this demo available on every process.
+@everywhere using AutoOfflineRL, AutoMLPipeline, Parquet, DataFrames
+
+@everywhere begin
+   # Pipeline building blocks, created on every process so that the
+   # distributed search below can refer to them by name.
+
+   # --- scalers ---
+   rb   = SKPreprocessor("RobustScaler")
+   pt   = SKPreprocessor("PowerTransformer")
+   norm = SKPreprocessor("Normalizer")
+   mx   = SKPreprocessor("MinMaxScaler")
+   std  = SKPreprocessor("StandardScaler")
+
+   # --- column selectors ---
+   catf = CatFeatureSelector()
+   numf = NumFeatureSelector()
+
+   # --- decomposition filters ---
+   # Disabled alternatives that pass :autocomponent => true and a custom name:
+   #apca = SKPreprocessor("PCA",Dict(:autocomponent=>true,:name=>"autoPCA"));
+   #afa = SKPreprocessor("FactorAnalysis",Dict(:autocomponent=>true,:name=>"autoFA"));
+   #aica = SKPreprocessor("FastICA",Dict(:autocomponent=>true,:name=>"autoICA"));
+   pca = SKPreprocessor("PCA")
+   fa  = SKPreprocessor("FactorAnalysis")
+   ica = SKPreprocessor("FastICA")
+
+   # Identity element registered under the name "Noop"; it appears in both the
+   # scaler list and the extractor list of the pipeline search.
+   noop = Identity(Dict(:name => "Noop"))
+end
+
+# Load the sample dataset shipped with the package and drop incomplete rows.
+path = pkgdir(AutoOfflineRL)
+dataset = joinpath(path, "data", "smalldata.parquet")
+df = Parquet.read_parquet(dataset) |> DataFrame |> dropmissing
+
+# Disabled: synthetic replacement for the sensor/action/reward columns.
+# NOTE(review): these lines use `srow`, which is only defined further down;
+# move them below that definition before re-enabling.
+#df = df[:,["day", "hour", "minute", "dow"]]
+#df.sensor1 = rand(1:500,srow)
+#df.sensor2 = rand(1:200,srow)
+#df.sensor3 = rand(1:100,srow)
+#df.action = rand([10,50,100],srow)
+#df.reward = rand(srow)
+
+srow = size(df, 1)
+srow > 0 || error("no complete rows left in $dataset after dropmissing")
+
+# `df[:, cols]` already returns a freshly allocated DataFrame, so the slices
+# need no additional copying.
+observation = df[:, ["day", "hour", "minute", "dow", "sensor1", "sensor2", "sensor3"]]
+reward = df[:, ["reward"]]
+action = df[:, ["action"]]
+
+# Terminal flags: set at row 100 and every 1000 rows after it (capped at row
+# 9000 and at the number of rows actually available, so short datasets do not
+# raise a BoundsError), and always at the last row.
+# Presumably these mark episode boundaries -- confirm against crossvalidateRL.
+_terminals = zeros(Int, srow)
+_terminals[100:1000:min(srow, 9000)] .= 1
+_terminals[end] = 1
+dterminal = DataFrame(terminal=_terminals)
+action_reward_terminal = DataFrame[action, reward, dterminal]
+
+# Cross-validate one fixed pipeline: numeric columns -> MinMaxScaler -> PCA -> NFQ agent.
+agent = DiscreteRLOffline("NFQ")
+pipe = (numf |> mx |> pca) |> agent
+crossvalidateRL(pipe,observation,action_reward_terminal)
+
+"""
+    pipelinesearch()
+
+Cross-validate every combination of agent name, scaler and feature extractor
+and return the results as one `DataFrame` with the columns `pipeline` and
+`td_error`.
+
+A combination contributes no row when `crossvalidateRL` returns `NaN` or when
+building/evaluating the pipeline throws; in the latter case the error is
+printed together with the agent name and the search continues.
+
+Reads the script-level globals `numf`, `observation`,
+`action_reward_terminal` and the scaler/extractor elements
+(`rb`, `pt`, `norm`, `std`, `mx`, `pca`, `ica`, `fa`, `noop`).
+"""
+function pipelinesearch()
+   agentnames = ["DiscreteCQL","NFQ","DoubleDQN","DiscreteSAC","DiscreteBCQ","DiscreteBC","DQN"]
+   scalers =  [rb,pt,norm,std,mx,noop]
+   extractors = [pca,ica,fa,noop]
+
+   # Flatten the search grid so a single @distributed loop spreads all
+   # combinations over the workers, instead of nested @distributed loops that
+   # make each worker re-distribute its share. The first iterator of `product`
+   # varies fastest, which keeps the agent -> scaler -> extractor ordering.
+   grid = vec(collect(Iterators.product(extractors, scalers, agentnames)))
+
+   # With a reducer, @distributed blocks until all results are combined, so no
+   # extra @sync is required.
+   dfresults = @distributed (vcat) for combo in grid
+      xt, sc, agentname = combo
+      try
+         rlagent = DiscreteRLOffline(agentname,Dict(:runtime_args=>Dict(:n_epochs=>1)))
+         rlpipeline = (numf |> sc |> xt) |> rlagent
+         res = crossvalidateRL(rlpipeline,observation,action_reward_terminal)
+         # Drop the last four characters of each element name to build the label.
+         scn = sc.name[1:end - 4]
+         xtn = xt.name[1:end - 4]
+         lrn = rlagent.name[1:end - 4]
+         pname = "$scn |> $xtn |> $lrn"
+         isnan(res) ? DataFrame() : DataFrame(pipeline=pname,td_error=res)
+      catch e
+         # Report what went wrong instead of silently discarding the exception.
+         println("error in $agentname: ", sprint(showerror, e))
+         DataFrame()
+      end
+   end
+   return dfresults
+end
+# Run the search and list the pipelines from lowest to highest td_error.
+dftable = pipelinesearch()
+if isempty(dftable)
+   # Every combination failed or returned NaN: the table has no :td_error
+   # column, so sorting on it would throw.
+   println("pipelinesearch produced no valid results")
+else
+   sort!(dftable, :td_error)
+   show(dftable,allcols=true,allrows=true,truncate=0)
+end
diff --git a/AutoOfflineRL/examples/offlinerl.png b/AutoOfflineRL/examples/offlinerl.png