diff --git a/src/main/scala/symsim/concrete/BdlConcreteExpectedSarsa.scala b/src/main/scala/symsim/concrete/BdlConcreteExpectedSarsa.scala index 3a17541..3cd56a6 100644 --- a/src/main/scala/symsim/concrete/BdlConcreteExpectedSarsa.scala +++ b/src/main/scala/symsim/concrete/BdlConcreteExpectedSarsa.scala @@ -13,7 +13,8 @@ case class BdlConcreteExpectedSarsa [ val gamma: Double, val epsilon0: Probability, val episodes: Int, -) extends BdlLearn[State, ObservableState, Action, Double, Randomized2], +) (using val rng: probula.RNG) + extends BdlLearn[State, ObservableState, Action, Double, Randomized2], ConcreteExactRL[State, ObservableState, Action], NoDecay: diff --git a/src/main/scala/symsim/concrete/BdlConcreteSarsa.scala b/src/main/scala/symsim/concrete/BdlConcreteSarsa.scala index 1fbad14..4ef45a3 100644 --- a/src/main/scala/symsim/concrete/BdlConcreteSarsa.scala +++ b/src/main/scala/symsim/concrete/BdlConcreteSarsa.scala @@ -13,7 +13,8 @@ case class BdlConcreteSarsa [ val gamma: Double, val epsilon0: Probability, val episodes: Int, -) extends BdlLearn[State, ObservableState, Action, Double, Randomized2], +) (using val rng: probula.RNG) + extends BdlLearn[State, ObservableState, Action, Double, Randomized2], ConcreteExactRL[State, ObservableState, Action], NoDecay: diff --git a/src/main/scala/symsim/concrete/ConcreteExactRL.scala b/src/main/scala/symsim/concrete/ConcreteExactRL.scala index a75e484..5ff51fa 100644 --- a/src/main/scala/symsim/concrete/ConcreteExactRL.scala +++ b/src/main/scala/symsim/concrete/ConcreteExactRL.scala @@ -3,9 +3,6 @@ package concrete import cats.kernel.BoundedEnumerable -given spire.random.rng.SecureJava = - spire.random.rng.SecureJava.apply - trait ConcreteExactRL[State, ObservableState, Action] extends ExactRL[State, ObservableState, Action, Double, Randomized2]: @@ -24,10 +21,12 @@ trait ConcreteExactRL[State, ObservableState, Action] // TODO: unclear if this is general (if it turns out to be the same im // symbolic or approximate algos we should promote this to the trait + + given rng: probula.RNG def runQ: (Q, List[Q]) = - val initials = agent.initialize.sample(episodes) - val outcome = learn (vf.initialize, List[VF](), initials).sample() + val initials = agent.initialize.sample (episodes) + val outcome = learn (vf.initialize, List[VF] (), initials).sample () (outcome._1, outcome._2) override def run: Policy = diff --git a/src/main/scala/symsim/concrete/ConcreteExpectedSarsa.scala b/src/main/scala/symsim/concrete/ConcreteExpectedSarsa.scala index 579626a..eeff03a 100644 --- a/src/main/scala/symsim/concrete/ConcreteExpectedSarsa.scala +++ b/src/main/scala/symsim/concrete/ConcreteExpectedSarsa.scala @@ -7,7 +7,8 @@ case class ConcreteExpectedSarsa[State, ObservableState, Action] ( val gamma: Double, val epsilon0: Probability, val episodes: Int, -) extends ExpectedSarsa[State, ObservableState, Action, Double, Randomized2], +) (using val rng: probula.RNG) + extends ExpectedSarsa[State, ObservableState, Action, Double, Randomized2], ConcreteExactRL[State, ObservableState, Action], NoDecay: diff --git a/src/main/scala/symsim/concrete/ConcreteQLearning.scala b/src/main/scala/symsim/concrete/ConcreteQLearning.scala index ed9c722..833adb9 100644 --- a/src/main/scala/symsim/concrete/ConcreteQLearning.scala +++ b/src/main/scala/symsim/concrete/ConcreteQLearning.scala @@ -13,7 +13,8 @@ case class ConcreteQLearning [ val gamma: Double, val epsilon0: Probability, val episodes: Int, -) extends QLearning[State, ObservableState, Action, Double, Randomized2], +) (using val rng: probula.RNG) + extends QLearning[State, ObservableState, Action, Double, Randomized2], ConcreteExactRL[State, ObservableState, Action], NoDecay: diff --git a/src/main/scala/symsim/concrete/ConcreteQLearningWithDecay.scala b/src/main/scala/symsim/concrete/ConcreteQLearningWithDecay.scala index 34c1707..cddb0e5 100644 --- a/src/main/scala/symsim/concrete/ConcreteQLearningWithDecay.scala +++ b/src/main/scala/symsim/concrete/ConcreteQLearningWithDecay.scala @@ -15,7 +15,8 @@ case class ConcreteQLearningWithDecay [ val epsilon0: Probability, val episodes: Int, -) extends QLearning[State, ObservableState, Action, Double, Randomized2], +) (using val rng: probula.RNG) + extends QLearning[State, ObservableState, Action, Double, Randomized2], ConcreteExactRL[State, ObservableState, Action], BoundedEpsilonDecay: diff --git a/src/main/scala/symsim/concrete/ConcreteSarsa.scala b/src/main/scala/symsim/concrete/ConcreteSarsa.scala index 7afc355..0b7528a 100644 --- a/src/main/scala/symsim/concrete/ConcreteSarsa.scala +++ b/src/main/scala/symsim/concrete/ConcreteSarsa.scala @@ -13,7 +13,8 @@ case class ConcreteSarsa [ val gamma: Double, val epsilon0: Probability, val episodes: Int, -) extends Sarsa[State, ObservableState, Action, Double, Randomized2], +) (using val rng: probula.RNG) + extends Sarsa[State, ObservableState, Action, Double, Randomized2], ConcreteExactRL[State, ObservableState, Action], NoDecay: diff --git a/src/main/scala/symsim/concrete/Randomized2.scala b/src/main/scala/symsim/concrete/Randomized2.scala index 6db66ac..54828ef 100644 --- a/src/main/scala/symsim/concrete/Randomized2.scala +++ b/src/main/scala/symsim/concrete/Randomized2.scala @@ -95,7 +95,7 @@ object Randomized2: /** Perform an imperative operation that depends on one sample from this * Randomized. This is mostly meant for IO at this point. */ - def run (f: A => Unit): Unit = f(self.sample ()) + def run (f: A => Unit) (using RNG): Unit = f(self.sample ()) def filter (p: A => Boolean): Randomized2[A] = self.filter (p) diff --git a/src/main/scala/symsim/examples/concrete/simplebandit/Bandit.scala b/src/main/scala/symsim/examples/concrete/simplebandit/Bandit.scala index e90c551..ea28d09 100644 --- a/src/main/scala/symsim/examples/concrete/simplebandit/Bandit.scala +++ b/src/main/scala/symsim/examples/concrete/simplebandit/Bandit.scala @@ -59,7 +59,7 @@ class BanditInstances (banditReward: List [Randomized2[BanditReward]]) (using pr extends AgentConstraints[BanditState, BanditState, BanditAction, BanditReward, Randomized2]: given enumAction: BoundedEnumerable[BanditAction] = - BoundedEnumerableFromList (List.range(0, banditReward.size)*) + BoundedEnumerableFromList (List.range (0, banditReward.size)*) given enumState: BoundedEnumerable[BanditState] = BoundedEnumerableFromList (false, true) diff --git a/src/test/scala/symsim/ExperimentSpec.scala b/src/test/scala/symsim/ExperimentSpec.scala index 7219fc3..3668ddc 100644 --- a/src/test/scala/symsim/ExperimentSpec.scala +++ b/src/test/scala/symsim/ExperimentSpec.scala @@ -10,8 +10,6 @@ import symsim.concrete.Randomized2 import cats.syntax.all.* import symsim.concrete.Randomized2.* -given spire.random.rng.SecureJava = - spire.random.rng.SecureJava.apply trait ExperimentSpec[State, ObservableState, Action] extends org.scalatest.freespec.AnyFreeSpec, @@ -70,12 +68,13 @@ trait ExperimentSpec[State, ObservableState, Action] policies: List[setup.Policy], initials: Option[Randomized2[State]] = None, noOfEpisodes: Int = 5 - ): EvaluationResults = + ) (using probula.RNG): EvaluationResults = val ss: Randomized2[State] = initials.getOrElse (setup.agent.initialize) for p <- policies episodeRewards: Randomized2[Randomized2[Double]] = setup.evaluate (p, ss) - rewards: Randomized2[Double] = episodeRewards.map { e => e.sample () } + rewards: Randomized2[Double] = + episodeRewards.map { e => e.sample () } yield rewards.sample (noOfEpisodes).toList diff --git a/src/test/scala/symsim/concrete/BdlConcreteExpectedSarsaIsExpectedSarsa.scala b/src/test/scala/symsim/concrete/BdlConcreteExpectedSarsaIsExpectedSarsa.scala index 8b04a5b..4603d76 100644 --- a/src/test/scala/symsim/concrete/BdlConcreteExpectedSarsaIsExpectedSarsa.scala +++ b/src/test/scala/symsim/concrete/BdlConcreteExpectedSarsaIsExpectedSarsa.scala @@ -5,6 +5,8 @@ import symsim.examples.concrete.mountaincar.MountainCar private val mountainCar = new MountainCar (using spire.random.rng.SecureJava.apply) +private given spire.random.rng.SecureJava = + spire.random.rng.SecureJava.apply import mountainCar.instances.given diff --git a/src/test/scala/symsim/examples/concrete/braking/CarIsAgentSpec.scala b/src/test/scala/symsim/examples/concrete/braking/CarIsAgentSpec.scala index 1cb2e88..e2aeea6 100644 --- a/src/test/scala/symsim/examples/concrete/braking/CarIsAgentSpec.scala +++ b/src/test/scala/symsim/examples/concrete/braking/CarIsAgentSpec.scala @@ -4,8 +4,12 @@ package examples.concrete.braking import laws.AgentLaws import laws.EpisodicLaws +private given spire.random.rng.SecureJava = + spire.random.rng.SecureJava.apply +private val car = new Car + class CarIsAgentSpec extends SymSimSpec: - checkAll ("concrete.braking.Car is an Agent", AgentLaws (new Car).laws) - checkAll ("concrete.braking.Car is Episodic", EpisodicLaws (new Car).laws) + checkAll ("concrete.braking.Car is an Agent", AgentLaws (car).laws) + checkAll ("concrete.braking.Car is Episodic", EpisodicLaws (car).laws) diff --git a/src/test/scala/symsim/examples/concrete/cartpole/Experiments.scala b/src/test/scala/symsim/examples/concrete/cartpole/Experiments.scala index 374e64a..6b578f5 100644 --- a/src/test/scala/symsim/examples/concrete/cartpole/Experiments.scala +++ b/src/test/scala/symsim/examples/concrete/cartpole/Experiments.scala @@ -1,9 +1,9 @@ package symsim package examples.concrete.cartpole -// Import evidence that states and actions can be enumerated -private val cartPole = - new CartPole (using spire.random.rng.SecureJava.apply) +private given spire.random.rng.SecureJava = + spire.random.rng.SecureJava.apply +private val cartPole: CartPole = new CartPole import cartPole.instances.{enumAction, enumState} class Experiments extends diff --git a/src/test/scala/symsim/examples/concrete/cliffwalking/Experiments.scala b/src/test/scala/symsim/examples/concrete/cliffwalking/Experiments.scala index ddbc62b..a5a4456 100644 --- a/src/test/scala/symsim/examples/concrete/cliffwalking/Experiments.scala +++ b/src/test/scala/symsim/examples/concrete/cliffwalking/Experiments.scala @@ -1,8 +1,10 @@ package symsim package examples.concrete.cliffWalking -private val cliffWalking = - new CliffWalking (using spire.random.rng.SecureJava.apply) + +private given spire.random.rng.SecureJava = + spire.random.rng.SecureJava.apply +private val cliffWalking: CliffWalking = new CliffWalking import cliffWalking.instances.{enumAction, enumState} class Experiments diff --git a/src/test/scala/symsim/examples/concrete/mountaincar/Experiments.scala b/src/test/scala/symsim/examples/concrete/mountaincar/Experiments.scala index 99ba079..eea1996 100644 --- a/src/test/scala/symsim/examples/concrete/mountaincar/Experiments.scala +++ b/src/test/scala/symsim/examples/concrete/mountaincar/Experiments.scala @@ -1,8 +1,10 @@ package symsim package examples.concrete.mountaincar -private val mountainCar = - new MountainCar (using spire.random.rng.SecureJava.apply) + +private given spire.random.rng.SecureJava = + spire.random.rng.SecureJava.apply +private val mountainCar: MountainCar = new MountainCar import mountainCar.instances.{enumAction, enumState} class Experiments diff --git a/src/test/scala/symsim/examples/concrete/pumping/Experiments.scala b/src/test/scala/symsim/examples/concrete/pumping/Experiments.scala index 4f12e6d..8ace3b6 100644 --- a/src/test/scala/symsim/examples/concrete/pumping/Experiments.scala +++ b/src/test/scala/symsim/examples/concrete/pumping/Experiments.scala @@ -1,7 +1,10 @@ package symsim package examples.concrete.pumping -private val pump = new Pump (using spire.random.rng.SecureJava.apply) + +private given spire.random.rng.SecureJava = + spire.random.rng.SecureJava.apply +private val pump: Pump = new Pump class Experiments extends ExperimentSpec[PumpState, ObservablePumpState, PumpAction]: diff --git a/src/test/scala/symsim/examples/concrete/simplebandit/Experiments.scala b/src/test/scala/symsim/examples/concrete/simplebandit/Experiments.scala index aa48e90..634162d 100644 --- a/src/test/scala/symsim/examples/concrete/simplebandit/Experiments.scala +++ b/src/test/scala/symsim/examples/concrete/simplebandit/Experiments.scala @@ -1,6 +1,9 @@ package symsim package examples.concrete.simplebandit +private given spire.random.rng.SecureJava = + spire.random.rng.SecureJava.apply + class Experiments extends ExperimentSpec[BanditState,BanditState,BanditAction]: diff --git a/src/test/scala/symsim/examples/concrete/simplemaze/SarsaExperiments.scala b/src/test/scala/symsim/examples/concrete/simplemaze/SarsaExperiments.scala index 2c8a8bd..26c5d6d 100644 --- a/src/test/scala/symsim/examples/concrete/simplemaze/SarsaExperiments.scala +++ b/src/test/scala/symsim/examples/concrete/simplemaze/SarsaExperiments.scala @@ -1,7 +1,7 @@ package symsim package examples.concrete.simplemaze -given spire.random.rng.SecureJava = +private given spire.random.rng.SecureJava = spire.random.rng.SecureJava.apply private val maze = new Maze