pvlv bug fixes, boa initially working

emer · Mar 8, 2024 · e4d4334 · e4d4334
1 parent 2e42733
commit e4d4334
Show file tree

Hide file tree

Showing 6 changed files with 52 additions and 41 deletions.
diff --git a/axon/pvlv.go b/axon/pvlv.go
@@ -717,6 +717,7 @@ func (pp *PVLV) ResetGoalState(ctx *Context, di uint32) {
 	pp.Urgency.Reset(ctx, di)
 	pp.TimeEffortReset(ctx, di)
 	pp.USs.USnegToZero(ctx, di) // all negs restart
+	pp.USs.CostToZero(ctx, di)
 	pp.ResetGiveUp(ctx, di)
 	SetGlbV(ctx, di, GvVSPatchPos, 0)
 	SetGlbV(ctx, di, GvVSPatchPosPrev, 0)
@@ -941,6 +942,19 @@ func (pp *PVLV) PVnegEstFmUSs(uss []float32) (pvNegSum, pvNeg float32) {
 	return
 }
 
+// PVcostEstFmCosts returns the estimated negative PV value
+// based on given externally-provided Cost values.
+// This can be used to compute estimates to compare network performance.
+func (pp *PVLV) PVcostEstFmCosts(costs []float32) (pvCostSum, pvNeg float32) {
+	nn := pp.NCosts
+	wts := pp.USs.PVcostWts
+	for i := uint32(0); i < nn; i++ {
+		pvCostSum += wts[i] * costs[i]
+	}
+	pvNeg = PVLVNormFun(pp.USs.PVnegGain * pvCostSum)
+	return
+}
+
 // DAFmPVs computes the overall PV DA in terms of LHb burst and dip
 // activity from given pvPos, pvNeg, and vsPatchPos values.
 // Also returns the net "reward" value as the discounted PV value,

diff --git a/axon/pvlv_net.go b/axon/pvlv_net.go
@@ -929,32 +929,32 @@ func (net *Network) AddBOA(ctx *Context, nYneur, popY, popX, bgY, bgX, pfcY, pfc
 	_, _, _ = accCost, accCostCT, accCostPTp
 	_, _ = blaNegAcq, blaNegExt
 
-	// ILposP is what ACCutil predicts, in order to learn about value (reward)
+	// ILposP is what PLutil predicts, in order to learn about value (reward)
 	ilPosP := net.AddPulvForSuper(ilPos, space)
 
-	// ILnegP is what ACCutil predicts, in order to learn about cost
+	// ILnegP is what PLutil predicts, in order to learn about cost
 	ilNegP := net.AddPulvForSuper(ilNeg, space)
 
 	pfc2m := params.Params{ // contextual, not driving -- weaker
 		"Prjn.PrjnScale.Rel": "0.1",
 	}
 
-	accUtil, accUtilCT, accUtilPT, accUtilPTp, accUtilMD := net.AddPFC2D("ACCutil", "MD", pfcY, pfcX, true, space)
-	vSmtxGo.SetBuildConfig("ThalLay5Name", accUtilMD.Name())
-	vSmtxNo.SetBuildConfig("ThalLay5Name", accUtilMD.Name())
-	net.ConnectLayers(vSgpi, accUtilMD, full, InhibPrjn)
+	plUtil, plUtilCT, plUtilPT, plUtilPTp, plUtilMD := net.AddPFC2D("PLutil", "MD", pfcY, pfcX, true, space)
+	vSmtxGo.SetBuildConfig("ThalLay5Name", plUtilMD.Name())
+	vSmtxNo.SetBuildConfig("ThalLay5Name", plUtilMD.Name())
+	net.ConnectLayers(vSgpi, plUtilMD, full, InhibPrjn)
 
-	accUtilPT.DefParams["Layer.Acts.Dend.ModACh"] = "true"
+	plUtilPT.DefParams["Layer.Acts.Dend.ModACh"] = "true"
 
-	pj = net.ConnectToVSMatrix(accUtil, vSmtxGo, full)
+	pj = net.ConnectToVSMatrix(plUtil, vSmtxGo, full)
 	pj.DefParams = pfc2m
 	pj.SetClass("PFCToVSMtx")
 
-	pj = net.ConnectToVSMatrix(accUtil, vSmtxNo, full)
+	pj = net.ConnectToVSMatrix(plUtil, vSmtxNo, full)
 	pj.DefParams = pfc2m
 	pj.SetClass("PFCToVSMtx")
 
-	net.ConnectToVSPatch(accUtilPTp, vSpatch, full)
+	net.ConnectToVSPatch(plUtilPTp, vSpatch, full)
 
 	///////////////////////////////////////////
 	// ILneg
@@ -966,24 +966,24 @@ func (net *Network) AddBOA(ctx *Context, nYneur, popY, popX, bgY, bgX, pfcY, pfc
 	// net.ConnectLayers(dist, ilNegPTPred, full, ForwardPrjn).SetClass("ToPTPred")
 
 	///////////////////////////////////////////
-	// ACCutil
+	// PLutil
 
-	// net.ConnectCTSelf(accUtilCT, full) // todo: test
+	// net.ConnectCTSelf(plUtilCT, full) // todo: test
 
 	// util predicts OFCval and ILneg
-	pj, _ = net.ConnectToPFCBidir(ilPos, ilPosP, accUtil, accUtilCT, accUtilPTp, full)
+	pj, _ = net.ConnectToPFCBidir(ilPos, ilPosP, plUtil, plUtilCT, plUtilPTp, full)
 	pj.DefParams = params.Params{
 		"Prjn.PrjnScale.Abs": "1", // not good to make this stronger actually
 	}
-	pj, _ = net.ConnectToPFCBidir(ilNeg, ilNegP, accUtil, accUtilCT, accUtilPTp, full)
+	pj, _ = net.ConnectToPFCBidir(ilNeg, ilNegP, plUtil, plUtilCT, plUtilPTp, full)
 	pj.DefParams = params.Params{
 		"Prjn.PrjnScale.Abs": "3", // drive acc stronger -- only this one works well
 	}
 
-	ilPos.PlaceRightOf(ofcPosUS, space)
+	// ilPos.PlaceRightOf(ofcPosUS, space)
 	ilPosP.PlaceBehind(ilPosMD, space)
 	ilNegP.PlaceBehind(ilNegMD, space)
-	accUtil.PlaceRightOf(ilNeg, 3*space)
+	plUtil.PlaceRightOf(accCost, space)
 
 	return
 }
diff --git a/examples/boa/armaze/maze.go b/examples/boa/armaze/maze.go
@@ -262,7 +262,7 @@ func (ev *Env) ExValueUtil(pv *axon.PVLV, ctx *axon.Context) {
 	maxutil := float32(0)
 	ev.ArmsNeg = nil
 	usPos := make([]float32, pv.NPosUSs)
-	usNeg := make([]float32, pv.NNegUSs)
+	cost := make([]float32, pv.NCosts)
 	for i, arm := range ev.Config.Arms {
 		us := ev.Config.USs[arm.US]
 		if us.Negative {
@@ -274,15 +274,15 @@ func (ev *Env) ExValueUtil(pv *axon.PVLV, ctx *axon.Context) {
 		for j := range usPos { // reset
 			usPos[j] = 0
 		}
-		for j := range usNeg { // reset
-			usNeg[j] = 0
+		for j := range cost { // reset
+			cost[j] = 0
 		}
 		usPos[arm.US+1] = val
 		_, pvPos := pv.PVposEstFmUSs(ctx, uint32(ev.Di), usPos)
 		exTime := float32(arm.Length) + 1 // time
-		usNeg[0] = exTime
-		usNeg[1] = exTime * arm.Effort.Midpoint()
-		_, pvNeg := pv.PVnegEstFmUSs(usNeg)
+		cost[0] = exTime
+		cost[1] = exTime * arm.Effort.Midpoint()
+		_, pvNeg := pv.PVcostEstFmCosts(cost)
 		burst, dip, da, rew := pv.DAFmPVs(pvPos, pvNeg, 0)
 		_, _, _ = burst, dip, rew
 		arm.ExPVpos = pvPos

diff --git a/examples/boa/boa.go b/examples/boa/boa.go
@@ -177,13 +177,7 @@ func (ss *Sim) ConfigPVLV(trn *armaze.Env) {
 	pv.USs.PVposGain = 2  // higher = more pos reward (saturating logistic func)
 	pv.USs.PVnegGain = .1 // global scaling of PV neg level -- was 1
 
-	pv.USs.USnegGains[0] = 0.1 // time: if USneg pool is saturating, reduce
-	pv.USs.USnegGains[1] = 0.1 // effort: if USneg pool is saturating, reduce
-	pv.USs.USnegGains[2] = 2   // big salient input!
-
-	pv.USs.PVnegWts[0] = 0.02 // time: controls overall PVneg -- if too high, not enough reward..
-	pv.USs.PVnegWts[1] = 0.02 // effort: controls overall PVneg -- if too high, not enough reward..
-	pv.USs.PVnegWts[2] = 1
+	pv.USs.USnegGains[0] = 2 // big salient input!
 
 	pv.Drive.DriveMin = 0.5 // 0.5 -- should be
 	pv.Urgency.U50 = 10
@@ -220,11 +214,11 @@ func (ss *Sim) ConfigNet(net *axon.Network) {
 	ny := ev.Config.Params.NYReps
 	narm := ev.Config.NArms
 
-	vSgpi, urgency, pvPos, blaPosAcq, blaPosExt, blaNegAcq, blaNegExt, blaNov, ofcPosUS, ofcPosUSCT, ofcPosUSPTp, ilPos, ilPosCT, ilPosPTp, ofcNegUS, ofcNegUSCT, ofcNegUSPTp, ilNeg, ilNegCT, ilNegPTp, accUtil, sc, notMaint := net.AddBOA(ctx, ny, popY, popX, nuBgY, nuBgX, nuCtxY, nuCtxX, space)
-	_, _ = accUtil, urgency
+	vSgpi, urgency, pvPos, blaPosAcq, blaPosExt, blaNegAcq, blaNegExt, blaNov, ofcPosUS, ofcPosUSCT, ofcPosUSPTp, ilPos, ilPosCT, ilPosPTp, ofcNegUS, ofcNegUSCT, ofcNegUSPTp, ilNeg, ilNegCT, ilNegPTp, plUtil, sc := net.AddBOA(ctx, ny, popY, popX, nuBgY, nuBgX, nuCtxY, nuCtxX, space)
+	_, _ = plUtil, urgency
 	_, _ = ofcNegUSCT, ofcNegUSPTp
 
-	accUtilPTp := net.AxonLayerByName("ACCutilPTp")
+	plUtilPTp := net.AxonLayerByName("PLutilPTp")
 
 	cs, csP := net.AddInputPulv2D("CS", ny, ev.Config.NCSs, space)
 	pos, posP := net.AddInputPulv2D("Pos", ny, ev.MaxLength+1, space)
@@ -300,10 +294,10 @@ func (ss *Sim) ConfigNet(net *axon.Network) {
 	// ALM, M1 <-> OFC, ACC
 
 	// action needs to know if maintaining a goal or not
-	// using accUtil as main summary "driver" input to action system
+	// using plUtil as main summary "driver" input to action system
 	// PTp provides good notmaint signal for action.
-	net.ConnectLayers(accUtilPTp, alm, full, axon.ForwardPrjn).SetClass("ToALM")
-	net.ConnectLayers(accUtilPTp, m1, full, axon.ForwardPrjn).SetClass("ToM1")
+	net.ConnectLayers(plUtilPTp, alm, full, axon.ForwardPrjn).SetClass("ToALM")
+	net.ConnectLayers(plUtilPTp, m1, full, axon.ForwardPrjn).SetClass("ToM1")
 
 	// note: in Obelisk this helps with the Consume action
 	// but here in this example it produces some instability
@@ -322,8 +316,6 @@ func (ss *Sim) ConfigNet(net *axon.Network) {
 	vl.PlaceBehind(m1P, space)
 	act.PlaceBehind(vl, space)
 
-	notMaint.PlaceRightOf(alm, space)
-
 	net.Build(ctx)
 	net.Defaults()
 	net.SetNThreads(ss.Config.Run.NThreads)
@@ -499,7 +491,7 @@ func (ss *Sim) ConfigLoops() {
 func (ss *Sim) TakeAction(net *axon.Network) {
 	ctx := &ss.Context
 	pv := &ss.Net.PVLV
-	mtxLy := ss.Net.AxonLayerByName("VsMtxGo")
+	mtxLy := ss.Net.AxonLayerByName("VMtxGo")
 	vlly := ss.Net.AxonLayerByName("VL")
 	threshold := float32(0.1)
 	for di := 0; di < int(ctx.NetIdxs.NData); di++ {

diff --git a/examples/boa/params.go b/examples/boa/params.go
@@ -76,6 +76,7 @@ var ParamSets = netparams.Sets{
 				"Layer.Inhib.Pool.Gi":     "0.3",   // go lower, get more inhib from elsewhere?
 				"Layer.Inhib.Pool.FB":     "1",
 				"Layer.Acts.Dend.ModGain": "1", // todo: 2 is default
+				"Layer.Acts.Kir.Gbar":     "2",
 			}},
 		{Sel: "#BLAPosAcqD1", Desc: "",
 			Params: params.Params{
@@ -204,7 +205,7 @@ var ParamSets = netparams.Sets{
 			Params: params.Params{
 				"Prjn.PrjnScale.Abs": "5", // with new mod, this can be stronger
 			}},
-		{Sel: "#UrgencyToVsMtxGo", Desc: "",
+		{Sel: "#UrgencyToVMtxGo", Desc: "",
 			Params: params.Params{
 				"Prjn.PrjnScale.Abs": "4", // 4 good -- 1,2 too weak
 			}},

diff --git a/examples/pcore_ds/params.go b/examples/pcore_ds/params.go
@@ -28,13 +28,15 @@ var ParamSets = netparams.Sets{
 				"Layer.Learn.NeuroMod.BurstGain": "0.1",  // 0.1 == 0.2 > 0.05 > 0.5 -- key lrate modulator
 				"Layer.Learn.RLRate.On":          "true", // note: applied for tr update trials
 				"Layer.Learn.TrgAvgAct.On":       "true", // true > false
-				"Layer.Acts.Mahp.Gbar":           "0.01",
-				"Layer.Acts.Sahp.Gbar":           "0.02", // todo test
+				"Layer.Acts.Mahp.Gbar":           "0.01", // 0.01 > 0.02 > 0
+				"Layer.Acts.Sahp.Gbar":           "0.05", // todo test
 			},
 			Hypers: params.Hypers{
 				"Layer.Learn.NeuroMod.BurstGain": {"Tweak": "-"},
 				"Layer.Acts.Kir.Gbar":            {"Tweak": "-"},
 				"Layer.Inhib.Pool.Gi":            {"Tweak": "-"},
+				"Layer.Acts.Mahp.Gbar":           {"Tweak": "[0.012,0.015,0.005]"},
+				"Layer.Acts.Sahp.Gbar":           {"Tweak": "[0.04,0.03]"},
 			}},
 		{Sel: ".DSTNLayer", Desc: "all STN",
 			Params: params.Params{
@@ -60,6 +62,8 @@ var ParamSets = netparams.Sets{
 				"Layer.Acts.Dend.ModGain":   "1.0",   // 1.5 def
 				"Layer.Acts.Kir.Gbar":       "0",     // no real diff here over range 0-10
 				"Layer.Acts.MaintNMDA.Gbar": "0.007", // 0.007 default
+				"Layer.Acts.Mahp.Gbar":      "0.0",   //
+				"Layer.Acts.Sahp.Gbar":      "0.0",   //
 			}},
 		{Sel: "#MotorBS", Desc: "",
 			Params: params.Params{