Skip to content

Commit

Permalink
pvlv bug fixes, boa initially working
Browse files Browse the repository at this point in the history
  • Loading branch information
rcoreilly committed Mar 8, 2024
1 parent 2e42733 commit e4d4334
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 41 deletions.
14 changes: 14 additions & 0 deletions axon/pvlv.go
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,7 @@ func (pp *PVLV) ResetGoalState(ctx *Context, di uint32) {
pp.Urgency.Reset(ctx, di)
pp.TimeEffortReset(ctx, di)
pp.USs.USnegToZero(ctx, di) // all negs restart
pp.USs.CostToZero(ctx, di)
pp.ResetGiveUp(ctx, di)
SetGlbV(ctx, di, GvVSPatchPos, 0)
SetGlbV(ctx, di, GvVSPatchPosPrev, 0)
Expand Down Expand Up @@ -941,6 +942,19 @@ func (pp *PVLV) PVnegEstFmUSs(uss []float32) (pvNegSum, pvNeg float32) {
return
}

// PVcostEstFmCosts returns the estimated negative PV value
// computed from the given externally-provided Cost values.
// Each of the first NCosts entries of costs is multiplied by the
// corresponding USs.PVcostWts weight and accumulated into pvCostSum;
// pvNeg is PVLVNormFun applied to that sum scaled by USs.PVnegGain.
// costs must therefore hold at least NCosts entries.
// This can be used to compute estimates to compare network performance.
func (pp *PVLV) PVcostEstFmCosts(costs []float32) (pvCostSum, pvNeg float32) {
	nCosts := pp.NCosts
	costWts := pp.USs.PVcostWts
	for ci := uint32(0); ci < nCosts; ci++ {
		pvCostSum += costWts[ci] * costs[ci]
	}
	pvNeg = PVLVNormFun(pp.USs.PVnegGain * pvCostSum)
	return pvCostSum, pvNeg
}

// DAFmPVs computes the overall PV DA in terms of LHb burst and dip
// activity from given pvPos, pvNeg, and vsPatchPos values.
// Also returns the net "reward" value as the discounted PV value,
Expand Down
32 changes: 16 additions & 16 deletions axon/pvlv_net.go
Original file line number Diff line number Diff line change
Expand Up @@ -929,32 +929,32 @@ func (net *Network) AddBOA(ctx *Context, nYneur, popY, popX, bgY, bgX, pfcY, pfc
_, _, _ = accCost, accCostCT, accCostPTp
_, _ = blaNegAcq, blaNegExt

// ILposP is what ACCutil predicts, in order to learn about value (reward)
// ILposP is what PLutil predicts, in order to learn about value (reward)
ilPosP := net.AddPulvForSuper(ilPos, space)

// ILnegP is what ACCutil predicts, in order to learn about cost
// ILnegP is what PLutil predicts, in order to learn about cost
ilNegP := net.AddPulvForSuper(ilNeg, space)

pfc2m := params.Params{ // contextual, not driving -- weaker
"Prjn.PrjnScale.Rel": "0.1",
}

accUtil, accUtilCT, accUtilPT, accUtilPTp, accUtilMD := net.AddPFC2D("ACCutil", "MD", pfcY, pfcX, true, space)
vSmtxGo.SetBuildConfig("ThalLay5Name", accUtilMD.Name())
vSmtxNo.SetBuildConfig("ThalLay5Name", accUtilMD.Name())
net.ConnectLayers(vSgpi, accUtilMD, full, InhibPrjn)
plUtil, plUtilCT, plUtilPT, plUtilPTp, plUtilMD := net.AddPFC2D("PLutil", "MD", pfcY, pfcX, true, space)
vSmtxGo.SetBuildConfig("ThalLay5Name", plUtilMD.Name())
vSmtxNo.SetBuildConfig("ThalLay5Name", plUtilMD.Name())
net.ConnectLayers(vSgpi, plUtilMD, full, InhibPrjn)

accUtilPT.DefParams["Layer.Acts.Dend.ModACh"] = "true"
plUtilPT.DefParams["Layer.Acts.Dend.ModACh"] = "true"

pj = net.ConnectToVSMatrix(accUtil, vSmtxGo, full)
pj = net.ConnectToVSMatrix(plUtil, vSmtxGo, full)
pj.DefParams = pfc2m
pj.SetClass("PFCToVSMtx")

pj = net.ConnectToVSMatrix(accUtil, vSmtxNo, full)
pj = net.ConnectToVSMatrix(plUtil, vSmtxNo, full)
pj.DefParams = pfc2m
pj.SetClass("PFCToVSMtx")

net.ConnectToVSPatch(accUtilPTp, vSpatch, full)
net.ConnectToVSPatch(plUtilPTp, vSpatch, full)

///////////////////////////////////////////
// ILneg
Expand All @@ -966,24 +966,24 @@ func (net *Network) AddBOA(ctx *Context, nYneur, popY, popX, bgY, bgX, pfcY, pfc
// net.ConnectLayers(dist, ilNegPTPred, full, ForwardPrjn).SetClass("ToPTPred")

///////////////////////////////////////////
// ACCutil
// PLutil

// net.ConnectCTSelf(accUtilCT, full) // todo: test
// net.ConnectCTSelf(plUtilCT, full) // todo: test

// util predicts OFCval and ILneg
pj, _ = net.ConnectToPFCBidir(ilPos, ilPosP, accUtil, accUtilCT, accUtilPTp, full)
pj, _ = net.ConnectToPFCBidir(ilPos, ilPosP, plUtil, plUtilCT, plUtilPTp, full)
pj.DefParams = params.Params{
"Prjn.PrjnScale.Abs": "1", // not good to make this stronger actually
}
pj, _ = net.ConnectToPFCBidir(ilNeg, ilNegP, accUtil, accUtilCT, accUtilPTp, full)
pj, _ = net.ConnectToPFCBidir(ilNeg, ilNegP, plUtil, plUtilCT, plUtilPTp, full)
pj.DefParams = params.Params{
"Prjn.PrjnScale.Abs": "3", // drive acc stronger -- only this one works well
}

ilPos.PlaceRightOf(ofcPosUS, space)
// ilPos.PlaceRightOf(ofcPosUS, space)
ilPosP.PlaceBehind(ilPosMD, space)
ilNegP.PlaceBehind(ilNegMD, space)
accUtil.PlaceRightOf(ilNeg, 3*space)
plUtil.PlaceRightOf(accCost, space)

return
}
12 changes: 6 additions & 6 deletions examples/boa/armaze/maze.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ func (ev *Env) ExValueUtil(pv *axon.PVLV, ctx *axon.Context) {
maxutil := float32(0)
ev.ArmsNeg = nil
usPos := make([]float32, pv.NPosUSs)
usNeg := make([]float32, pv.NNegUSs)
cost := make([]float32, pv.NCosts)
for i, arm := range ev.Config.Arms {
us := ev.Config.USs[arm.US]
if us.Negative {
Expand All @@ -274,15 +274,15 @@ func (ev *Env) ExValueUtil(pv *axon.PVLV, ctx *axon.Context) {
for j := range usPos { // reset
usPos[j] = 0
}
for j := range usNeg { // reset
usNeg[j] = 0
for j := range cost { // reset
cost[j] = 0
}
usPos[arm.US+1] = val
_, pvPos := pv.PVposEstFmUSs(ctx, uint32(ev.Di), usPos)
exTime := float32(arm.Length) + 1 // time
usNeg[0] = exTime
usNeg[1] = exTime * arm.Effort.Midpoint()
_, pvNeg := pv.PVnegEstFmUSs(usNeg)
cost[0] = exTime
cost[1] = exTime * arm.Effort.Midpoint()
_, pvNeg := pv.PVcostEstFmCosts(cost)
burst, dip, da, rew := pv.DAFmPVs(pvPos, pvNeg, 0)
_, _, _ = burst, dip, rew
arm.ExPVpos = pvPos
Expand Down
24 changes: 8 additions & 16 deletions examples/boa/boa.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,13 +177,7 @@ func (ss *Sim) ConfigPVLV(trn *armaze.Env) {
pv.USs.PVposGain = 2 // higher = more pos reward (saturating logistic func)
pv.USs.PVnegGain = .1 // global scaling of PV neg level -- was 1

pv.USs.USnegGains[0] = 0.1 // time: if USneg pool is saturating, reduce
pv.USs.USnegGains[1] = 0.1 // effort: if USneg pool is saturating, reduce
pv.USs.USnegGains[2] = 2 // big salient input!

pv.USs.PVnegWts[0] = 0.02 // time: controls overall PVneg -- if too high, not enough reward..
pv.USs.PVnegWts[1] = 0.02 // effort: controls overall PVneg -- if too high, not enough reward..
pv.USs.PVnegWts[2] = 1
pv.USs.USnegGains[0] = 2 // big salient input!

pv.Drive.DriveMin = 0.5 // 0.5 -- should be
pv.Urgency.U50 = 10
Expand Down Expand Up @@ -220,11 +214,11 @@ func (ss *Sim) ConfigNet(net *axon.Network) {
ny := ev.Config.Params.NYReps
narm := ev.Config.NArms

vSgpi, urgency, pvPos, blaPosAcq, blaPosExt, blaNegAcq, blaNegExt, blaNov, ofcPosUS, ofcPosUSCT, ofcPosUSPTp, ilPos, ilPosCT, ilPosPTp, ofcNegUS, ofcNegUSCT, ofcNegUSPTp, ilNeg, ilNegCT, ilNegPTp, accUtil, sc, notMaint := net.AddBOA(ctx, ny, popY, popX, nuBgY, nuBgX, nuCtxY, nuCtxX, space)
_, _ = accUtil, urgency
vSgpi, urgency, pvPos, blaPosAcq, blaPosExt, blaNegAcq, blaNegExt, blaNov, ofcPosUS, ofcPosUSCT, ofcPosUSPTp, ilPos, ilPosCT, ilPosPTp, ofcNegUS, ofcNegUSCT, ofcNegUSPTp, ilNeg, ilNegCT, ilNegPTp, plUtil, sc := net.AddBOA(ctx, ny, popY, popX, nuBgY, nuBgX, nuCtxY, nuCtxX, space)
_, _ = plUtil, urgency
_, _ = ofcNegUSCT, ofcNegUSPTp

accUtilPTp := net.AxonLayerByName("ACCutilPTp")
plUtilPTp := net.AxonLayerByName("PLutilPTp")

cs, csP := net.AddInputPulv2D("CS", ny, ev.Config.NCSs, space)
pos, posP := net.AddInputPulv2D("Pos", ny, ev.MaxLength+1, space)
Expand Down Expand Up @@ -300,10 +294,10 @@ func (ss *Sim) ConfigNet(net *axon.Network) {
// ALM, M1 <-> OFC, ACC

// action needs to know if maintaining a goal or not
// using accUtil as main summary "driver" input to action system
// using plUtil as main summary "driver" input to action system
// PTp provides good notmaint signal for action.
net.ConnectLayers(accUtilPTp, alm, full, axon.ForwardPrjn).SetClass("ToALM")
net.ConnectLayers(accUtilPTp, m1, full, axon.ForwardPrjn).SetClass("ToM1")
net.ConnectLayers(plUtilPTp, alm, full, axon.ForwardPrjn).SetClass("ToALM")
net.ConnectLayers(plUtilPTp, m1, full, axon.ForwardPrjn).SetClass("ToM1")

// note: in Obelisk this helps with the Consume action
// but here in this example it produces some instability
Expand All @@ -322,8 +316,6 @@ func (ss *Sim) ConfigNet(net *axon.Network) {
vl.PlaceBehind(m1P, space)
act.PlaceBehind(vl, space)

notMaint.PlaceRightOf(alm, space)

net.Build(ctx)
net.Defaults()
net.SetNThreads(ss.Config.Run.NThreads)
Expand Down Expand Up @@ -499,7 +491,7 @@ func (ss *Sim) ConfigLoops() {
func (ss *Sim) TakeAction(net *axon.Network) {
ctx := &ss.Context
pv := &ss.Net.PVLV
mtxLy := ss.Net.AxonLayerByName("VsMtxGo")
mtxLy := ss.Net.AxonLayerByName("VMtxGo")
vlly := ss.Net.AxonLayerByName("VL")
threshold := float32(0.1)
for di := 0; di < int(ctx.NetIdxs.NData); di++ {
Expand Down
3 changes: 2 additions & 1 deletion examples/boa/params.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ var ParamSets = netparams.Sets{
"Layer.Inhib.Pool.Gi": "0.3", // go lower, get more inhib from elsewhere?
"Layer.Inhib.Pool.FB": "1",
"Layer.Acts.Dend.ModGain": "1", // todo: 2 is default
"Layer.Acts.Kir.Gbar": "2",
}},
{Sel: "#BLAPosAcqD1", Desc: "",
Params: params.Params{
Expand Down Expand Up @@ -204,7 +205,7 @@ var ParamSets = netparams.Sets{
Params: params.Params{
"Prjn.PrjnScale.Abs": "5", // with new mod, this can be stronger
}},
{Sel: "#UrgencyToVsMtxGo", Desc: "",
{Sel: "#UrgencyToVMtxGo", Desc: "",
Params: params.Params{
"Prjn.PrjnScale.Abs": "4", // 4 good -- 1,2 too weak
}},
Expand Down
8 changes: 6 additions & 2 deletions examples/pcore_ds/params.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,15 @@ var ParamSets = netparams.Sets{
"Layer.Learn.NeuroMod.BurstGain": "0.1", // 0.1 == 0.2 > 0.05 > 0.5 -- key lrate modulator
"Layer.Learn.RLRate.On": "true", // note: applied for tr update trials
"Layer.Learn.TrgAvgAct.On": "true", // true > false
"Layer.Acts.Mahp.Gbar": "0.01",
"Layer.Acts.Sahp.Gbar": "0.02", // todo test
"Layer.Acts.Mahp.Gbar": "0.01", // 0.01 > 0.02 > 0
"Layer.Acts.Sahp.Gbar": "0.05", // todo test
},
Hypers: params.Hypers{
"Layer.Learn.NeuroMod.BurstGain": {"Tweak": "-"},
"Layer.Acts.Kir.Gbar": {"Tweak": "-"},
"Layer.Inhib.Pool.Gi": {"Tweak": "-"},
"Layer.Acts.Mahp.Gbar": {"Tweak": "[0.012,0.015,0.005]"},
"Layer.Acts.Sahp.Gbar": {"Tweak": "[0.04,0.03]"},
}},
{Sel: ".DSTNLayer", Desc: "all STN",
Params: params.Params{
Expand All @@ -60,6 +62,8 @@ var ParamSets = netparams.Sets{
"Layer.Acts.Dend.ModGain": "1.0", // 1.5 def
"Layer.Acts.Kir.Gbar": "0", // no real diff here over range 0-10
"Layer.Acts.MaintNMDA.Gbar": "0.007", // 0.007 default
"Layer.Acts.Mahp.Gbar": "0.0", //
"Layer.Acts.Sahp.Gbar": "0.0", //
}},
{Sel: "#MotorBS", Desc: "",
Params: params.Params{
Expand Down

0 comments on commit e4d4334

Please sign in to comment.