Slight updates, mostly debugging.

My confusion before was about how the model matrices were still updating even though I wasn't accepting anything in the updater. It turns out that the SMF code updates them itself in the mupdate method. Ugh ...
BIDData · DanielTakeshi · Feb 16, 2017 · Feb 16, 2017 · Mar 8, 2017 · Mar 8, 2017
commit 9c225433b44d8f57ab910cd9cf79b72d3f9a9a4f
diff --git a/scripts/daniel_smf_netflix_mhtest.ssc b/scripts/daniel_smf_netflix_mhtest.ssc
@@ -3,7 +3,28 @@ import BIDMach.models.SMF
 
 /**
  * Test SMF code on netflix data. This will use OUR MHTest updater, which I put
- * in as a new updater (SMF.learner2) to make this script more concise.
+ * in as a new updater (SMF.learner2) to make this script more concise. Some
+ * notes on the netflix dataset:
+ *
+ * size(a) = (17770,480189)
+ * a.nnz = 90430138
+ * min=0, max=5
+ * 
+ * (a == 1).nnz = 4156151
+ * (a == 2).nnz = 9120198
+ * (a == 3).nnz = 25928920
+ * (a == 4).nnz = 30375037
+ * (a == 5).nnz = 20849832
+ * mean (of nonzeros) = 3.6042476
+ * sqrt((diff ddot diff) / diff.nn) = 1.0852 // Train RMSE using mean predictor
+ *
+ * (ta == 1).nnz = 461839
+ * (ta == 2).nnz = 1011882
+ * (ta == 3).nnz = 2882327
+ * (ta == 4).nnz = 3375921
+ * (ta == 5).nnz = 2318400
+ * mean (of nonzeros) = 3.6046705
+ * sqrt((diff ddot diff) / diff.nn) = 1.0851 // Test RMSE using mean predictor
  */
 
 // Get random seed set up.
@@ -40,10 +61,10 @@ opts.matrixOfScores = true
 // Daniel Seita: actually, a batch size of 2000 means we may get 100k "elements"
 // due to the sparsity. So I'm thinking we stick to batch sizes of 1000 or less.
 opts.batchSize = 1000
+opts.npasses = 2
 opts.uiter = 5
 opts.urate = 0.05f
 opts.lrate = 0.05f  
-opts.npasses = 3
 val lambda = 4f
 opts.lambdau = lambda
 opts.regumean = lambda
@@ -57,7 +78,7 @@ nn.train
 
 val model = nn.model.asInstanceOf[SMF]
 val xa = (ta != 0)
-val (mm, mopts) = SMF.predictor1(model, a, xa)
+val (mm, mopts) = SMF.predictor1(model, a, xa) // Provide `a` or `ta` as input?
 mopts.batchSize = 10000
 mopts.uiter = 5
 mopts.urate = opts.urate
@@ -68,5 +89,5 @@ val pa = SMat(mm.preds(1));
 min(pa.contents,5,pa.contents)
 max(pa.contents,1,pa.contents)
 val diff = ta.contents - pa.contents
-val rmse = sqrt((diff ^* diff) / diff.length)
+val rmse = sqrt((diff ddot diff) / diff.length)
 println("rmse = %f" format rmse.v)
diff --git a/src/main/scala/BIDMach/models/SMF.scala b/src/main/scala/BIDMach/models/SMF.scala
@@ -241,12 +241,10 @@ class SMF(override val opts:SMF.Opts = new SMF.Options) extends FactorModel(opts
    * if we're assuming a Gaussian error distribution. 
    * 
    * Note: it looks scary to  subtract iavg+avg from sdata0, but we don't add
-   * that to preds so we can still directly compare sdata and preds. I'll leave
-   * it here since John may have had a reason or doing that.
+   * that to preds so we can still directly compare sdata and preds.
    */
-  def evalfun(sdata0:Mat, user:Mat, ipass:Int, pos:Long):FMat = {
-  	val sdata = sdata0 - (iavg + avg);
-    val preds = DDS(mm, user, sdata);
+  def evalfun(sdata:Mat, user:Mat, ipass:Int, pos:Long):FMat = {
+    val preds = DDS(mm, user, sdata) + (iavg + avg);
     if (ogmats != null) {
       ogmats(0) = user;
       if (ogmats.length > 1) {
@@ -255,11 +253,17 @@ class SMF(override val opts:SMF.Opts = new SMF.Options) extends FactorModel(opts
     }
   	val dc = sdata.contents
   	val pc = preds.contents
-  	val diff = dc - pc;
+  	val diff = DMat(dc - pc);
   	if (opts.matrixOfScores) {
   	  // TODO Temporary but should be OK for now (b/c we almost never increment MB).
   	  val sigma_sq = variance(diff).dv
-  	  -(1.0f/(2*sigma_sq)).v * (diff ddot diff)
+
+  	  //println("evalfun, sdata.contents.length = " +dc.length)
+  	  //println("mean of squared diffs = " +(diff ddot diff)/diff.length)
+  	  //println("sigma_sq = " +sigma_sq)
+  	  //println("result = " +mean(-(1.0f/(2*sigma_sq)).v * FMat(diff *@ diff)))
+
+  	  -(1.0f/(2*sigma_sq)).v * FMat(diff *@ diff)
   	} else {
   	  val vv = diff ddot diff;
   	  -sqrt(row(vv/sdata.nnz))
@@ -284,6 +288,7 @@ class SMF(override val opts:SMF.Opts = new SMF.Options) extends FactorModel(opts
         ogmats(1) = xpreds;
       }
     }
+  	println("TESTING evalfun, spreds.nnz="+spreds.nnz+", xpreds.nnz="+xpreds.nnz)
   	preds.contents <-- xpreds.contents;
   	-sqrt(row(vv/sdata.nnz))
   }

diff --git a/src/main/scala/BIDMach/updaters/MHTest.scala b/src/main/scala/BIDMach/updaters/MHTest.scala
@@ -79,6 +79,7 @@ class MHTest(override val opts:MHTest.Opts = new MHTest.Options) extends Updater
    * Note that the file for the norm2logdata should be in the correct directory.
    */
   override def init(model0:Model) = {
+    setseed(1)
     model = model0;
     modelmats = model.modelmats
     updatemats = model.updatemats
@@ -218,7 +219,7 @@ class MHTest(override val opts:MHTest.Opts = new MHTest.Options) extends Updater
         modelmats(i) <-- tmpTheta(i) // Now modelmats back to old theta.
       }
     }
-    if (newMinibatch && accept) afterEachMinibatch()
+    if (newMinibatch) afterEachMinibatch()
   }