Working smf #151

Open

DanielTakeshi wants to merge 18 commits into master.

18 commits
3243d24
Tentative working SMF
DanielTakeshi Feb 16, 2017
53051bc
Renamed file
DanielTakeshi Feb 16, 2017
bb5ebe3
I think the MH test is working with SMF w/random walk proposer.
DanielTakeshi Mar 8, 2017
af060e0
changed *@ to ddot since doubles are more precise than floats
DanielTakeshi Mar 8, 2017
9c22543
Slight updates, mostly debugging.
DanielTakeshi Mar 8, 2017
bbbeba8
Really confused. Better ask John. =(
DanielTakeshi Mar 9, 2017
1d1bb21
Wow, I think I finally get SMF ... well, the main idea.
DanielTakeshi Mar 15, 2017
8bd3018
More documentation to myself. Not ready for integration into master
DanielTakeshi Mar 15, 2017
0ab7a3e
Updated MH Test, I think ADAGrad and SMF work now, but ...
DanielTakeshi Mar 18, 2017
b1178e4
OK the memory allocation stuff is fine, not really worried now. And I…
DanielTakeshi Mar 18, 2017
ec31c76
OK enough debug prints. Now figure out what to do for the paper
DanielTakeshi Mar 18, 2017
ae2472a
OK this should be the style of script to look for different values.
DanielTakeshi Mar 18, 2017
9bb1b30
Let's try this to run these in batch mode.
DanielTakeshi Mar 19, 2017
888d677
more slight script updates
DanielTakeshi Mar 21, 2017
0701ec6
fixed logu --> psi
DanielTakeshi Mar 22, 2017
b0f0aeb
Tried my way with energy function and momentum, wasn't working. =(
DanielTakeshi Mar 22, 2017
54cbc2c
Well, got MALA but it's not really working ...
DanielTakeshi Mar 23, 2017
685822d
Fixed bug, 1/(4*tau). Better to just use sigma which is what I do.
DanielTakeshi Mar 23, 2017
2 changes: 1 addition & 1 deletion bidmach
@@ -2,7 +2,7 @@
 # export JAVA_HOME="" # Set here if not set in environment
 # export CUDA_PATH="" # Set here if not set in environment

-MEMSIZE="-Xmx14G"
+MEMSIZE="-Xmx40G"
 export JAVA_OPTS="${MEMSIZE} -Xms128M -Dfile.encoding=UTF-8" # Set as much memory as possible
 BIDMACH_ROOT="${BASH_SOURCE[0]}"
 if [ ! `uname` = "Darwin" ]; then
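Context for this one-line change: -Xmx sets the JVM's maximum heap size, so the ceiling goes from 14 GB to 40 GB, presumably so the Netflix matrices and model state used by the scripts below fit in memory.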
73 changes: 73 additions & 0 deletions scripts/daniel_smf_netflix_adagrad.ssc
@@ -0,0 +1,73 @@
:silent
import BIDMach.models.SMF

/**
 * Test SMF code on netflix data. This will use default ADAGrad. In general,
 * RMSEs of roughly 0.83 to 0.85 are "good".
 */

// Same code as in the MHTest+ADAGrad script.
setseed(0)
val dir = "/data/netflix/"
val a = loadSMat(dir+"newtrain.smat.lz4")
val ta = loadSMat(dir+"newtest.smat.lz4")
val d = 256
val lrates = row(0.0001, 0.001, 0.01, 0.1)
val langs = row(0.0, 0.05, 0.5)
var bestrmse = 10.0
var prettystring = "lrate lang. rmse\n"

for (i <- 0 until lrates.length) {
  for (k <- 0 until langs.length) {
    val (nn, opts) = SMF.learner1(a, d)

    // Common parameters with the MHTest+ADAGrad version.
    opts.batchSize = 1000
    opts.npasses = 2
    opts.nesterov = null
    opts.langevin = langs(k).v
    opts.momentum = 1f

    opts.uiter = 5
    opts.urate = 0.05f
    opts.lrate = lrates(i).v
    val lambda = 4f
    opts.lambdau = lambda
    opts.regumean = lambda
    opts.lambdam = lambda / 500000 * 20
    opts.regmmean = opts.lambdam
    opts.evalStep = 31
    opts.doUsers = false
    opts.what
    nn.train

    val model = nn.model.asInstanceOf[SMF]
    val xa = (ta != 0)
    val (mm, mopts) = SMF.predictor1(model, a, xa)
    mopts.batchSize = 10000
    mopts.uiter = 5
    mopts.urate = opts.urate
    mopts.aopts = null
    mm.predict

    val pa = SMat(mm.preds(1))
    println("Note: max(pa)="+maxi(maxi(pa))+" and min(pa)="+mini(mini(pa)))
    val diff = ta.contents - pa.contents
    val rmse = sqrt((diff ddot diff) / diff.length)
    println("\nrmse = %f" format rmse.v)
    min(pa.contents, 5, pa.contents)
    max(pa.contents, 1, pa.contents)
    val diff2 = ta.contents - pa.contents
    val rmse2 = sqrt((diff2 ddot diff2) / diff2.length)
    println("rmse (w/clipping) = %f\n" format rmse2.v)

    if (rmse2.v < bestrmse) {
      bestrmse = rmse2.v
    }
    prettystring += "%1.5f %1.3f %1.4f\n" format (lrates(i).v, langs(k).v, rmse2.v)
  }
}

println("\nBest RMSE: "+bestrmse+"\n")
println(prettystring)
sys.exit
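For readers skimming the evaluation: the in-place min/max calls above clamp predictions to the valid rating range [1, 5] before the second RMSE is computed. A minimal plain-Scala sketch of that clipped-RMSE metric, using arrays rather than BIDMach's SMat (all names here are illustrative, not BIDMach APIs):

// Hedged sketch of the clip-then-RMSE evaluation used in the script above.
object ClippedRmse {
  def rmse(truth: Array[Double], preds: Array[Double]): Double = {
    val se = truth.zip(preds).map { case (t, p) => (t - p) * (t - p) }.sum
    math.sqrt(se / truth.length)
  }
  def main(args: Array[String]): Unit = {
    val truth = Array(5.0, 3.0, 1.0, 4.0)
    val raw   = Array(5.7, 2.9, 0.2, 4.1)  // raw predictions can leave [1, 5]
    val clipped = raw.map(p => math.max(1.0, math.min(5.0, p)))
    println(f"rmse = ${rmse(truth, raw)}%.4f")
    println(f"rmse (w/clipping) = ${rmse(truth, clipped)}%.4f")
  }
}

On this metric, clipping can only help when the true ratings lie in [1, 5], since it moves every out-of-range prediction strictly closer to its target.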
118 changes: 118 additions & 0 deletions scripts/daniel_smf_netflix_mhtest.ssc
@@ -0,0 +1,118 @@
:silent
import BIDMach.models.SMF

/**
 * Test SMF code on netflix data. This will use OUR MHTest updater, which I put
 * in as a new updater (SMF.learner2) to make this script more concise. Some
 * notes on the netflix dataset:
 *
 * size(a) = (17770,480189)
 * a.nnz = 90430138
 * min=0, max=5
 *
 * (a == 1).nnz = 4156151
 * (a == 2).nnz = 9120198
 * (a == 3).nnz = 25928920
 * (a == 4).nnz = 30375037
 * (a == 5).nnz = 20849832
 * mean (of nonzeros) = 3.6042476
 * sqrt((diff ddot diff) / diff.nn) = 1.0852 // Train RMSE using mean predictor
 *
 * (ta == 1).nnz = 461839
 * (ta == 2).nnz = 1011882
 * (ta == 3).nnz = 2882327
 * (ta == 4).nnz = 3375921
 * (ta == 5).nnz = 2318400
 * mean (of nonzeros) = 3.6046705
 * sqrt((diff ddot diff) / diff.nn) = 1.0851 // Test RMSE using mean predictor
 *
 * BTW: (a *@ ta).nnz = 0, which shows that they are completely distinct.
 */

// Same code as in the ADAGrad-only script.
setseed(0)
val dir = "/data/netflix/"
val a = loadSMat(dir+"newtrain.smat.lz4")
val ta = loadSMat(dir+"newtest.smat.lz4")
val d = 256
val lrates = row(0.01)
val langs = sqrt(2f * lrates) // NEW! MALA so it's sqrt(2 * lr).
var bestrmse = 10.0
var prettystring = "lrate lang. arate rmse\n"

for (i <- 0 until lrates.length) {
  for (k <- 0 until 1) {
    val (nn, opts) = SMF.learner2(a, d)

    // Common parameters with the ADAGrad version.
    opts.batchSize = 1000
    opts.npasses = 1
    opts.nesterov = null
    opts.langevin = langs(k).v
    opts.momentum = null

    opts.uiter = 5
    opts.urate = 0.05f
    opts.lrate = lrates(i).v
    val lambda = 4f
    opts.lambdau = lambda
    opts.regumean = lambda
    opts.lambdam = lambda / 500000 * 20
    opts.regmmean = opts.lambdam
    opts.evalStep = 31
    opts.doUsers = false

    // Now some stuff specific to the MHTest+ADAGrad.
    opts.smf = true
    opts.saveAcceptRate = true
    opts.acceptRateDir = "tmp/"
    opts.N = a.nnz
    opts.temp = a.nnz / 1000
    opts.Nknown = true
    opts.n2lsigma = 1.0f
    opts.nn2l = 4000
    opts.sigmaProposer = 0.01f
    opts.continueDespiteFull = false
    opts.verboseMH = true
    opts.collectData = false
    opts.collectDataDir = "tmp/"
    opts.exitTheta = false
    opts.initThetaHere = true
    opts.burnIn = -1
    opts.matrixOfScores = true
    opts.what
    nn.train

    val model = nn.model.asInstanceOf[SMF]
    val xa = (ta != 0)
    val (mm, mopts) = SMF.predictor1(model, a, xa)
    mopts.batchSize = 10000
    mopts.uiter = 5
    mopts.urate = opts.urate
    mopts.aopts = null
    mm.predict

    val pa = SMat(mm.preds(1))
    println("Note: max(pa)="+maxi(maxi(pa))+" and min(pa)="+mini(mini(pa)))
    val diff = ta.contents - pa.contents
    val rmse = sqrt((diff ddot diff) / diff.length)
    println("\nrmse = %f" format rmse.v)
    min(pa.contents, 5, pa.contents)
    max(pa.contents, 1, pa.contents)
    val diff2 = ta.contents - pa.contents
    val rmse2 = sqrt((diff2 ddot diff2) / diff2.length)
    println("rmse (w/clipping) = %f\n" format rmse2.v)

    val accepts = loadMat(opts.acceptRateDir+"arate_%1.4f_%1.3f.mat.lz4" format (lrates(i).v, langs(k).v))
    val arate = accepts.nnz / accepts.length.toFloat

    if (rmse2.v < bestrmse) {
      bestrmse = rmse2.v
    }
    prettystring += "%1.5f %1.3f %1.4f %1.4f\n" format (lrates(i).v, langs(k).v, arate, rmse2.v)
  }
}

println("\nBest RMSE: "+bestrmse+"\n")
println(prettystring)
sys.exit
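One detail worth flagging: langs = sqrt(2f * lrates) is the standard MALA noise scale, where a proposal takes a gradient step of size lrate and adds Gaussian noise with standard deviation sqrt(2 * lrate). A hedged, self-contained sketch of one such proposal on a scalar parameter (the target density and all names are illustrative, not the PR's code):

import scala.util.Random

// Sketch of a single MALA proposal, assuming a standard normal target
// pi = N(0, 1), whose log-density gradient is grad(theta) = -theta.
object MalaStep {
  val rng = new Random(0)
  def grad(theta: Double): Double = -theta
  def propose(theta: Double, lrate: Double): Double =
    theta + lrate * grad(theta) + math.sqrt(2.0 * lrate) * rng.nextGaussian()
  def main(args: Array[String]): Unit = {
    var theta = 3.0
    for (_ <- 1 to 5) {
      theta = propose(theta, 0.01)  // noise std = sqrt(2 * 0.01), matching langs above
      println(f"theta = $theta%.4f")
    }
  }
}

Full MALA then accepts or rejects each proposal with a Metropolis-Hastings correction, which is the role the MHTest updater plays in this script.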
8 changes: 6 additions & 2 deletions src/main/scala/BIDMach/Learner.scala
@@ -137,7 +137,12 @@ case class Learner(
       if (mixins != null) mixins map (_ compute(mats, here));
       if (updater != null) updater.update(ipass, here, gprogress);
     }
-    val scores = model.evalbatchg(mats, ipass, here);
+
+    // Daniel: I needed to change the following line to the one after it:
+    //   val scores = model.evalbatchg(mats, ipass, here);
+    val scores = mean(model.evalbatchg(mats, ipass, here)).v;
+    // in order for the MH test to work with different-sized minibatches.
+
     if (datasink != null) datasink.put;
     reslist.append(scores.newcopy)
     samplist.append(here)
@@ -879,7 +884,6 @@ object Learner {

   def scores2FMat(reslist:ListBuffer[FMat]):FMat = {
     if (reslist.length == 0) return zeros(0, 0)
-
     val out = FMat(reslist(0).nrows, reslist.length)
     var i = 0;
     while (i < reslist.length) {
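The change above replaces a per-batch score matrix with its scalar mean. A small illustration of why that helps when minibatch sizes vary (plain Scala; names are illustrative, not BIDMach APIs): a per-example mean stays comparable across batches, while raw per-batch totals scale with the batch size.

// Sketch: mean scores are minibatch-size-invariant; sums are not.
object MeanScores {
  def meanScore(scores: Array[Double]): Double = scores.sum / scores.length
  def main(args: Array[String]): Unit = {
    val small = Array(0.9, 1.1)            // 2-example minibatch
    val large = Array(0.9, 1.1, 1.0, 1.0)  // 4-example minibatch
    println(s"means: ${meanScore(small)} vs ${meanScore(large)}")  // both 1.0: comparable
    println(s"sums:  ${small.sum} vs ${large.sum}")                // 2.0 vs 4.0: size-dependent
  }
}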
69 changes: 10 additions & 59 deletions src/main/scala/BIDMach/models/SFA.scala
@@ -216,6 +216,10 @@ class SFA(override val opts:SFA.Opts = new SFA.Options) extends FactorModel(opts
     Minv <-- inv(50f/nfeats*FMat(mm *^ mm) + opts.lambdau * diagM);
   }

+  /**
+   * The evalfun normally called during training. Returns -RMSE on training
+   * data minibatch (sdata).
+   */
   def evalfun(sdata:Mat, user:Mat, ipass:Int, pos:Long):FMat = {
     val preds = DDS(mm, user, sdata) + (iavg + avg);
     if (ogmats != null) {
@@ -230,6 +234,10 @@
     -sqrt(row(vv/sdata.nnz))
   }

+  /**
+   * The evalfun normally called during testing and predicting. Returns -RMSE
+   * on training data minibatch (sdata).
+   */
   override def evalfun(sdata:Mat, user:Mat, preds:Mat, ipass:Int, pos:Long):FMat = {
     val spreds = DDS(mm, user, sdata) + (iavg + avg);
     val dc = sdata.contents;
@@ -267,25 +275,6 @@ object SFA {
   }
   class Options extends Opts {}

-  def learner(mat0:Mat, d:Int) = {
-    class xopts extends Learner.Options with SFA.Opts with MatSource.Opts with Grad.Opts
-    val opts = new xopts
-    opts.dim = d
-    opts.putBack = -1
-    opts.npasses = 4
-    opts.lrate = 0.1
-    opts.initUval = 0f;
-    opts.batchSize = math.min(100000, mat0.ncols/30 + 1)
-    val nn = new Learner(
-      new MatSource(Array(mat0:Mat), opts),
-      new SFA(opts),
-      null,
-      new Grad(opts),
-      null,
-      opts)
-    (nn, opts)
-  }
-
   def learnerX(mat0:Mat, d:Int) = {
     class xopts extends Learner.Options with SFA.Opts with MatSource.Opts with ADAGrad.Opts
     val opts = new xopts
@@ -306,25 +295,6 @@ object SFA {
     (nn, opts)
   }

-  def learner(mat0:Mat, user0:Mat, d:Int) = {
-    class xopts extends Learner.Options with SFA.Opts with MatSource.Opts with Grad.Opts
-    val opts = new xopts
-    opts.dim = d
-    opts.putBack = 1
-    opts.npasses = 4
-    opts.lrate = 0.1;
-    opts.initUval = 0f;
-    opts.batchSize = math.min(100000, mat0.ncols/30 + 1)
-    val nn = new Learner(
-      new MatSource(Array(mat0, user0), opts),
-      new SFA(opts),
-      null,
-      new Grad(opts),
-      null,
-      opts)
-    (nn, opts)
-  }
-
   def learnerX(mat0:Mat, user0:Mat, d:Int) = {
     class xopts extends Learner.Options with SFA.Opts with MatSource.Opts with ADAGrad.Opts
     val opts = new xopts
@@ -345,28 +315,9 @@ object SFA {
     (nn, opts)
   }

-  def learnerY(mat0:Mat, user0:Mat, d:Int) = {
-    class xopts extends Learner.Options with SFA.Opts with MatSource.Opts with ADAGrad.Opts
-    val opts = new xopts
-    opts.dim = d
-    opts.putBack = 1
-    opts.npasses = 4
-    opts.lrate = 0.1;
-    opts.initUval = 0f;
-    opts.batchSize = math.min(100000, mat0.ncols/30 + 1)
-    val nn = new Learner(
-      new MatSource(Array(mat0, user0), opts),
-      new SFA(opts),
-      null,
-      new ADAGrad(opts),
-      null,
-      opts)
-    (nn, opts)
-  }
-
-  class PredOpts extends Learner.Options with SFA.Opts with MatSource.Opts with MatSink.Opts
+  class PredOpts extends Learner.Options with SFA.Opts with MatSource.Opts with MatSink.Opts

-  def predictor(model0:Model, mat1:Mat, preds:Mat) = {
+  def predictor(model0:Model, mat1:Mat, preds:Mat) = {
     val model = model0.asInstanceOf[SFA]
     val nopts = new PredOpts;
     nopts.batchSize = math.min(10000, mat1.ncols/30 + 1)
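A note on the sign convention recorded in the new doc comments: both evalfun variants return -RMSE. The framework treats scores as higher-is-better (an assumption about the surrounding Learner code, not something stated in this diff), so negating the RMSE lets a smaller error rank as a better score. A tiny sketch:

// Sketch: negating RMSE turns "minimize error" into "maximize score".
object ScoreSign {
  def score(sqErrSum: Double, nnz: Long): Double = -math.sqrt(sqErrSum / nnz)
  def main(args: Array[String]): Unit = {
    assert(score(4.0, 16) > score(9.0, 16))  // RMSE 0.5 ranks above RMSE 0.75
    println(s"${score(4.0, 16)} vs ${score(9.0, 16)}")
  }
}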