diff --git a/bidmach b/bidmach index db39c8be..9bc38a2c 100755 --- a/bidmach +++ b/bidmach @@ -2,7 +2,7 @@ # export JAVA_HOME="" # Set here if not set in environment # export CUDA_PATH="" # Set here if not set in environment -MEMSIZE="-Xmx14G" +MEMSIZE="-Xmx40G" export JAVA_OPTS="${MEMSIZE} -Xms128M -Dfile.encoding=UTF-8" # Set as much memory as possible BIDMACH_ROOT="${BASH_SOURCE[0]}" if [ ! `uname` = "Darwin" ]; then diff --git a/scripts/daniel_smf_netflix_adagrad.ssc b/scripts/daniel_smf_netflix_adagrad.ssc new file mode 100755 index 00000000..44a0d391 --- /dev/null +++ b/scripts/daniel_smf_netflix_adagrad.ssc @@ -0,0 +1,73 @@ +:silent +import BIDMach.models.SMF + +/** + * Test SMF code on netflix data. This will use default ADAGrad. In general, + * RMSEs of roughly 0.83 to 0.85 are "good". + */ + +// Same code as in the MHTest+ADAGrad script. +setseed(0) +val dir = "/data/netflix/" +val a = loadSMat(dir+"newtrain.smat.lz4") +val ta = loadSMat(dir+"newtest.smat.lz4") +val d = 256 +val lrates = row(0.0001, 0.001, 0.01, 0.1) +val langs = row(0.0, 0.05, 0.5) +var bestrmse = 10.0; +var prettystring = "lrate lang. rmse\n" + +for (i <- 0 until lrates.length) { + for (k <- 0 until langs.length) { + val (nn,opts) = SMF.learner1(a, d) + + // Common parameters with the MHTest+ADAGrad version. + opts.batchSize = 1000 + opts.npasses = 2 + opts.nesterov = null + opts.langevin = langs(k).v + opts.momentum = 1f + + opts.uiter = 5 + opts.urate = 0.05f + opts.lrate = lrates(i).v + val lambda = 4f + opts.lambdau = lambda + opts.regumean = lambda + opts.lambdam = lambda / 500000 * 20 + opts.regmmean = opts.lambdam + opts.evalStep = 31 + opts.doUsers = false + opts.what + nn.train + + val model = nn.model.asInstanceOf[SMF] + val xa = (ta != 0) + val (mm, mopts) = SMF.predictor1(model, a, xa) + mopts.batchSize = 10000 + mopts.uiter = 5 + mopts.urate = opts.urate + mopts.aopts = null + mm.predict + + val pa = SMat(mm.preds(1)); + println("Note: max(pa)="+maxi(maxi(pa))+" and min(pa)="+mini(mini(pa))) + val diff = ta.contents - pa.contents + val rmse = sqrt((diff ddot diff) / diff.length) + println("\nrmse = %f" format rmse.v) + min(pa.contents,5,pa.contents) + max(pa.contents,1,pa.contents) + val diff2 = ta.contents - pa.contents + val rmse2 = sqrt((diff2 ddot diff2) / diff2.length) + println("rmse (w/clipping) = %f\n" format rmse2.v) + + if (rmse2.v < bestrmse) { + bestrmse = rmse2.v + } + prettystring += "%1.5f %1.3f %1.4f\n" format (lrates(i).v,langs(k).v,rmse2.v) + } +} + +println("\nBest RMSE: "+bestrmse+ "\n") +println(prettystring) +sys.exit diff --git a/scripts/daniel_smf_netflix_mhtest.ssc b/scripts/daniel_smf_netflix_mhtest.ssc new file mode 100755 index 00000000..c87d281e --- /dev/null +++ b/scripts/daniel_smf_netflix_mhtest.ssc @@ -0,0 +1,118 @@ +:silent +import BIDMach.models.SMF + +/** + * Test SMF code on netflix data. This will use OUR MHTest updater, which I put + * in as a new updater (SMF.learner2) to make this script more concise. 
Some + * notes on the netflix dataset: + * + * size(a) = (17770,480189) + * a.nnz = 90430138 + * min=0, max=5 + * + * (a == 1).nnz = 4156151 + * (a == 2).nnz = 9120198 + * (a == 3).nnz = 25928920 + * (a == 4).nnz = 30375037 + * (a == 5).nnz = 20849832 + * mean (of nonzeros) = 3.6042476 + * sqrt((diff ddot diff) / diff.nn) = 1.0852 // Train RMSE using mean predictor + * + * (ta == 1).nnz = 461839 + * (ta == 2).nnz = 1011882 + * (ta == 3).nnz = 2882327 + * (ta == 4).nnz = 3375921 + * (ta == 5).nnz = 2318400 + * mean (of nonzeros) = 3.6046705 + * sqrt((diff ddot diff) / diff.nn) = 1.0851 // Test RMSE using mean predictor + * + * BTW: (a *@ ta).nnz = 0, which shows that they are completely distinct. + */ + +// Same code as in the ADAGrad-only script. +setseed(0) +val dir = "/data/netflix/" +val a = loadSMat(dir+"newtrain.smat.lz4") +val ta = loadSMat(dir+"newtest.smat.lz4") +val d = 256 +val lrates = row(0.01) +val langs = sqrt(2f * lrates) // NEW! MALA so it's sqrt(2 * lr). +var bestrmse = 10.0; +var prettystring = "lrate lang. arate rmse\n" + +for (i <- 0 until lrates.length) { + for (k <- 0 until 1) { + val (nn,opts) = SMF.learner2(a, d) + + // Common parameters with the ADAGrad version. + opts.batchSize = 1000 + opts.npasses = 1 + opts.nesterov = null + opts.langevin = langs(k).v + opts.momentum = null + + opts.uiter = 5 + opts.urate = 0.05f + opts.lrate = lrates(i).v + val lambda = 4f + opts.lambdau = lambda + opts.regumean = lambda + opts.lambdam = lambda / 500000 * 20 + opts.regmmean = opts.lambdam + opts.evalStep = 31 + opts.doUsers = false + + // Now some stuff specific for the MHTest+ADAGrad. + opts.smf = true + opts.saveAcceptRate = true + opts.acceptRateDir = "tmp/" + opts.N = a.nnz + opts.temp = a.nnz / 1000 + opts.Nknown = true + opts.n2lsigma = 1.0f + opts.nn2l = 4000 + opts.sigmaProposer = 0.01f + opts.continueDespiteFull = false + opts.verboseMH = true + opts.collectData = false + opts.collectDataDir = "tmp/" + opts.exitTheta = false + opts.initThetaHere = true + opts.burnIn = -1 + opts.matrixOfScores = true + opts.what + nn.train + + val model = nn.model.asInstanceOf[SMF] + val xa = (ta != 0) + val (mm, mopts) = SMF.predictor1(model, a, xa) + mopts.batchSize = 10000 + mopts.uiter = 5 + mopts.urate = opts.urate + mopts.aopts = null + mm.predict + + val pa = SMat(mm.preds(1)); + println("Note: max(pa)="+maxi(maxi(pa))+" and min(pa)="+mini(mini(pa))) + val diff = ta.contents - pa.contents + val rmse = sqrt((diff ddot diff) / diff.length) + println("\nrmse = %f" format rmse.v) + min(pa.contents,5,pa.contents) + max(pa.contents,1,pa.contents) + val diff2 = ta.contents - pa.contents + val rmse2 = sqrt((diff2 ddot diff2) / diff2.length) + println("rmse (w/clipping) = %f\n" format rmse2.v) + + val accepts = loadMat(opts.acceptRateDir+"arate_%1.4f_%1.3f.mat.lz4" format (lrates(i).v,langs(k).v)) + val arate = accepts.nnz / accepts.length.toFloat + + if (rmse2.v < bestrmse) { + bestrmse = rmse2.v + } + prettystring += "%1.5f %1.3f %1.4f %1.4f\n" format (lrates(i).v,langs(k).v,arate,rmse2.v) + } +} + +println("\nBest RMSE: "+bestrmse+ "\n") +println(prettystring) +sys.exit diff --git a/src/main/scala/BIDMach/Learner.scala b/src/main/scala/BIDMach/Learner.scala index 5648164d..e6b07e6a 100755 --- a/src/main/scala/BIDMach/Learner.scala +++ b/src/main/scala/BIDMach/Learner.scala @@ -137,7 +137,12 @@ case class Learner( if (mixins != null) mixins map (_ compute(mats, here)); if (updater != null) updater.update(ipass, here, gprogress); } - val scores = model.evalbatchg(mats, ipass, here); + + 
// Daniel: I needed to change the following line to the one after it: + // val scores = model.evalbatchg(mats, ipass, here); + val scores = mean(model.evalbatchg(mats, ipass, here)).v; + // in orer for the MH test to work with different-sized minibatches. + if (datasink != null) datasink.put; reslist.append(scores.newcopy) samplist.append(here) @@ -879,7 +884,6 @@ object Learner { def scores2FMat(reslist:ListBuffer[FMat]):FMat = { if (reslist.length == 0) return zeros(0, 0) - val out = FMat(reslist(0).nrows, reslist.length) var i = 0; while (i < reslist.length) { diff --git a/src/main/scala/BIDMach/models/SFA.scala b/src/main/scala/BIDMach/models/SFA.scala index ff7b65a0..055fbec4 100755 --- a/src/main/scala/BIDMach/models/SFA.scala +++ b/src/main/scala/BIDMach/models/SFA.scala @@ -216,6 +216,10 @@ class SFA(override val opts:SFA.Opts = new SFA.Options) extends FactorModel(opts Minv <-- inv(50f/nfeats*FMat(mm *^ mm) + opts.lambdau * diagM); } + /** + * The evalfun normally called during training. Returns -RMSE on training + * data minibatch (sdata). + */ def evalfun(sdata:Mat, user:Mat, ipass:Int, pos:Long):FMat = { val preds = DDS(mm, user, sdata) + (iavg + avg); if (ogmats != null) { @@ -230,6 +234,10 @@ class SFA(override val opts:SFA.Opts = new SFA.Options) extends FactorModel(opts -sqrt(row(vv/sdata.nnz)) } + /** + * The evalfun normally called during testing and predicting. Returns -RMSE + * on training data minibatch (sdata). + */ override def evalfun(sdata:Mat, user:Mat, preds:Mat, ipass:Int, pos:Long):FMat = { val spreds = DDS(mm, user, sdata) + (iavg + avg); val dc = sdata.contents; @@ -267,25 +275,6 @@ object SFA { } class Options extends Opts {} - def learner(mat0:Mat, d:Int) = { - class xopts extends Learner.Options with SFA.Opts with MatSource.Opts with Grad.Opts - val opts = new xopts - opts.dim = d - opts.putBack = -1 - opts.npasses = 4 - opts.lrate = 0.1 - opts.initUval = 0f; - opts.batchSize = math.min(100000, mat0.ncols/30 + 1) - val nn = new Learner( - new MatSource(Array(mat0:Mat), opts), - new SFA(opts), - null, - new Grad(opts), - null, - opts) - (nn, opts) - } - def learnerX(mat0:Mat, d:Int) = { class xopts extends Learner.Options with SFA.Opts with MatSource.Opts with ADAGrad.Opts val opts = new xopts @@ -306,25 +295,6 @@ object SFA { (nn, opts) } - def learner(mat0:Mat, user0:Mat, d:Int) = { - class xopts extends Learner.Options with SFA.Opts with MatSource.Opts with Grad.Opts - val opts = new xopts - opts.dim = d - opts.putBack = 1 - opts.npasses = 4 - opts.lrate = 0.1; - opts.initUval = 0f; - opts.batchSize = math.min(100000, mat0.ncols/30 + 1) - val nn = new Learner( - new MatSource(Array(mat0, user0), opts), - new SFA(opts), - null, - new Grad(opts), - null, - opts) - (nn, opts) - } - def learnerX(mat0:Mat, user0:Mat, d:Int) = { class xopts extends Learner.Options with SFA.Opts with MatSource.Opts with ADAGrad.Opts val opts = new xopts @@ -345,28 +315,9 @@ object SFA { (nn, opts) } - def learnerY(mat0:Mat, user0:Mat, d:Int) = { - class xopts extends Learner.Options with SFA.Opts with MatSource.Opts with ADAGrad.Opts - val opts = new xopts - opts.dim = d - opts.putBack = 1 - opts.npasses = 4 - opts.lrate = 0.1; - opts.initUval = 0f; - opts.batchSize = math.min(100000, mat0.ncols/30 + 1) - val nn = new Learner( - new MatSource(Array(mat0, user0), opts), - new SFA(opts), - null, - new ADAGrad(opts), - null, - opts) - (nn, opts) - } - - class PredOpts extends Learner.Options with SFA.Opts with MatSource.Opts with MatSink.Opts + class PredOpts extends 
Learner.Options with SFA.Opts with MatSource.Opts with MatSink.Opts - def predictor(model0:Model, mat1:Mat, preds:Mat) = { + def predictor(model0:Model, mat1:Mat, preds:Mat) = { val model = model0.asInstanceOf[SFA] val nopts = new PredOpts; nopts.batchSize = math.min(10000, mat1.ncols/30 + 1) diff --git a/src/main/scala/BIDMach/models/SMF.scala b/src/main/scala/BIDMach/models/SMF.scala index 4d8657e5..7d9a38d6 100755 --- a/src/main/scala/BIDMach/models/SMF.scala +++ b/src/main/scala/BIDMach/models/SMF.scala @@ -5,6 +5,7 @@ import BIDMat.MatFunctions._ import BIDMat.SciFunctions._ import BIDMat.Solvers._ import BIDMach.datasources._ +import BIDMach.datasinks._ import BIDMach.updaters._ import BIDMach.Learner @@ -71,35 +72,48 @@ class SMF(override val opts:SMF.Opts = new SMF.Options) extends FactorModel(opts var epsilon = 0f; var aopts:ADAGrad.Opts = null; + // Daniel: doing this to set MB sizes in MHTest code. + var numNonzerosMB:Int = -1 override def init() = { + // Get dimensions; for Netflix, size(mats(0)) = (17770,batchSize). mats = datasource.next; - datasource.reset; - nfeats = mats(0).nrows; - val batchSize = mats(0).ncols; + datasource.reset; + nfeats = mats(0).nrows; + val batchSize = mats(0).ncols; + numNonzerosMB = mats(0).nnz val d = opts.dim; + if (refresh) { - mm = normrnd(0,0.01f,d,nfeats); - mm = convertMat(mm); - avg = mm.zeros(1,1) - iavg = mm.zeros(nfeats,1); - itemsum = mm.zeros(nfeats, 1); - itemcount = mm.zeros(nfeats, 1); - setmodelmats(Array(mm, iavg, avg)); + // Randomly drawing mm, iavg, and avg (the three respective model + // matrices). Note that nfeats is the number of items (e.g. movies). + println("Inside refresh") + mm = normrnd(0,0.01f,d,nfeats); + mm = convertMat(mm); + avg = mm.zeros(1,1) + iavg = mm.zeros(nfeats,1); + itemsum = mm.zeros(nfeats, 1); + itemcount = mm.zeros(nfeats, 1); + setmodelmats(Array(mm, iavg, avg)); } + + // Handle brief logic with GPUs. Careful with aliasing as well!! useGPU = opts.useGPU && Mat.hasCUDA > 0; - if (useGPU || useDouble) { - gmats = new Array[Mat](mats.length); - } else { - gmats = mats; - } - - modelmats(0) = convertMat(modelmats(0)); - modelmats(1) = convertMat(modelmats(1)); - modelmats(2) = convertMat(modelmats(2)); - mm = modelmats(0); + if (useGPU || useDouble) { + gmats = new Array[Mat](mats.length); + } else { + gmats = mats; + } + modelmats(0) = convertMat(modelmats(0)); + modelmats(1) = convertMat(modelmats(1)); + modelmats(2) = convertMat(modelmats(2)); + mm = modelmats(0); iavg = modelmats(1); avg = modelmats(2); + + // Here's some confusing stuff. Seems to be "small" stuff about constants. + // uscale, an internal ADAGrad parameter but we use it (!!!). + // cscale, an internal ADAGrad parameter but we ignore it. lamu = mm.ones(d, 1) ∘ opts.lambdau if (opts.doUsers) lamu(0) = opts.regumean; slm = mm.ones(1,1) ∘ (opts.lambdam * batchSize); @@ -109,12 +123,18 @@ class SMF(override val opts:SMF.Opts = new SMF.Options) extends FactorModel(opts cscale = mm.ones(d, 1); cscale(0,0) = 0.0001f; if (opts.doUsers) mm(0,?) = 1f + + // The updatemats is the same length as the model matrices. updatemats = new Array[Mat](3); updatemats(2) = mm.zeros(1,1); + + // Set this to null to avoid the internal ADAGrad updater making updates. if (opts.aopts != null) initADAGrad(d, nfeats); - vexp = convertMat(row(0.5f)); + vexp = convertMat(row(0.5f)); // External ADAGrad parameter, OK here. } - + + + /** An internal ADAGrad updater. Ignore this for our current experiments. 
*/
  def initADAGrad(d:Int, m:Int) = {
    aopts = opts.asInstanceOf[ADAGrad.Opts]
    firststep = -1f;
@@ -127,141 +147,241 @@ class SMF(override val opts:SMF.Opts = new SMF.Options) extends FactorModel(opts
     waitsteps = aopts.waitsteps;
     epsilon = aopts.epsilon;
   }
-  
+
+
+  /**
+   * Performs some number of passes over the minibatch to update the user
+   * matrix. Try to understand how the user matrix gets updated ... note that
+   * putBack = -1 by default. I think this is the user matrix update, so we're
+   * holding the item matrix fixed (it's actually the model matrix, but the same
+   * point holds) while updating the user by stochastic gradient descent.
+   *
+   * We subtract biases, so predictions can be done with DDS(mm,user,sdata)
+   * *without* re-adding biases. Also, we *do* clear out user here. Is this
+   * because John said we can't really save the entire *full* user matrix (the
+   * one with size (dim,480189))? ucounts sums up the number of nonzeros in
+   * each column of sdata0, and uci = (ucounts + 1) ^ (-vexp) rescales each
+   * column's step. The b = mm * sdata term shows up in the derivative later as
+   * b - mm*preds = mm*(sdata - preds), which shrinks as the predictions
+   * approach the data.
+   *
+   * We then update the user matrix several times based on current predictions.
+   * Actually, this update makes sense because the normal SGD update adds the
+   * following term to x_u (the user vectors):
+   *
+   * alpha*( (data - prediction)*item_vector - lambda*user_vector )
+   *
+   * and that's what we have here! QUESTION, though, uscale is an integrated
+   * ADAGrad value. Do we want it here? I'm also not sure why we need du to
+   * have uscale and uci there ...
+   *
+   * NOTE: Upon further inspection, it seems that `user` starts out as a matrix
+   * of all zeros. So the user.clear with putBack<0 is unnecessary as it is
+   * already cleared. I suppose in theory we should have some putBack mechanism
+   * (that way, the user matrix value is stored from prior iterations) but John
+   * said there's little reason to do that. Also, even with putBack=1, I can't
+   * get the user matrix's values carried over. Hmmm ...
+   *
+   * @param sdata0 Training data minibatch of size (nitems, batchSize).
+   * @param user Second matrix for computing predictions, of size (dim, batchSize).
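+   *
+   * Spelled out against the code below, each of the opts.uiter inner passes
+   * computes (with preds = DDS(mm, user, sdata)):
+   *
+   *   du   = ( mm*sdata - mm*preds - lamu ∘ user ) ∘ uscale ∘ uci
+   *   user <- user + du
+   *
+   * i.e. the SGD step described above, applied column by column with step size
+   * uscale and per-column rescaling uci.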
+ */ def uupdate(sdata0:Mat, user:Mat, ipass:Int, pos:Long):Unit = { - if (firststep <= 0) firststep = pos.toFloat; - val step = (pos + firststep)/firststep; - val texp = if (opts.asInstanceOf[Grad.Opts].texp.asInstanceOf[AnyRef] != null) { - opts.asInstanceOf[Grad.Opts].texp.dv - } else { - opts.asInstanceOf[Grad.Opts].pexp.dv - } - uscale.set(opts.urate * math.pow(ipass+1, - texp).toFloat) + if (firststep <= 0) firststep = pos.toFloat; + val step = (pos + firststep)/firststep; + val texp = if (opts.asInstanceOf[Grad.Opts].texp.asInstanceOf[AnyRef] != null) { + opts.asInstanceOf[Grad.Opts].texp.dv + } else { + opts.asInstanceOf[Grad.Opts].pexp.dv + } + + uscale.set(opts.urate * math.pow(ipass+1, - texp).toFloat) val sdata = sdata0 - (iavg + avg); - if (putBack < 0) { - user.clear - } - val b = mm * sdata; - val ucounts = sum(sdata0 != 0f); - val uci = (ucounts + 1f) ^ (- vexp); - for (i <- 0 until opts.uiter) { - val preds = DDS(mm, user, sdata); - val deriv = b - mm * preds - (user ∘ lamu); - val du = (deriv ∘ uscale ∘ uci); - if (opts.lsgd >= 0) { - val dpreds = DDS(mm, du, sdata); - accept(sdata, user, du, preds, dpreds, uscale, lamu, false); - } else { - user ~ user + du; - } - - if (opts.traceConverge) { - println("step %d, loss %f" format (i, ((norm(sdata.contents - preds.contents) ^ 2f) + (sum(user dot (user ∘ lamu)))).dv/sdata.nnz)); - } - } + if (putBack < 0) { + user.clear + } + val b = mm * sdata; + val ucounts = sum(sdata0 != 0f); + val uci = (ucounts + 1f) ^ (- vexp); + + for (i <- 0 until opts.uiter) { + val preds = DDS(mm, user, sdata); + val deriv = b - mm * preds - (user ∘ lamu); + val du = (deriv ∘ uscale ∘ uci); + user ~ user + du; + if (opts.traceConverge) { + println("step %d, loss %f" format (i, ((norm(sdata.contents - preds.contents) ^ 2f) + (sum(user dot (user ∘ lamu)))).dv/sdata.nnz)); + } + } } - + + + /** + * Computes updates to the updatemats. Note again that we subtract (iavg+avg) + * from the sdata, so that predictions are done with DDS(mm,user,sdata), and + * the differences (for gradient update later) are stored. This is for + * updating the item matrix, so we hold the user matrix fixed (it's updated in + * uupdate) and compute updates for the item matrix (and the bias terms, + * actually). Also, this might be why we don't use a user bias term. Note that + * we call predictions once here, since mm and user are fixed for this method; + * ideally, some other updater will use the updatemats we compute (i.e. the + * gradients) to update the item matrix. The item matrix, if it wasn't clear, + * is modelmats(0). + * + * Note that there's some extra work with ipass < 1, I think to get reasonable + * initialization values for our bias terms. Here, avg is the average rating + * across the entire nonzeros of sdata0 (hence, our global bias), and iavg is + * some (scaled) estimate of how much we should boost each individal item. The + * iavg should be smaller since the avg already scales stuff to roughly 3.6. + * + * During predictions, this method is NOT called, hence the biases aren't + * updated. + * + * @param sdata0 Training data minibatch of size (nitems, batchSize). + * @param user Second matrix for computing predictions, of size (dim, + * batchSize). The matrix has the same values as the user matrix updated + * from the most recent uupdate method call. 
+ */ def mupdate(sdata0:Mat, user:Mat, ipass:Int, pos:Long):Unit = { val sdata = sdata0 - (iavg + avg); // values to be accumulated val preds = DDS(mm, user, sdata); - val diffs = sdata + 2f; + val diffs = sdata + 2f; // I THINK 2f is only for avoiding aliasing, but why not 0f? diffs.contents ~ sdata.contents - preds.contents; + if (ipass < 1) { - itemsum ~ itemsum + sum(sdata0, 2); - itemcount ~ itemcount + sum(sdata0 != 0f, 2); - avg ~ sum(itemsum) / sum(itemcount); - iavg ~ ((itemsum + avg) / (itemcount + 1)) - avg; + itemsum ~ itemsum + sum(sdata0, 2); // sum horizontally + itemcount ~ itemcount + sum(sdata0 != 0f, 2); // count #nonzeros horizontally + avg ~ sum(itemsum) / sum(itemcount); + iavg ~ ((itemsum + avg) / (itemcount + 1)) - avg; } + + // Compute gradient updates for the biases, and set wuser=user unless we're weighing. val icomp = sdata0 != 0f val icount = sum(sdata0 != 0f, 2); updatemats(1) = (sum(diffs,2) - iavg*mlm) / (icount + 1f); // per-item term estimator updatemats(2) ~ sum(diffs.contents) / (diffs.contents.length + 1f); val wuser = if (opts.weightByUser) { - val iwt = 100f / max(sum(sdata != 0f), 100f); + val iwt = 100f / max(sum(sdata != 0f), 100f); user ∘ iwt; } else { user; } if (firststep <= 0) firststep = pos.toFloat; + + // I get it! This derivative is virtually the same as what we had with the + // user update, except user and mm swap locations, which is expected. if (opts.lsgd >= 0 || opts.aopts == null) { - updatemats(0) = (wuser *^ diffs - (mm ∘ slm)) / ((icount + 1).t ^ vexp); // simple derivative - if (opts.lsgd >= 0) { - val step = (pos + firststep)/firststep; - uscale.set((lrate.dv * math.pow(step, - texp.dv)).toFloat); - val dm = updatemats(0) ∘ uscale ∘ cscale; - val dpreds = DDS(dm, user, sdata); - accept(sdata, mm, dm, preds, dpreds, uscale, slm, true); - } + updatemats(0) = (wuser *^ diffs - (mm ∘ slm)) / ((icount + 1).t ^ vexp); // simple derivative } else { - if (texp.asInstanceOf[AnyRef] != null) { - val step = (pos + firststep)/firststep; - ADAGrad.multUpdate(wuser, diffs, modelmats(0), sumsq, null, lrate, texp, vexp, epsilon, step, waitsteps); - } else { - ADAGrad.multUpdate(wuser, diffs, modelmats(0), sumsq, null, lrate, pexp, vexp, epsilon, ipass + 1, waitsteps); - } + if (texp.asInstanceOf[AnyRef] != null) { + val step = (pos + firststep)/firststep; + ADAGrad.multUpdate(wuser, diffs, modelmats(0), sumsq, null, lrate, texp, vexp, epsilon, step, waitsteps); + } else { + ADAGrad.multUpdate(wuser, diffs, modelmats(0), sumsq, null, lrate, pexp, vexp, epsilon, ipass + 1, waitsteps); + } } if (opts.doUsers) mm(0,?) 
= 1f; } - def accept(sdata:Mat, mmod:Mat, du:Mat, preds:Mat, dpreds:Mat, scale:Mat, lambda:Mat, flip:Boolean) = { - // println("sdata " + FMat(sdata.contents)(0->5,0).t) - val diff1 = preds + 0f; - diff1.contents ~ sdata.contents - preds.contents; -// println("sdata %d %s" format (if (flip) 1 else 0, FMat(sdata.contents)(0->5,0).t.toString)); -// println("preds %d %s" format (if (flip) 1 else 0, FMat(preds.contents)(0->5,0).t.toString)); -// println("diff %d %s" format (if (flip) 1 else 0, FMat(diff1.contents)(0->5,0).t.toString)); -// println("sdata "+FMat(sdata.contents)(0->5,0).t.toString); - val diff2 = diff1 + 0f; - diff2.contents ~ diff1.contents - dpreds.contents; - diff1.contents ~ diff1.contents ∘ diff1.contents; - diff2.contents ~ diff2.contents ∘ diff2.contents; - val rmmod = mmod + 1f; - normrnd(0, opts.lsgd, rmmod); - val mmod2 = mmod + du + rmmod ∘ scale; - val loss1 = (if (flip) sum(diff1,2).t else sum(diff1)) + (mmod dot (mmod ∘ lambda)); - val loss2 = (if (flip) sum(diff2,2).t else sum(diff2)) + (mmod2 dot (mmod2 ∘ lambda)); - - val accprob = erfc((loss2 - loss1) /scale); - val rsel = accprob + 0f; - rand(rsel); - val selector = rsel < accprob; - mmod ~ (mmod2 ∘ selector) + (mmod ∘ (1f - selector)); - if (opts.traceConverge) { - println("accepted %d %f %f %f" format (if (flip) 1 else 0, mean(selector).dv, mean(loss1).dv, mean(loss2).dv)); - } + + /** + * The evalfun normally called during training. Returns -RMSE on training data + * minibatch (sdata0). It has an extra option to return a matrix of scores, + * which will be useful for minibatch MH test updaters. We need a 1/(2*sigma^2) + * if we're assuming a Gaussian error distribution. + * + * @param sdata Training data minibatch of size (nitems, batchSize). + * @param user Second matrix for computing predictions, of size (dim, + * batchSize). The values here are based on the values computed in the most + * recent uupdate call. + */ + def evalfun(sdata:Mat, user:Mat, ipass:Int, pos:Long):FMat = { + val preds = DDS(mm, user, sdata) + (iavg + avg); + if (ogmats != null) { + ogmats(0) = user; + if (ogmats.length > 1) { + ogmats(1) = preds; + } + } + val dc = sdata.contents + val pc = preds.contents + val diff = DMat(dc - pc); + if (opts.matrixOfScores) { + // TODO Temporary but should be OK for now (b/c we almost never increment MB). + // The FMat(diff *@ diff) will make a vector, hence the broadcasting. + // Also, I was getting a handful of *really* negative scores where log + // p(.) went to -10000 or so. To prevent that, set a threshold at -15. + //println(sqrt((diff ddot diff)/diff.length)) // Use for debugging and sanity checks. + val sigma_sq = variance(diff).dv + val scores = -ln(sqrt(2*math.Pi*sigma_sq)).v - (1.0f/(2*sigma_sq)).v * FMat(diff *@ diff) + max(scores, -15f, scores) + scores + } else { + val vv = diff ddot diff; + -sqrt(row(vv/sdata.nnz)) + } + } + + + /** + * The evalfun normally called during TESTING (i.e. PREDICTION). Returns -RMSE + * on the TRAINING minibatch in `sdata`. We should also store predictions in + * `ogmats(1)`, which is what we can access externally via `preds(1)`. Thus, + * it's predicting based on both training and testing. + * + * @param sdata Training data minibatch of size (nitems, batchSize). + * @param user Second matrix for computing predictions, size (dim, batchSize). + * @param preds Matrix indicating the non-zero TESTING data points. 
+ */ + override def evalfun(sdata:Mat, user:Mat, preds:Mat, ipass:Int, pos:Long):FMat = { + val spreds = DDS(mm, user, sdata) + (iavg + avg); + val xpreds = DDS(mm, user, preds) + (iavg + avg); + val dc = sdata.contents; + val pc = spreds.contents; + val vv = (dc - pc) ddot (dc - pc); + + println("mean values (train/t.pred): \t%1.4f\t%1.4f" format (FMat(mean(dc)).v,FMat(mean(pc)).v)) + println("std. values (train/t.pred): \t%1.4f\t%1.4f" format (FMat(sqrt(variance(dc))).v,FMat(sqrt(variance(pc))).v)) + println("max. values (train/t.pred): \t%1.4f\t%1.4f" format (FMat(maxi(dc)).v,FMat(maxi(pc)).v)) + println("min. values (train/t.pred): \t%1.4f\t%1.4f" format (FMat(mini(dc)).v,FMat(mini(pc)).v)) + + if (ogmats != null) { + ogmats(0) = user; + if (ogmats.length > 1) { + ogmats(1) = xpreds; + } + } + -sqrt(row(vv/sdata.nnz)) } - def evalfun(sdata0:Mat, user:Mat, ipass:Int, pos:Long):FMat = { - val sdata = sdata0 - (iavg + avg); - val preds = DDS(mm, user, sdata); - val dc = sdata.contents - val pc = preds.contents - val diff = dc - pc; - val vv = diff ddot diff; - -sqrt(row(vv/sdata.nnz)) + + /** So I can set the MHTest container size appropriately. */ + def getNonzeros():Int = { + return numNonzerosMB } + + } + object SMF { trait Opts extends FactorModel.Opts { var ueps = 1e-10f var uconvg = 1e-3f - var miter = 5 var lambdau = 5f var lambdam = 5f var regumean = 0f var regmmean = 0f var urate = 0.1f - var lsgd = 0.1f var traceConverge = false var doUsers = true var weightByUser = false var aopts:ADAGrad.Opts = null; var minv = 1f; var maxv = 5f; - + var matrixOfScores = false; + var lsgd = 0f; } + class Options extends Opts {} def learner(mat0:Mat, d:Int) = { @@ -282,67 +402,78 @@ object SMF { opts) (nn, opts) } - - def learnerX(mat0:Mat, d:Int) = { + + /** + * Learner with single (training data) matrix as datasource, and an + * **EXTERNAL** ADAGrad Opts. We will benchmark this with the learner using + * an external MHTest. No internal ADAGrad updater. + */ + def learner1(mat0:Mat, d:Int) = { class xopts extends Learner.Options with SMF.Opts with MatSource.Opts with ADAGrad.Opts val opts = new xopts - opts.dim = d + opts.dim = d opts.putBack = -1 - opts.npasses = 4 - opts.lrate = 0.1; - opts.initUval = 0f; - opts.batchSize = math.min(100000, mat0.ncols/30 + 1); - opts.aopts = opts; - val nn = new Learner( - new MatSource(Array(mat0:Mat), opts), - new SMF(opts), - null, - null, - null, - opts); + opts.npasses = 4 + opts.lrate = 0.1 + opts.initUval = 0f; + opts.batchSize = math.min(100000, mat0.ncols/30 + 1) + opts.aopts = null + val nn = new Learner( + new MatSource(Array(mat0:Mat), opts), + new SMF(opts), + null, + new ADAGrad(opts), + null, + opts) (nn, opts) } - - def learner(mat0:Mat, user0:Mat, d:Int) = { - class xopts extends Learner.Options with SMF.Opts with MatSource.Opts with Grad.Opts + + /** + * Learner with single (training data) matrix as datasource, and using our + * MHTest updater. Use this for running experiments to benchmark with default + * ADAGrad. For our experiments, we should NOT be using opts.aopts, which is + * the internal ADAGrad updater. So that should be null ... 
+ */ + def learner2(mat0:Mat, d:Int) = { + class xopts extends Learner.Options with SMF.Opts with MatSource.Opts with ADAGrad.Opts with MHTest.Opts val opts = new xopts - opts.dim = d - opts.putBack = 1 - opts.npasses = 4 - opts.lrate = 0.1; - opts.initUval = 0f; + opts.dim = d + opts.putBack = -1 + opts.npasses = 4 + opts.lrate = 0.1 + opts.initUval = 0f; opts.batchSize = math.min(100000, mat0.ncols/30 + 1) + opts.aopts = null val nn = new Learner( - new MatSource(Array(mat0, user0), opts), - new SMF(opts), - null, - new Grad(opts), - null, - opts) + new MatSource(Array(mat0:Mat), opts), + new SMF(opts), + null, + new MHTest(opts), + null, + opts) (nn, opts) - } - - def learnerX(mat0:Mat, user0:Mat, d:Int) = { - class xopts extends Learner.Options with SMF.Opts with MatSource.Opts with ADAGrad.Opts + } + + def learner(mat0:Mat, user0:Mat, d:Int) = { + class xopts extends Learner.Options with SMF.Opts with MatSource.Opts with Grad.Opts val opts = new xopts opts.dim = d opts.putBack = 1 opts.npasses = 4 opts.lrate = 0.1; opts.initUval = 0f; - opts.batchSize = math.min(100000, mat0.ncols/30 + 1); - opts.aopts = opts; + opts.batchSize = math.min(100000, mat0.ncols/30 + 1) val nn = new Learner( new MatSource(Array(mat0, user0), opts), new SMF(opts), null, - null, + new Grad(opts), null, opts) (nn, opts) } - def predictor(model0:Model, mat1:Mat, preds:Mat) = { + def predictor(model0:Model, mat1:Mat, preds:Mat) = { class xopts extends Learner.Options with SMF.Opts with MatSource.Opts with Grad.Opts val model = model0.asInstanceOf[SMF] val nopts = new xopts; @@ -354,7 +485,6 @@ object SMF { val mopts = model.opts.asInstanceOf[SMF.Opts]; nopts.dim = mopts.dim; nopts.uconvg = mopts.uconvg; - nopts.miter = mopts.miter; nopts.lambdau = mopts.lambdau; nopts.lambdam = mopts.lambdam; nopts.regumean = mopts.regumean; @@ -369,6 +499,46 @@ object SMF { nopts) (nn, nopts) } -} - + + /** A class for one of the SMF predictors. */ + class PredOpts extends Learner.Options with SMF.Opts with MatSource.Opts with MatSink.Opts with Grad.Opts with ADAGrad.Opts + /** + * A predictor which will store the predictions in the predictor model + * matrices. It forms an empty matrix to be populated by the `user` matrices, + * which turns into the second factor matrix. It mirrors an SFA predictor code + * which also forms this empty matrix into the matrix datasource, with the + * only difference being the lack of an Minv option for `newmod`. + * + * @param mat1 The TRAINING DATA matrix. NOT THE TESTING DATA!!! NOT THE + * TESTING DATA!!! + * @param preds The non-zeros of the TESTING data (not training). + */ + def predictor1(model0:Model, mat1:Mat, preds:Mat) = { + val model = model0.asInstanceOf[SMF] + val nopts = new PredOpts; + nopts.batchSize = math.min(10000, mat1.ncols/30 + 1) + nopts.putBack = -1 + nopts.initUval = 0f // Daniel: for consistency with training update. 
+    val newmod = new SMF(nopts);
+    newmod.refresh = false
+    newmod.copyFrom(model);
+    val mopts = model.opts.asInstanceOf[SMF.Opts];
+    nopts.dim = mopts.dim;
+    nopts.uconvg = mopts.uconvg;
+    nopts.lambdau = mopts.lambdau;
+    nopts.lambdam = mopts.lambdam;
+    nopts.regumean = mopts.regumean;
+    nopts.doUsers = mopts.doUsers;
+    nopts.weightByUser = mopts.weightByUser;
+    nopts.nmats = 2;
+    val nn = new Learner(
+        new MatSource(Array(mat1, zeros(mopts.dim, mat1.ncols), preds), nopts),
+        newmod,
+        null,
+        null,
+        new MatSink(nopts),
+        nopts)
+    (nn, nopts)
+  }
+}
diff --git a/src/main/scala/BIDMach/updaters/ADAGrad.scala b/src/main/scala/BIDMach/updaters/ADAGrad.scala
index bd27ea93..8658b1ee 100755
--- a/src/main/scala/BIDMach/updaters/ADAGrad.scala
+++ b/src/main/scala/BIDMach/updaters/ADAGrad.scala
@@ -12,6 +12,9 @@ import scala.concurrent.ExecutionContext.Implicits.global
 class ADAGrad(override val opts:ADAGrad.Opts = new ADAGrad.Options) extends Updater {
+  // Three other sets of matrices (sumSq, momentum, and randmat) all have (if
+  // initialized) the same length and sizes as the modelmats and updatemats. Is the
+  // `mu` here a momentum term as well?
   var firstStep = 0f
   var modelmats:Array[Mat] = null
   var updatemats:Array[Mat] = null
@@ -19,15 +22,32 @@ class ADAGrad(override val opts:ADAGrad.Opts = new ADAGrad.Options) extends Upda
   var stepn:Mat = null
   var mask:Mat = null
   var momentum:Array[Mat] = null;
-  var ve:Mat = null
-  var pe:Mat = null
-  var te:Mat = null
+  var ve:Mat = null      // opts.vexp, an ADAGrad parameter (see BIDMach wiki)
+  var pe:Mat = null      // Similar to opts.texp? Some exponent like the others?
+  var te:Mat = null      // opts.texp, an ADAGrad parameter (see BIDMach wiki)
   var lrate:Mat = null
+  var tscale:Mat = null  // Daniel: I had to add this to make it accessible to MHTest.
   var mu:Mat = null
   var one:Mat = null
   var randmat:Array[Mat] = null
+
+  /**
+   * Initialize the ADAGrad model. Note the conditions required to create momentum
+   * and randmats. We have opts.momentum but also momentum mats, similar thing
+   * with nesterov. I would *guess* that opts.momentum is that hyper-parameter,
+   * while the momentum mats give the update for each parameter (i.e. component
+   * wise). But then what's the hyperparameter for Nesterov? There isn't any?
+   * BTW for both opts.momentum and opts.nesterov, we can set them to be one
+   * value or a vector with one per modelmat. If just one value, then it's
+   * repeated across all modelmats. ALSO, what value to set for opts.langevin?
+   *
+   * EDIT: figured it out, opts.nesterov is the same as opts.momentum, assuming
+   * that we choose to run one of momentum or Nesterov (and not both, which
+   * wouldn't make sense).
+   */
   override def init(model0:Model) = {
+    tscale = 1  // Daniel: and had to do this here to make it non-null to start.
    model = model0
    modelmats = model.modelmats;
    updatemats = model.updatemats;
@@ -57,7 +77,13 @@ class ADAGrad(override val opts:ADAGrad.Opts = new ADAGrad.Options) extends Upda
      ve <-- opts.vexp;
      te <-- opts.texp;
    }
+
+  /**
+   * Daniel: I don't know what this is supposed to do. However, there is *no*
+   * mention of any `update2` in the entire BIDMach repository --- I did a
+   * search. Hence, I think this method can be safely ignored.
+   */
  def update2(ipass:Int, step:Long):Unit = {
    modelmats = model.modelmats;
    updatemats = model.updatemats;
@@ -99,7 +125,26 @@ class ADAGrad(override val opts:ADAGrad.Opts = new ADAGrad.Options) extends Upda
        um.clear;
      }
    }
+
+  /**
+   * Whew, the major heavy-hitting for ADAGrad. 
Let's try to digest this slowly ... + * + * First, it looks like John is defining nsteps and tscale. Not sure what + * these mean... + * + * Second is the heavy-duty part, looping over each model matrix and the other + * matrices involved. What is opts.policies? I'm confused. Anyway, look at the + * CPU version of the case methods. The other version is the GPU, which I'll + * ignore now. + * + * The CPU version is more readable. First, it deals with some logic regarding + * the sum of squares (?). Second, it applies the Langevin dynamics. Third, + * and the crucial part, it applies either momentum updates or nesterov + * updates. I don't *think* we should be using both momentum or nesterov, at + * least based on this code logic. OH, I see the learning rate applied, then + * momentum (or nesterov) and indeed it matches the algorithm formulation. + */ override def update(ipass:Int, step:Long, gprogress:Float):Unit = { val start = toc; modelmats = model.modelmats @@ -112,7 +157,8 @@ class ADAGrad(override val opts:ADAGrad.Opts = new ADAGrad.Options) extends Upda step / firstStep; } } - val tscale = if (opts.texp.asInstanceOf[AnyRef] != 0) { + // Daniel: had to change this to make it accessible + var tscale = if (opts.texp.asInstanceOf[AnyRef] != 0) { stepn.set(1/(nsteps+1)); stepn ^ te; } else { @@ -121,7 +167,9 @@ class ADAGrad(override val opts:ADAGrad.Opts = new ADAGrad.Options) extends Upda } val nw = stepn; val nmats = math.min(modelmats.length, updatemats.length); -// println("u sumsq %g" format mini(sumSq(0)).dv) + + // This is applied to each model matrix separately. + // NOTE!! I changed momentum to be momentum(i); see my GitHub issue. for (i <- 0 until nmats) { if (opts.policies.asInstanceOf[AnyRef] != null) { if (opts.policies.length > 1) { @@ -142,10 +190,10 @@ class ADAGrad(override val opts:ADAGrad.Opts = new ADAGrad.Options) extends Upda case (gmm:GMat, gum:GMat, gss:GMat, gve:GMat, gts:GMat, glrate:GMat) => { if (opts.momentum.asInstanceOf[AnyRef] != null) { val mu = if (opts.momentum.length > 1) opts.momentum(i) else opts.momentum(0); - ADAGrad.ADAGradm(gmm, gum, gss, momentum.asInstanceOf[GMat], mu, mask.asInstanceOf[GMat], nw.dv.toFloat, gve, gts, glrate, opts.langevin, opts.epsilon, (opts.waitsteps < nsteps)); + ADAGrad.ADAGradm(gmm, gum, gss, momentum(i).asInstanceOf[GMat], mu, mask.asInstanceOf[GMat], nw.dv.toFloat, gve, gts, glrate, opts.langevin, opts.epsilon, (opts.waitsteps < nsteps)); } else if (opts.nesterov.asInstanceOf[AnyRef] != null) { val mu = if (opts.nesterov.length > 1) opts.nesterov(i) else opts.nesterov(0); - ADAGrad.ADAGradn(gmm, gum, gss, momentum.asInstanceOf[GMat], mu, mask.asInstanceOf[GMat], nw.dv.toFloat, gve, gts, glrate, opts.langevin, opts.epsilon, (opts.waitsteps < nsteps)); + ADAGrad.ADAGradn(gmm, gum, gss, momentum(i).asInstanceOf[GMat], mu, mask.asInstanceOf[GMat], nw.dv.toFloat, gve, gts, glrate, opts.langevin, opts.epsilon, (opts.waitsteps < nsteps)); } else { ADAGrad.ADAGradx(gmm, gum, gss, mask.asInstanceOf[GMat], nw.dv.toFloat, gve, gts, glrate, opts.langevin, opts.epsilon, (opts.waitsteps < nsteps)); } diff --git a/src/main/scala/BIDMach/updaters/MHTest.scala b/src/main/scala/BIDMach/updaters/MHTest.scala index 8bfc10d9..cefeda8a 100755 --- a/src/main/scala/BIDMach/updaters/MHTest.scala +++ b/src/main/scala/BIDMach/updaters/MHTest.scala @@ -4,7 +4,9 @@ import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat,TM import BIDMat.MatFunctions._ import BIDMat.SciFunctions._ import BIDMach.models._ -import 
edu.berkeley.bid.CUMACH +import BIDMach.models.Model._ +import edu.berkeley.bid.CUMACH +import BIDMach.Learner /** * Our fast MH test. See: @@ -61,13 +63,33 @@ class MHTest(override val opts:MHTest.Opts = new MHTest.Options) extends Updater var b:Long = 0 // Current minibatch size (also `b` in the paper). var N:Long = 0 // Maximum minibatch size (i.e. all training data). var n:Float = 0f // The *number* of minibatches we are using. - var logu:Float = 0f // log u, since we assume a symmetric proposer. + var psi:Float = 0f // \psi = log (1 * prop_ratio * prior_ratio) var T:Int = 1 // The temperature of the distribution. - var t:Int = 0 // Current number of samples of theta. + var _t:Int = 0 // Current number of samples of theta. var sumOfValues:Float = 0f // \sum_{i=1}^b (N/T)*log(p(x_i|theta')/p(x_i|theta)). var sumOfSquares:Float = 0f // \sum_{i=1}^b ((N/T)*log(p(x_i|theta')/p(x_i|theta)))^2. var targetVariance:Float = 0f // The target variance (so we only need one X_corr). + // Daniel: experimental, for the SMF. + var currentSizeSMF:Int = -1; + var adagrad:ADAGrad = null; + var tmpMomentum:Array[Mat] = null + var acceptanceRate:Mat = null + var currentUpdatemats:Array[Mat] = null + var proposedUpdatemats:Array[Mat] = null + + var sdata0:GSMat = null + var user:Mat = null + var mm:Mat = null + var iavg:Mat = null + var avg:Mat = null + var uscale:Mat = null + var vexp:Mat = null + var lamu:Mat = null + var slm:Mat = null + var mlm:Mat = null + var firststep = -1f + /** * Standard initialization. We have: @@ -75,6 +97,7 @@ class MHTest(override val opts:MHTest.Opts = new MHTest.Options) extends Updater * - n2ld loads the pre-computed X_c variable distribution. * - {delta,proposed,tmp}Theta initialized to zeros with correct dimensions. * - If desired, initialize modelmats with small values to break symmetry. + * - If desired, initialize an internal ADAGrad updater. * * Note that the file for the norm2logdata should be in the correct directory. */ @@ -82,9 +105,21 @@ class MHTest(override val opts:MHTest.Opts = new MHTest.Options) extends Updater model = model0; modelmats = model.modelmats updatemats = model.updatemats - scores0 = zeros(1,model.datasource.opts.batchSize) - scores1 = zeros(1,model.datasource.opts.batchSize) - diff = zeros(1,model.datasource.opts.batchSize) + acceptanceRate = zeros(1, opts.exitThetaAmount * 10) + if (opts.smf) { + val numnnz = model.asInstanceOf[SMF].getNonzeros() + if (numnnz < 0) { + println("Something wrong happened, numnnz="+numnnz) + sys.exit + } + scores0 = zeros(1, numnnz*10) + scores1 = zeros(1, numnnz*10) + diff = zeros(1, numnnz*10) + } else { + scores0 = zeros(1, model.datasource.opts.batchSize) + scores1 = zeros(1, model.datasource.opts.batchSize) + diff = zeros(1, model.datasource.opts.batchSize) + } T = opts.temp if (opts.Nknown) { @@ -115,14 +150,44 @@ class MHTest(override val opts:MHTest.Opts = new MHTest.Options) extends Updater modelmats(i) <-- normrnd(0, 0.03f, modelmats(i).nrows, modelmats(i).ncols) } } + + if (opts.smf) { + // This should force adagrad.momentum(i) = momentum(i) in the rest of this code. 
+ adagrad = new ADAGrad(opts.asInstanceOf[ADAGrad.Opts]) + adagrad.init(model) + currentUpdatemats = new Array[Mat](nmats) + proposedUpdatemats = new Array[Mat](nmats) + for (i <- 0 until nmats) { + currentUpdatemats(i) = modelmats(i).zeros(modelmats(i).nrows, modelmats(i).ncols) + proposedUpdatemats(i) = modelmats(i).zeros(modelmats(i).nrows, modelmats(i).ncols) + } + + sdata0 = GSMat(model.datasource.omats(0).asInstanceOf[SMat]) + user = FactorModel.reuseuser(sdata0, model.opts.dim, 0f) + mm = proposedTheta(0) + iavg = proposedTheta(1) + avg = proposedTheta(2) + uscale = mm.zeros(1,1) + vexp = GMat(row(0.5f)) + lamu = mm.ones(opts.asInstanceOf[SMF.Opts].dim, 1) ∘ opts.asInstanceOf[SMF.Opts].lambdau + slm = mm.ones(1,1) ∘ (opts.asInstanceOf[SMF.Opts].lambdam * sdata0.ncols); + mlm = mm.ones(1,1) ∘ (opts.asInstanceOf[SMF.Opts].regmmean * sdata0.ncols); + firststep = -1f + } } - + /** * This performs the update and the MH test based on a minibatch of data. The * original data is split up into equal-sized minibatches in the Learner code. * (The last minibatch is ignored since it generally has a different size.) * + * SMF.scala necessitates extra cases to handle the varying batch sizes. They + * differ across "minibatches" so the scores at "the end" have to be cleared + * (but since scores are only for current MB, just call "clear"), the number + * of nonzeros have to be computed, and then the scores are copied to the + * appropriate interval. EDIT: ugh, never mind, doesn't work ... + * * @param ipass The current pass over the full (training) data. * @param step Progress within the current minibatch, indicated as a numerical * index representing the starting column of this minibatch. @@ -130,29 +195,49 @@ class MHTest(override val opts:MHTest.Opts = new MHTest.Options) extends Updater * datasource size and the number of (training) passes. */ override def update(ipass:Int, step:Long, gprogress:Float):Unit = { - if (newMinibatch) beforeEachMinibatch() - b += model.datasource.opts.batchSize + if (newMinibatch) beforeEachMinibatch(ipass, step, gprogress) n += 1.0f // (Part 1) Compute scores for theta and theta', scaled by N/T. - scores0 <-- (model.evalbatchg(model.datasource.omats, ipass, step) * (N/T.dv)) + if (opts.smf) { + currentSizeSMF = model.datasource.omats(0).nnz + b += currentSizeSMF + scores0.clear + scores0(0 -> currentSizeSMF) = (model.evalbatchg(model.datasource.omats, ipass, step) * (N/T.dv)).t + } else { + scores0 <-- (model.evalbatchg(model.datasource.omats, ipass, step) * (N/T.dv)) + b += scores0.length + } if (scores0.length == 1) { throw new RuntimeException("Need individual scores, but getting a scalar.") } + for (i <- 0 until modelmats.length) { modelmats(i) <-- proposedTheta(i) } - scores1 <-- (model.evalbatchg(model.datasource.omats, ipass, step) * (N/T.dv)) - diff ~ scores1 - scores0 + if (opts.smf) { + scores1.clear + scores1(0 -> currentSizeSMF) = (model.evalbatchg(model.datasource.omats, ipass, step) * (N/T.dv)).t + diff.clear + diff(0 -> currentSizeSMF) = scores1(0 -> currentSizeSMF) - scores0(0 -> currentSizeSMF) + } else { + scores1 <-- (model.evalbatchg(model.datasource.omats, ipass, step) * (N/T.dv)) + diff ~ scores1 - scores0 + } // (Part 2) Update our \Delta* and sample variance of \Delta*. 
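      // Concretely, with l_i = (N/T)*log(p(x_i|theta')/p(x_i|theta)) for the i-th
      // point in the current minibatch of size b, the lines below form
      //   deltaStar      = (1/b) * sum_i l_i  -  psi
      //   sampleVariance = ( (1/b) * sum_i l_i^2 - ((1/b) * sum_i l_i)^2 ) / b
      // i.e. the estimated mean of the l_i minus psi, and the variance of that mean.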
- sumOfSquares += sum((diff)*@(diff)).v - sumOfValues += sum(diff).v - val deltaStar = sumOfValues/b.v - logu - val sampleVariance = (sumOfSquares/b.v - ((sumOfValues/b.v)*(sumOfValues/b.v))) / b.v + if (opts.smf) { + sumOfSquares += sum((diff(0 -> currentSizeSMF)) *@ (diff(0 -> currentSizeSMF))).v + sumOfValues += sum(diff(0 -> currentSizeSMF)).v + } else { + sumOfSquares += sum((diff)*@(diff)).v + sumOfValues += sum(diff).v + } + val deltaStar = sumOfValues/b.v - psi + val sampleVariance = (sumOfSquares/b.v - ((sumOfValues/b.v)*(sumOfValues/b.v))) / b.v val numStd = deltaStar / math.sqrt(sampleVariance) var accept = false - if (opts.verboseMH) debugPrints(sampleVariance, deltaStar) + if (opts.verboseMH) debugPrints(sampleVariance, deltaStar, numStd, sumOfValues/b.v, psi) // (Part 3) Run our test! // (Part 3.1) Take care of the full data case; this usually indicates a problem. @@ -170,7 +255,7 @@ class MHTest(override val opts:MHTest.Opts = new MHTest.Options) extends Updater } } // (Part 3.2) Abnormally good or bad minibatches. - else if (math.abs(numStd) > 5.0) { + else if (math.abs(numStd) > 10.0) { if (opts.verboseMH) { println("\tCASE 1: math.abs(numStd) = " +math.abs(numStd)) } @@ -192,7 +277,7 @@ class MHTest(override val opts:MHTest.Opts = new MHTest.Options) extends Updater val Xc = normlogrnd(1,1).dv val testStat = deltaStar + Xn + Xc if (opts.verboseMH) { - println("\tCASE 3; with testStat = "+testStat) + println("\tCASE 3; with testStat = %1.4f (Xn = %1.4f, Xc = %1.4f)" format (testStat, Xn, Xc)) } if (testStat > 0) { accept = true @@ -201,35 +286,128 @@ class MHTest(override val opts:MHTest.Opts = new MHTest.Options) extends Updater // (Part 4) Reset parameters and use <-- to avoid alias problems. if (accept) { + if (opts.verboseMH) println("\tACCEPT") for (i <- 0 until modelmats.length) { tmpTheta(i) <-- modelmats(i) // Now tmpTheta has proposed theta. - } + } + acceptanceRate(_t) = 1 } else { + if (opts.verboseMH) println("\treject") for (i <- 0 until modelmats.length) { modelmats(i) <-- tmpTheta(i) // Now modelmats back to old theta. } + acceptanceRate(_t) = 0 } - if (newMinibatch && accept) afterEachMinibatch() + + if (newMinibatch) afterEachMinibatch(ipass, gprogress) } - + /** * Stuff we should do before each minibatch. This involves calling the * proposer, resetting some values, and saving the current model matrix into - * `tmpTheta` so we can restore it later when needed. + * `tmpTheta` so we can restore it later when needed. Here, we want to set the + * proposer matrices, so that when we continue in uupdate, we have the current + * and proposed model matrices stored in modelmats and proposedTheta, + * respectively. + * + * Also, we have a different (i.e. better!) proposer with ADAGrad. The update + * *should* affect all of the modelmats(i) due to aliasing (since it changes + * adagrad.modelmats(i)). However, this doesn't put it in proposedTheta, so + * here's a workaround: get the modelmats stored into tmpTheta. Then do the + * update, which will update modelmats to the proposed matrices. Then copy + * those into propsoedTheta, and then get current modelmats back to tmpTheta + * (i.e. so modelmats remains the same before and after, and it's just the + * proposedTheta which changes). With momentum, fortunately it's simpler, we + * have that in adagrad.momentum (that's the "proposed" momentum) and simply + * keep the "current" one in tmpMomentum. 
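+   *
+   * For reference, the loop at the end of this method evaluates
+   *
+   *   psi = sum_i (1/(2*sigma^2)) * ( ||theta_i  - theta'_i - tau*grad_i(theta')||^2
+   *                                 - ||theta'_i - theta_i  - tau*grad_i(theta) ||^2 )
+   *
+   * over the model matrices, with tau = lrate, sigma = langevin, and grad_i the
+   * corresponding updatemats. Up to constants that cancel, this equals
+   * log N(theta'; theta + tau*grad(theta), sigma^2 I) - log N(theta; theta' + tau*grad(theta'), sigma^2 I),
+   * the forward-minus-reverse log density of a Langevin (MALA-style) Gaussian proposal.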
*/ - def beforeEachMinibatch() { + def beforeEachMinibatch(ipass:Int, pos:Long, gprogress:Float) { if (opts.verboseMH) println("\n\tNew minibatch!") - randomWalkProposer() - logu = ln(rand(1,1)).v + for (i <- 0 until modelmats.length) { + tmpTheta(i) <-- modelmats(i) + currentUpdatemats(i) <-- updatemats(i) + } + + // Important, update the model matrices. Also, compute \psi(1,theta,theta'). + // Did computation by hand. A lot of this depends on whether it's SMF or not. + if (opts.smf) { + + // Now modelmats is DIFFERENT! modelmats is now the proposed stuff. Note + // that this does not change the updatemats; that came from SMF.scala. We + // then make proposedTheta contain the proposed stuff! Think of this as + // only providing us with proposedTheta (and keeping modelmats unchanged). + adagrad.update(ipass, pos, gprogress) + for (i <- 0 until modelmats.length) { + proposedTheta(i) <-- adagrad.modelmats(i) // adagrad.modelmats(i) = modelmats(i) + modelmats(i) <-- tmpTheta(i) // Should make adagrad.modelmats(i) back to what it was before. + } + + // Next, with proposedTheta, we now compute the proposed log gradient. I + // think it's easier to copy the relevant mupdate code here. This is very + // specific to SMF.scala, sorry. I actually have to REDO the calls here + // ... oh boy. But fortunately, the `user` matrix gets reset to 0 each + // time for the real SMF, so I can do it here. ALSO, make sure I don't + // update the biases as I would with a call to mupdate in ipass=0. + + sdata0 = GSMat(model.datasource.omats(0).asInstanceOf[SMat]) + user <-- FactorModel.reuseuser(sdata0, model.opts.dim, 0f) + mm <-- proposedTheta(0) + iavg <-- proposedTheta(1) + avg <-- proposedTheta(2) + + // UUPDATE, which means (for instance) predictions are based on *proposed* theta. + if (firststep <= 0) firststep = pos.toFloat; + val step = (pos + firststep)/firststep; + val texp = if (opts.asInstanceOf[Grad.Opts].texp.asInstanceOf[AnyRef] != null) { + opts.asInstanceOf[Grad.Opts].texp.dv + } else { + opts.asInstanceOf[Grad.Opts].pexp.dv + } + uscale.set(opts.asInstanceOf[SMF.Opts].urate * math.pow(ipass+1, - texp).toFloat) + val sdata = sdata0 - GMat(iavg+avg); + val bb = mm * sdata; + val ucounts = sum(sdata0 != 0f); + val uci = (ucounts + 1f) ^ (- vexp); + for (i <- 0 until opts.asInstanceOf[SMF.Opts].uiter) { + val preds = DDS(mm, user, sdata); + val deriv = bb - mm * preds - (user ∘ lamu); + val du = (deriv ∘ uscale ∘ uci); + user ~ user + du; + } + + // MUPDATE + val preds = DDS(proposedTheta(0), user, sdata); + val diffs = sdata + 2f; + diffs.contents ~ sdata.contents - preds.contents; + val icomp = sdata0 != 0f + val icount = sum(sdata0 != 0f, 2); + proposedUpdatemats(1) = (sum(diffs,2) - iavg*mlm) / (icount + 1f); + proposedUpdatemats(2) ~ sum(diffs.contents) / (diffs.contents.length + 1f); + if (firststep <= 0) firststep = pos.toFloat; + proposedUpdatemats(0) = (user *^ diffs - (mm ∘ slm)) / ((icount + 1).t ^ vexp); + + // Finally, we can get the psi terms. + psi = 0f + val tau = FMat(adagrad.lrate).v + val sigma = FMat(adagrad.opts.langevin).v + for (i <- 0 until modelmats.length) { + val term1 = modelmats(i) - proposedTheta(i) - tau*proposedUpdatemats(i) + val term2 = proposedTheta(i) - modelmats(i) - tau*currentUpdatemats(i) + psi += (1f/(2*sigma*sigma)) * ((term1 ddot term1) - (term2 ddot term2)).v + } + } + else { + // Otherwise, it's easy, just do a random walk and set psi = ln(1) = 0. 
+ randomWalkProposer() + psi = 0f + } + newMinibatch = false b = 0 n = 0 sumOfValues = 0f sumOfSquares = 0f - for (i <- 0 until modelmats.length) { - tmpTheta(i) <-- modelmats(i) - } } @@ -239,23 +417,32 @@ class MHTest(override val opts:MHTest.Opts = new MHTest.Options) extends Updater * logic about the burn-in period, and also exit the program if we reach the * desired number of samples. */ - def afterEachMinibatch() { - t += 1 + def afterEachMinibatch(ipass:Int, gprogress:Float) { + _t += 1 if (opts.collectData) { for (i <- 0 until modelmats.length) { - saveFMat(opts.collectDataDir+ "theta_%d_%04d.fmat.lz4" format (i,t), FMat(modelmats(i))) + saveFMat(opts.collectDataDir+ "theta_%d_%04d.fmat.lz4" format (i,_t), FMat(modelmats(i))) } - saveFMat(opts.collectDataDir+ "data_%04d.fmat.lz4" format (t), FMat(b)) + saveFMat(opts.collectDataDir+ "data_%04d.fmat.lz4" format (_t), FMat(b)) } - if (t == opts.exitThetaAmount && opts.exitTheta) { - println("Exiting code now since t=" +t) + if (_t == opts.exitThetaAmount && opts.exitTheta) { + println("Exiting code now since t=" +_t) sys.exit } - if (t == opts.burnIn) { + if (_t == opts.burnIn) { println("ALERT: Past burn-in period. Now change temperature, proposer, etc.") T = opts.tempAfterBurnin opts.sigmaProposer = opts.sigmaProposerAfterBurnin } + if (opts.smf) { + if (opts.saveAcceptRate && (ipass+1) == opts.asInstanceOf[Learner.Options].npasses + && gprogress > 0.99) { + val lr = adagrad.opts.lrate.v + val lang = adagrad.opts.langevin(0).v + saveMat(opts.acceptRateDir+"arate_%1.4f_%1.3f.mat.lz4" format (lr,lang), + acceptanceRate(0 -> _t)) + } + } } @@ -312,11 +499,19 @@ class MHTest(override val opts:MHTest.Opts = new MHTest.Options) extends Updater /** This is for debugging. */ - def debugPrints(sampleVariance:Float, deltaStar:Float) { - println("b="+b+", n="+n+", logu="+logu+ ", b-mbSize="+(b - model.datasource.opts.batchSize).toInt) - println("mean(scores0) = "+mean(scores0,2).dv+", mean(scores1) = "+mean(scores1,2).dv) - println("sampleVar = " +sampleVariance) - println("delta* = " + deltaStar) + def debugPrints(sampleVariance:Float, deltaStar:Float, numStd:Double, sumDivB:Float, psi:Float) { + val s1 = mean(scores1(0 -> b.toInt)).dv + val s0 = mean(scores0(0 -> b.toInt)).dv + println("b = %d, n = %d" format (b, n.toInt)) + println("mean(scores1) (%1.4f) - mean(scores0) (%1.4f) = %1.4f" format (s1, s0, s1-s0)) + println("maxi(scores1) = "+maxi(scores1(0 -> b.toInt))+", maxi(scores0) = "+maxi(scores0(0 -> b.toInt))) + println("mini(scores1) = "+mini(scores1(0 -> b.toInt))+", mini(scores0) = "+mini(scores0(0 -> b.toInt))) + if (opts.smf) { + println("delta^* (%1.4f) = sumDivB (%1.4f) - psi (%1.4f)" format (deltaStar, sumDivB, psi)) + println("sampleVar = %1.4f, numStd = %1.4f" format (sampleVariance, numStd)) + } else { + println("delta^* = %1.4f, sampleVar = %1.4f, numStd = %1.4f" format (deltaStar, sampleVariance, numStd)) + } } } @@ -341,6 +536,9 @@ object MHTest { var exitThetaAmount = 3000 var initThetaHere = false var burnIn = -1 + var smf = false + var saveAcceptRate = false + var acceptRateDir = "tmp/" } class Options extends Opts {} diff --git a/src/main/scala/BIDMach/updaters/Updater.scala b/src/main/scala/BIDMach/updaters/Updater.scala index 614f67e6..fe34b9d6 100755 --- a/src/main/scala/BIDMach/updaters/Updater.scala +++ b/src/main/scala/BIDMach/updaters/Updater.scala @@ -19,7 +19,7 @@ abstract class Updater(val opts:Updater.Opts = new Updater.Options) extends Seri def update(ipass:Int, step:Long):Unit = {} def update(ipass:Int, 
step:Long, gprogress:Float):Unit = update(ipass, step) - + def updateM(ipass:Int):Unit = { model.updatePass(ipass) }
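
For readers trying to follow MHTest.update above, here is a minimal, self-contained sketch (plain Scala, independent of BIDMach) of the per-minibatch statistic and the normal-case accept decision. It assumes the normal correction X_n is drawn with variance targetVariance - sampleVariance, stubs out the X_corr draw (the real updater samples it from the precomputed norm2logdata table), and omits the guard cases (full-data minibatches, abnormal minibatches, sample variance above the target); every name below is illustrative rather than taken from the repository.

import scala.util.Random

object MHTestSketch {
  val rng = new Random(0)

  // Placeholder for the X_corr draw (normlogrnd in the real updater).
  def xCorrSample(): Double = 0.0

  /** scores0/scores1: per-example (N/T)-scaled log-likelihoods under the current
    * and proposed parameters; psi: log proposal correction, 0 for a symmetric
    * random-walk proposer. Returns true if the proposal should be accepted. */
  def accept(scores0: Array[Double], scores1: Array[Double],
             psi: Double, targetVariance: Double): Boolean = {
    val b = scores0.length.toDouble
    val diff = scores1.zip(scores0).map { case (s1, s0) => s1 - s0 }
    val mean = diff.sum / b
    val deltaStar = mean - psi
    val sampleVariance = (diff.map(d => d * d).sum / b - mean * mean) / b
    // Normal case: add N(0, targetVariance - sampleVariance) noise plus X_corr,
    // and accept when the corrected statistic is positive.
    val xn = math.sqrt(math.max(0.0, targetVariance - sampleVariance)) * rng.nextGaussian()
    deltaStar + xn + xCorrSample() > 0
  }
}

Calling accept with psi = 0 corresponds to the symmetric random-walk branch of beforeEachMinibatch; the SMF path instead passes the Langevin log proposal ratio described there.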