diff --git a/LICENSE b/LICENSE
index d7a790a62894f..5a8c78b98b2ba 100644
--- a/LICENSE
+++ b/LICENSE
@@ -238,6 +238,7 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
 (BSD 3 Clause) netlib core (com.github.fommil.netlib:core:1.1.2 - https://github.com/fommil/netlib-java/core)
 (BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.2.7 - https://github.com/jpmml/jpmml-model)
 (BSD License) AntLR Parser Generator (antlr:antlr:2.7.7 - http://www.antlr.org/)
+(BSD License) ANTLR 4.5.2-1 (org.antlr:antlr4:4.5.2-1 - http://www.antlr.org/)
 (BSD licence) ANTLR ST4 4.0.4 (org.antlr:ST4:4.0.4 - http://www.stringtemplate.org)
 (BSD licence) ANTLR StringTemplate (org.antlr:stringtemplate:3.2.1 - http://www.stringtemplate.org)
 (BSD License) Javolution (javolution:javolution:5.5.1 - http://javolution.org)
diff --git a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java
index ba6d30a74c673..4bc3c1a3c8a64 100644
--- a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java
+++ b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java
@@ -24,6 +24,7 @@
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.Lists;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.server.api.*;
 import org.slf4j.Logger;
@@ -118,7 +119,7 @@ protected void serviceInit(Configuration conf) {
     // an application was stopped while the NM was down, we expect yarn to call stopApplication()
     // when it comes back
     registeredExecutorFile =
-      findRegisteredExecutorFile(conf.getStrings("yarn.nodemanager.local-dirs"));
+      findRegisteredExecutorFile(conf.getTrimmedStrings("yarn.nodemanager.local-dirs"));

     TransportConf transportConf = new TransportConf("shuffle", new HadoopConfigProvider(conf));
     // If authentication is enabled, set up the shuffle server to use a
@@ -191,12 +192,12 @@ public void stopContainer(ContainerTerminationContext context) {

   private File findRegisteredExecutorFile(String[] localDirs) {
     for (String dir: localDirs) {
-      File f = new File(dir, "registeredExecutors.ldb");
+      File f = new File(new Path(dir).toUri().getPath(), "registeredExecutors.ldb");
       if (f.exists()) {
         return f;
       }
     }
-    return new File(localDirs[0], "registeredExecutors.ldb");
+    return new File(new Path(localDirs[0]).toUri().getPath(), "registeredExecutors.ldb");
   }

   /**
diff --git a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
index 9aacb084f648c..32958be7a7fd7 100644
--- a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
+++ b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
@@ -56,9 +56,10 @@
 *   Bytes 4 to 8: len(k)
 *   Bytes 8 to 8 + len(k): key data
 *   Bytes 8 + len(k) to 8 + len(k) + len(v): value data
+*   Bytes 8 + len(k) + len(v) to 8 + len(k) + len(v) + 8: pointer to next pair
 *
 * This means that the first four bytes store the entire record (key + value) length. This format
-* is consistent with {@link org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter},
+* is compatible with {@link org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter},
 * so we can pass records from this map directly into the sorter to sort records in place.
 */
 public final class BytesToBytesMap extends MemoryConsumer {
@@ -132,7 +133,12 @@ public final class BytesToBytesMap extends MemoryConsumer {
   /**
    * Number of keys defined in the map.
    */
-  private int numElements;
+  private int numKeys;
+
+  /**
+   * Number of values defined in the map. A key could have multiple values.
+   */
+  private int numValues;

   /**
    * The map will be expanded once the number of keys exceeds this threshold.
@@ -223,7 +229,12 @@ public BytesToBytesMap(
   /**
    * Returns the number of keys defined in the map.
    */
-  public int numElements() { return numElements; }
+  public int numKeys() { return numKeys; }
+
+  /**
+   * Returns the number of values defined in the map. A key could have multiple values.
+   */
+  public int numValues() { return numValues; }

   public final class MapIterator implements Iterator<Location> {
@@ -311,7 +322,8 @@ public Location next() {
       if (currentPage != null) {
         int totalLength = Platform.getInt(pageBaseObject, offsetInPage);
         loc.with(currentPage, offsetInPage);
-        offsetInPage += 4 + totalLength;
+        // [total size] [key size] [key] [value] [pointer to next]
+        offsetInPage += 4 + totalLength + 8;
         recordsInPage --;
         return loc;
       } else {
@@ -361,7 +373,7 @@ public long spill(long numBytes) throws IOException {
       while (numRecords > 0) {
         int length = Platform.getInt(base, offset);
         writer.write(base, offset + 4, length, 0);
-        offset += 4 + length;
+        offset += 4 + length + 8;
         numRecords--;
       }
       writer.close();
@@ -395,7 +407,7 @@ public void remove() {
    * `lookup()`, the behavior of the returned iterator is undefined.
    */
   public MapIterator iterator() {
-    return new MapIterator(numElements, loc, false);
+    return new MapIterator(numValues, loc, false);
   }

   /**
@@ -409,7 +421,7 @@ public MapIterator iterator() {
    * `lookup()`, the behavior of the returned iterator is undefined.
    */
   public MapIterator destructiveIterator() {
-    return new MapIterator(numElements, loc, true);
+    return new MapIterator(numValues, loc, true);
   }

   /**
@@ -559,6 +571,20 @@ private Location with(Object base, long offset, int length) {
       return this;
     }

+    /**
+     * Find the next pair that has the same key as the current one.
+     */
+    public boolean nextValue() {
+      assert isDefined;
+      long nextAddr = Platform.getLong(baseObject, valueOffset + valueLength);
+      if (nextAddr == 0) {
+        return false;
+      } else {
+        updateAddressesAndSizes(nextAddr);
+        return true;
+      }
+    }
+
     /**
      * Returns the memory page that contains the current record.
      * This is only valid if this is returned by {@link BytesToBytesMap#iterator()}.
@@ -625,10 +651,9 @@ public int getValueLength() {
     }

     /**
-     * Store a new key and value. This method may only be called once for a given key; if you want
-     * to update the value associated with a key, then you can directly manipulate the bytes stored
-     * at the value address. The return value indicates whether the put succeeded or whether it
-     * failed because additional memory could not be acquired.
+     * Append a new value for the key. This method could be called multiple times for a given key.
+     * The return value indicates whether the put succeeded or whether it failed because additional
+     * memory could not be acquired.
      *

      * It is only valid to call this method immediately after calling `lookup()` using the same key.
      *

@@ -637,7 +662,7 @@ public int getValueLength() {
      *

*

      * After calling this method, calls to `get[Key|Value]Address()` and `get[Key|Value]Length`
-     * will return information on the data stored by this `putNewKey` call.
+     * will return information on the data stored by this `append` call.
      *

*

      * As an example usage, here's the proper way to store a new key:
@@ -645,7 +670,7 @@ public int getValueLength() {
      *

      *   Location loc = map.lookup(keyBase, keyOffset, keyLength);
      *   if (!loc.isDefined()) {
-     *     if (!loc.putNewKey(keyBase, keyOffset, keyLength, ...)) {
+     *     if (!loc.append(keyBase, keyOffset, keyLength, ...)) {
      *       // handle failure to grow map (by spilling, for example)
      *     }
      *   }
@@ -657,26 +682,23 @@ public int getValueLength() {
      * @return true if the put() was successful and false if the put() failed because memory could
      *         not be acquired.
      */
-    public boolean putNewKey(Object keyBase, long keyOffset, int keyLength,
-        Object valueBase, long valueOffset, int valueLength) {
-      assert (!isDefined) : "Can only set value once for a key";
-      assert (keyLength % 8 == 0);
-      assert (valueLength % 8 == 0);
-      assert(longArray != null);
-
+    public boolean append(Object kbase, long koff, int klen, Object vbase, long voff, int vlen) {
+      assert (klen % 8 == 0);
+      assert (vlen % 8 == 0);
+      assert (longArray != null);
 
-      if (numElements == MAX_CAPACITY
+      if (numKeys == MAX_CAPACITY
        // The map could be reused from the last spill (if there was not enough memory to grow),
        // in which case we don't try to grow again once the `growthThreshold` is hit.
-        || !canGrowArray && numElements > growthThreshold) {
+        || !canGrowArray && numKeys > growthThreshold) {
         return false;
       }
 
       // Here, we'll copy the data into our data pages. Because we only store a relative offset from
       // the key address instead of storing the absolute address of the value, the key and value
       // must be stored in the same memory page.
-      // (8 byte key length) (key) (value)
-      final long recordLength = 8 + keyLength + valueLength;
+      // (8 byte key length) (key) (value) (8 byte pointer to next value)
+      final long recordLength = 8 + klen + vlen + 8;
       if (currentPage == null || currentPage.size() - pageCursor < recordLength) {
         if (!acquireNewPage(recordLength + 4L)) {
           return false;
@@ -687,30 +709,40 @@ public boolean putNewKey(Object keyBase, long keyOffset, int keyLength,
       final Object base = currentPage.getBaseObject();
       long offset = currentPage.getBaseOffset() + pageCursor;
       final long recordOffset = offset;
-      Platform.putInt(base, offset, keyLength + valueLength + 4);
-      Platform.putInt(base, offset + 4, keyLength);
+      Platform.putInt(base, offset, klen + vlen + 4);
+      Platform.putInt(base, offset + 4, klen);
       offset += 8;
-      Platform.copyMemory(keyBase, keyOffset, base, offset, keyLength);
-      offset += keyLength;
-      Platform.copyMemory(valueBase, valueOffset, base, offset, valueLength);
+      Platform.copyMemory(kbase, koff, base, offset, klen);
+      offset += klen;
+      Platform.copyMemory(vbase, voff, base, offset, vlen);
+      offset += vlen;
+      Platform.putLong(base, offset, 0);
 
       // --- Update bookkeeping data structures ----------------------------------------------------
       offset = currentPage.getBaseOffset();
       Platform.putInt(base, offset, Platform.getInt(base, offset) + 1);
       pageCursor += recordLength;
-      numElements++;
       final long storedKeyAddress = taskMemoryManager.encodePageNumberAndOffset(
         currentPage, recordOffset);
-      longArray.set(pos * 2, storedKeyAddress);
-      longArray.set(pos * 2 + 1, keyHashcode);
-      updateAddressesAndSizes(storedKeyAddress);
-      isDefined = true;
+      numValues++;
+      if (isDefined) {
+        // put this pair at the end of chain
+        while (nextValue()) { /* do nothing */ }
+        Platform.putLong(baseObject, valueOffset + valueLength, storedKeyAddress);
+        nextValue();  // point to new added value
+      } else {
+        numKeys++;
+        longArray.set(pos * 2, storedKeyAddress);
+        longArray.set(pos * 2 + 1, keyHashcode);
+        updateAddressesAndSizes(storedKeyAddress);
+        isDefined = true;
 
-      if (numElements > growthThreshold && longArray.size() < MAX_CAPACITY) {
-        try {
-          growAndRehash();
-        } catch (OutOfMemoryError oom) {
-          canGrowArray = false;
+        if (numKeys > growthThreshold && longArray.size() < MAX_CAPACITY) {
+          try {
+            growAndRehash();
+          } catch (OutOfMemoryError oom) {
+            canGrowArray = false;
+          }
         }
       }
       return true;
@@ -866,7 +898,8 @@ public LongArray getArray() {
    * Reset this map to initialized state.
    */
   public void reset() {
-    numElements = 0;
+    numKeys = 0;
+    numValues = 0;
     longArray.zeroOut();
 
     while (dataPages.size() > 0) {
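
The layout change above appends an 8-byte pointer to every record so that several values can be chained under one key; that is why `putNewKey` becomes `append` and why `Location.nextValue()` exists. Below is a minimal Scala sketch of the intended calling pattern, assuming a `BytesToBytesMap` constructed elsewhere with a `TaskMemoryManager` and 8-byte (single `long`) keys and values; it is illustrative only, not part of the patch.

```scala
import org.apache.spark.unsafe.Platform
import org.apache.spark.unsafe.map.BytesToBytesMap

object AppendAndScanSketch {
  // `map` is assumed to be built elsewhere, e.g. new BytesToBytesMap(taskMemoryManager, 64, pageSize).
  def appendAndScan(map: BytesToBytesMap, key: Array[Long], value: Array[Long]): Unit = {
    val loc = map.lookup(key, Platform.LONG_ARRAY_OFFSET, 8)
    // append() may be called whether or not the key is already defined; each
    // successful call adds one more value to the key's chain.
    if (!loc.append(key, Platform.LONG_ARRAY_OFFSET, 8, value, Platform.LONG_ARRAY_OFFSET, 8)) {
      // handle failure to acquire memory, e.g. by spilling
    }
    // Re-position at the first value for the key and walk the chain via nextValue().
    val it = map.lookup(key, Platform.LONG_ARRAY_OFFSET, 8)
    if (it.isDefined) {
      do {
        val v = Platform.getLong(it.getValueBase, it.getValueOffset)
        // ... use v ...
      } while (it.nextValue())
    }
  }
}
```

This mirrors the new `multipleValuesForSameKey` test further down: `numKeys()` counts distinct keys, while `numValues()` counts every appended pair.
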
diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala
index 320a20033daea..81e41e6fa715e 100644
--- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala
+++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala
@@ -113,9 +113,15 @@ private[spark] class CoarseGrainedExecutorBackend(
 
     case Shutdown =>
       stopping.set(true)
-      executor.stop()
-      stop()
-      rpcEnv.shutdown()
+      new Thread("CoarseGrainedExecutorBackend-stop-executor") {
+        override def run(): Unit = {
+          // executor.stop() will call `SparkEnv.stop()` which waits until RpcEnv stops totally.
+          // However, if `executor.stop()` runs in some thread of RpcEnv, RpcEnv won't be able to
+          // stop until `executor.stop()` returns, which becomes a dead-lock (See SPARK-14180).
+          // Therefore, we put this line in a new thread.
+          executor.stop()
+        }
+      }.start()
   }
 
   override def onDisconnected(remoteAddress: RpcAddress): Unit = {
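
The comment above describes the SPARK-14180 dead-lock: `executor.stop()` eventually waits for the RpcEnv to shut down, so it must not run on an RpcEnv thread. The following simplified, self-contained sketch reproduces the same pattern with a single-threaded executor standing in for the RPC event loop; all names are illustrative.

```scala
import java.util.concurrent.Executors

object StopFromNewThreadSketch {
  def main(args: Array[String]): Unit = {
    val eventLoop = Executors.newSingleThreadExecutor()

    def stopEverything(): Unit = {
      eventLoop.shutdown()
      // Blocks until every event-loop task has finished -- including, potentially,
      // the task that called us.
      while (!eventLoop.isTerminated) { Thread.sleep(10) }
    }

    eventLoop.submit(new Runnable {
      override def run(): Unit = {
        // Calling stopEverything() directly here would never return: this task cannot
        // finish while stopEverything() is waiting for it to finish. Handing the call
        // to a fresh thread, as the Shutdown handler above now does, breaks the cycle.
        new Thread("stop-helper") {
          override def run(): Unit = stopEverything()
        }.start()
      }
    })
  }
}
```
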
diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala
index b7919efc4b12b..eb4f5331d6a60 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala
@@ -356,20 +356,17 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
 
   /**
    * Reset the state of CoarseGrainedSchedulerBackend to the initial state. Currently it will only
-   * be called in the yarn-client mode when AM re-registers after a failure, also dynamic
-   * allocation is enabled.
+   * be called in the yarn-client mode when AM re-registers after a failure.
    * */
   protected def reset(): Unit = synchronized {
-    if (Utils.isDynamicAllocationEnabled(conf)) {
-      numPendingExecutors = 0
-      executorsPendingToRemove.clear()
-
-      // Remove all the lingering executors that should be removed but not yet. The reason might be
-      // because (1) disconnected event is not yet received; (2) executors die silently.
-      executorDataMap.toMap.foreach { case (eid, _) =>
-        driverEndpoint.askWithRetry[Boolean](
-          RemoveExecutor(eid, SlaveLost("Stale executor after cluster manager re-registered.")))
-      }
+    numPendingExecutors = 0
+    executorsPendingToRemove.clear()
+
+    // Remove all the lingering executors that should be removed but not yet. The reason might be
+    // because (1) disconnected event is not yet received; (2) executors die silently.
+    executorDataMap.toMap.foreach { case (eid, _) =>
+      driverEndpoint.askWithRetry[Boolean](
+        RemoveExecutor(eid, SlaveLost("Stale executor after cluster manager re-registered.")))
     }
   }
 
diff --git a/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala b/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala
index f552b498a76de..3008520f61c3f 100644
--- a/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala
+++ b/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala
@@ -66,17 +66,6 @@ class StorageStatusListener(conf: SparkConf) extends SparkListener {
     }
   }
 
-  override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = synchronized {
-    val info = taskEnd.taskInfo
-    val metrics = taskEnd.taskMetrics
-    if (info != null && metrics != null) {
-      val updatedBlocks = metrics.updatedBlockStatuses
-      if (updatedBlocks.length > 0) {
-        updateStorageStatus(info.executorId, updatedBlocks)
-      }
-    }
-  }
-
   override def onUnpersistRDD(unpersistRDD: SparkListenerUnpersistRDD): Unit = synchronized {
     updateStorageStatus(unpersistRDD.rddId)
   }
@@ -102,4 +91,14 @@ class StorageStatusListener(conf: SparkConf) extends SparkListener {
       }
     }
   }
+
+  override def onBlockUpdated(blockUpdated: SparkListenerBlockUpdated): Unit = {
+    val executorId = blockUpdated.blockUpdatedInfo.blockManagerId.executorId
+    val blockId = blockUpdated.blockUpdatedInfo.blockId
+    val storageLevel = blockUpdated.blockUpdatedInfo.storageLevel
+    val memSize = blockUpdated.blockUpdatedInfo.memSize
+    val diskSize = blockUpdated.blockUpdatedInfo.diskSize
+    val blockStatus = BlockStatus(storageLevel, memSize, diskSize)
+    updateStorageStatus(executorId, Seq((blockId, blockStatus)))
+  }
 }
diff --git a/core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala b/core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala
index 8f75b586e1399..50095831b4a53 100644
--- a/core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala
+++ b/core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala
@@ -57,17 +57,6 @@ class StorageListener(storageStatusListener: StorageStatusListener) extends Bloc
     StorageUtils.updateRddInfo(rddInfosToUpdate, activeStorageStatusList)
   }
 
-  /**
-   * Assumes the storage status list is fully up-to-date. This implies the corresponding
-   * StorageStatusSparkListener must process the SparkListenerTaskEnd event before this listener.
-   */
-  override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = synchronized {
-    val metrics = taskEnd.taskMetrics
-    if (metrics != null && metrics.updatedBlockStatuses.nonEmpty) {
-      updateRDDInfo(metrics.updatedBlockStatuses)
-    }
-  }
-
   override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted): Unit = synchronized {
     val rddInfos = stageSubmitted.stageInfo.rddInfos
     rddInfos.foreach { info => _rddInfoMap.getOrElseUpdate(info.id, info) }
@@ -84,4 +73,14 @@ class StorageListener(storageStatusListener: StorageStatusListener) extends Bloc
   override def onUnpersistRDD(unpersistRDD: SparkListenerUnpersistRDD): Unit = synchronized {
     _rddInfoMap.remove(unpersistRDD.rddId)
   }
+
+  override def onBlockUpdated(blockUpdated: SparkListenerBlockUpdated): Unit = {
+    super.onBlockUpdated(blockUpdated)
+    val blockId = blockUpdated.blockUpdatedInfo.blockId
+    val storageLevel = blockUpdated.blockUpdatedInfo.storageLevel
+    val memSize = blockUpdated.blockUpdatedInfo.memSize
+    val diskSize = blockUpdated.blockUpdatedInfo.diskSize
+    val blockStatus = BlockStatus(storageLevel, memSize, diskSize)
+    updateRDDInfo(Seq((blockId, blockStatus)))
+  }
 }
diff --git a/core/src/main/scala/org/apache/spark/util/UninterruptibleThread.scala b/core/src/main/scala/org/apache/spark/util/UninterruptibleThread.scala
new file mode 100644
index 0000000000000..4dcf95177aa78
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/util/UninterruptibleThread.scala
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.util
+
+import javax.annotation.concurrent.GuardedBy
+
+/**
+ * A special Thread that provides "runUninterruptibly" to allow running code without being
+ * interrupted by `Thread.interrupt()`. If `Thread.interrupt()` is called while runUninterruptibly
+ * is running, it won't set the interrupted status. Instead, setting the interrupted status is
+ * deferred until the thread returns from "runUninterruptibly".
+ *
+ * Note: "runUninterruptibly" should be called only in `this` thread.
+ */
+private[spark] class UninterruptibleThread(name: String) extends Thread(name) {
+
+  /** A monitor to protect "uninterruptible" and "interrupted" */
+  private val uninterruptibleLock = new Object
+
+  /**
+   * Indicates if `this` thread is in the uninterruptible status. If so, interrupting
+   * `this` will be deferred until `this` enters the interruptible status.
+   */
+  @GuardedBy("uninterruptibleLock")
+  private var uninterruptible = false
+
+  /**
+   * Indicates if we should interrupt `this` when we are leaving the uninterruptible zone.
+   */
+  @GuardedBy("uninterruptibleLock")
+  private var shouldInterruptThread = false
+
+  /**
+   * Run `f` uninterruptibly in `this` thread. The thread won't be interrupted before returning
+   * from `f`.
+   *
+   * If this method finds that `interrupt` is called before calling `f` and it's not inside another
+   * `runUninterruptibly`, it will throw `InterruptedException`.
+   *
+   * Note: this method should be called only in `this` thread.
+   */
+  def runUninterruptibly[T](f: => T): T = {
+    if (Thread.currentThread() != this) {
+      throw new IllegalStateException(s"Call runUninterruptibly in a wrong thread. " +
+        s"Expected: $this but was ${Thread.currentThread()}")
+    }
+
+    if (uninterruptibleLock.synchronized { uninterruptible }) {
+      // We are already in the uninterruptible status. So just run "f" and return
+      return f
+    }
+
+    uninterruptibleLock.synchronized {
+      // Clear the interrupted status if it's set.
+      if (Thread.interrupted() || shouldInterruptThread) {
+        shouldInterruptThread = false
+        // Since it's interrupted, we don't need to run `f` which may be a long computation.
+        // Throw InterruptedException as we don't have a T to return.
+        throw new InterruptedException()
+      }
+      uninterruptible = true
+    }
+    try {
+      f
+    } finally {
+      uninterruptibleLock.synchronized {
+        uninterruptible = false
+        if (shouldInterruptThread) {
+          // Recover the interrupted status
+          super.interrupt()
+          shouldInterruptThread = false
+        }
+      }
+    }
+  }
+
+  /**
+   * Tests whether `interrupt()` has been called.
+   */
+  override def isInterrupted: Boolean = {
+    super.isInterrupted || uninterruptibleLock.synchronized { shouldInterruptThread }
+  }
+
+  /**
+   * Interrupt `this` thread if possible. If `this` is in the uninterruptible status, it won't be
+   * interrupted until it enters the interruptible status.
+   */
+  override def interrupt(): Unit = {
+    uninterruptibleLock.synchronized {
+      if (uninterruptible) {
+        shouldInterruptThread = true
+      } else {
+        super.interrupt()
+      }
+    }
+  }
+}
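
A short usage sketch for the class above (not part of the patch): since `UninterruptibleThread` is `private[spark]`, the example lives in `org.apache.spark.util`, and the sleep stands in for work that must not be interrupted, such as a write-ahead-log write.

```scala
package org.apache.spark.util

object UninterruptibleThreadExample {
  def main(args: Array[String]): Unit = {
    val t = new UninterruptibleThread("example") {
      override def run(): Unit = {
        runUninterruptibly {
          // interrupt() calls arriving here are deferred, so this sleep is not broken.
          Thread.sleep(1000)
        }
        // The deferred interrupt becomes visible once the block has returned.
        println(s"interrupted after block: ${Thread.interrupted()}")
      }
    }
    t.start()
    Thread.sleep(100) // crude way to ensure the block is running; the test suite uses a latch
    t.interrupt()     // deferred until runUninterruptibly returns
    t.join()
  }
}
```
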
diff --git a/core/src/main/scala/org/apache/spark/util/random/RandomSampler.scala b/core/src/main/scala/org/apache/spark/util/random/RandomSampler.scala
index 3c61528ab5287..2921b939bc0cf 100644
--- a/core/src/main/scala/org/apache/spark/util/random/RandomSampler.scala
+++ b/core/src/main/scala/org/apache/spark/util/random/RandomSampler.scala
@@ -39,7 +39,14 @@ import org.apache.spark.annotation.DeveloperApi
 trait RandomSampler[T, U] extends Pseudorandom with Cloneable with Serializable {
 
   /** take a random sample */
-  def sample(items: Iterator[T]): Iterator[U]
+  def sample(items: Iterator[T]): Iterator[U] =
+    items.filter(_ => sample > 0).asInstanceOf[Iterator[U]]
+
+  /**
+   * Decide whether the next item should be sampled.
+   * Returns how many times the next item will be sampled; returns 0 if it is not sampled.
+   */
+  def sample(): Int
 
   /** return a copy of the RandomSampler object */
   override def clone: RandomSampler[T, U] =
@@ -107,21 +114,13 @@ class BernoulliCellSampler[T](lb: Double, ub: Double, complement: Boolean = fals
 
   override def setSeed(seed: Long): Unit = rng.setSeed(seed)
 
-  override def sample(items: Iterator[T]): Iterator[T] = {
+  override def sample(): Int = {
     if (ub - lb <= 0.0) {
-      if (complement) items else Iterator.empty
+      if (complement) 1 else 0
     } else {
-      if (complement) {
-        items.filter { item => {
-          val x = rng.nextDouble()
-          (x < lb) || (x >= ub)
-        }}
-      } else {
-        items.filter { item => {
-          val x = rng.nextDouble()
-          (x >= lb) && (x < ub)
-        }}
-      }
+      val x = rng.nextDouble()
+      val n = if ((x >= lb) && (x < ub)) 1 else 0
+      if (complement) 1 - n else n
     }
   }
 
@@ -155,15 +154,22 @@ class BernoulliSampler[T: ClassTag](fraction: Double) extends RandomSampler[T, T
 
   override def setSeed(seed: Long): Unit = rng.setSeed(seed)
 
-  override def sample(items: Iterator[T]): Iterator[T] = {
+  private lazy val gapSampling: GapSampling =
+    new GapSampling(fraction, rng, RandomSampler.rngEpsilon)
+
+  override def sample(): Int = {
     if (fraction <= 0.0) {
-      Iterator.empty
+      0
     } else if (fraction >= 1.0) {
-      items
+      1
     } else if (fraction <= RandomSampler.defaultMaxGapSamplingFraction) {
-      new GapSamplingIterator(items, fraction, rng, RandomSampler.rngEpsilon)
+      gapSampling.sample()
     } else {
-      items.filter { _ => rng.nextDouble() <= fraction }
+      if (rng.nextDouble() <= fraction) {
+        1
+      } else {
+        0
+      }
     }
   }
 
@@ -201,15 +207,29 @@ class PoissonSampler[T: ClassTag](
     rngGap.setSeed(seed)
   }
 
-  override def sample(items: Iterator[T]): Iterator[T] = {
+  private lazy val gapSamplingReplacement =
+    new GapSamplingReplacement(fraction, rngGap, RandomSampler.rngEpsilon)
+
+  override def sample(): Int = {
     if (fraction <= 0.0) {
-      Iterator.empty
+      0
     } else if (useGapSamplingIfPossible &&
                fraction <= RandomSampler.defaultMaxGapSamplingFraction) {
-      new GapSamplingReplacementIterator(items, fraction, rngGap, RandomSampler.rngEpsilon)
+      gapSamplingReplacement.sample()
+    } else {
+      rng.sample()
+    }
+  }
+
+  override def sample(items: Iterator[T]): Iterator[T] = {
+    if (fraction <= 0.0) {
+      Iterator.empty
     } else {
+      val useGapSampling = useGapSamplingIfPossible &&
+        fraction <= RandomSampler.defaultMaxGapSamplingFraction
+
       items.flatMap { item =>
-        val count = rng.sample()
+        val count = if (useGapSampling) gapSamplingReplacement.sample() else rng.sample()
         if (count == 0) Iterator.empty else Iterator.fill(count)(item)
       }
     }
@@ -220,50 +240,36 @@ class PoissonSampler[T: ClassTag](
 
 
 private[spark]
-class GapSamplingIterator[T: ClassTag](
-    var data: Iterator[T],
+class GapSampling(
     f: Double,
     rng: Random = RandomSampler.newDefaultRNG,
-    epsilon: Double = RandomSampler.rngEpsilon) extends Iterator[T] {
+    epsilon: Double = RandomSampler.rngEpsilon) extends Serializable {
 
   require(f > 0.0  &&  f < 1.0, s"Sampling fraction ($f) must reside on open interval (0, 1)")
   require(epsilon > 0.0, s"epsilon ($epsilon) must be > 0")
 
-  /** implement efficient linear-sequence drop until Scala includes fix for jira SI-8835. */
-  private val iterDrop: Int => Unit = {
-    val arrayClass = Array.empty[T].iterator.getClass
-    val arrayBufferClass = ArrayBuffer.empty[T].iterator.getClass
-    data.getClass match {
-      case `arrayClass` =>
-        (n: Int) => { data = data.drop(n) }
-      case `arrayBufferClass` =>
-        (n: Int) => { data = data.drop(n) }
-      case _ =>
-        (n: Int) => {
-          var j = 0
-          while (j < n && data.hasNext) {
-            data.next()
-            j += 1
-          }
-        }
-    }
-  }
-
-  override def hasNext: Boolean = data.hasNext
+  private val lnq = math.log1p(-f)
 
-  override def next(): T = {
-    val r = data.next()
-    advance()
-    r
+  /** Return 1 if the next item should be sampled. Otherwise, return 0. */
+  def sample(): Int = {
+    if (countForDropping > 0) {
+      countForDropping -= 1
+      0
+    } else {
+      advance()
+      1
+    }
   }
 
-  private val lnq = math.log1p(-f)
+  private var countForDropping: Int = 0
 
-  /** skip elements that won't be sampled, according to geometric dist P(k) = (f)(1-f)^k. */
+  /**
+   * Decide the number of elements that won't be sampled,
+   * according to the geometric distribution P(k) = (f)(1-f)^k.
+   */
   private def advance(): Unit = {
     val u = math.max(rng.nextDouble(), epsilon)
-    val k = (math.log(u) / lnq).toInt
-    iterDrop(k)
+    countForDropping = (math.log(u) / lnq).toInt
   }
 
   /** advance to first sample as part of object construction. */
@@ -273,73 +279,24 @@ class GapSamplingIterator[T: ClassTag](
   // work reliably.
 }
 
+
 private[spark]
-class GapSamplingReplacementIterator[T: ClassTag](
-    var data: Iterator[T],
-    f: Double,
-    rng: Random = RandomSampler.newDefaultRNG,
-    epsilon: Double = RandomSampler.rngEpsilon) extends Iterator[T] {
+class GapSamplingReplacement(
+    val f: Double,
+    val rng: Random = RandomSampler.newDefaultRNG,
+    epsilon: Double = RandomSampler.rngEpsilon) extends Serializable {
 
   require(f > 0.0, s"Sampling fraction ($f) must be > 0")
   require(epsilon > 0.0, s"epsilon ($epsilon) must be > 0")
 
-  /** implement efficient linear-sequence drop until scala includes fix for jira SI-8835. */
-  private val iterDrop: Int => Unit = {
-    val arrayClass = Array.empty[T].iterator.getClass
-    val arrayBufferClass = ArrayBuffer.empty[T].iterator.getClass
-    data.getClass match {
-      case `arrayClass` =>
-        (n: Int) => { data = data.drop(n) }
-      case `arrayBufferClass` =>
-        (n: Int) => { data = data.drop(n) }
-      case _ =>
-        (n: Int) => {
-          var j = 0
-          while (j < n && data.hasNext) {
-            data.next()
-            j += 1
-          }
-        }
-    }
-  }
-
-  /** current sampling value, and its replication factor, as we are sampling with replacement. */
-  private var v: T = _
-  private var rep: Int = 0
-
-  override def hasNext: Boolean = data.hasNext || rep > 0
-
-  override def next(): T = {
-    val r = v
-    rep -= 1
-    if (rep <= 0) advance()
-    r
-  }
-
-  /**
-   * Skip elements with replication factor zero (i.e. elements that won't be sampled).
-   * Samples 'k' from geometric distribution  P(k) = (1-q)(q)^k, where q = e^(-f), that is
-   * q is the probability of Poisson(0; f)
-   */
-  private def advance(): Unit = {
-    val u = math.max(rng.nextDouble(), epsilon)
-    val k = (math.log(u) / (-f)).toInt
-    iterDrop(k)
-    // set the value and replication factor for the next value
-    if (data.hasNext) {
-      v = data.next()
-      rep = poissonGE1
-    }
-  }
-
-  private val q = math.exp(-f)
+  protected val q = math.exp(-f)
 
   /**
    * Sample from Poisson distribution, conditioned such that the sampled value is >= 1.
    * This is an adaptation from the algorithm for Generating Poisson distributed random variables:
    * http://en.wikipedia.org/wiki/Poisson_distribution
    */
-  private def poissonGE1: Int = {
+  protected def poissonGE1: Int = {
     // simulate that the standard poisson sampling
     // gave us at least one iteration, for a sample of >= 1
     var pp = q + ((1.0 - q) * rng.nextDouble())
@@ -353,6 +310,28 @@ class GapSamplingReplacementIterator[T: ClassTag](
     }
     r
   }
+  private var countForDropping: Int = 0
+
+  def sample(): Int = {
+    if (countForDropping > 0) {
+      countForDropping -= 1
+      0
+    } else {
+      val r = poissonGE1
+      advance()
+      r
+    }
+  }
+
+  /**
+   * Skip elements with replication factor zero (i.e. elements that won't be sampled).
+   * Samples 'k' from the geometric distribution P(k) = (1-q)(q)^k, where q = e^(-f), that is,
+   * q is the probability of Poisson(0; f).
+   */
+  private def advance(): Unit = {
+    val u = math.max(rng.nextDouble(), epsilon)
+    countForDropping = (math.log(u) / (-f)).toInt
+  }
 
   /** advance to first sample as part of object construction. */
   advance()
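
For a sampling fraction f, the gap between kept items follows the geometric distribution P(k) = f(1-f)^k, so the number of items to drop can be drawn as floor(log(u) / log(1-f)) with u uniform on (0, 1]. The sketch below re-derives the per-item `sample(): Int` contract that `GapSampling` now keeps as internal state instead of wrapping the iterator; all names here are illustrative, not part of the patch.

```scala
import scala.util.Random

object GapSamplingSketch {
  final class SimpleGapSampler(f: Double, rng: Random = new Random, epsilon: Double = 5e-11) {
    require(f > 0.0 && f < 1.0, s"fraction ($f) must be in (0, 1)")
    private val lnq = math.log1p(-f)   // log(1 - f)
    private var countForDropping = 0

    /** 1 if the next item should be kept, 0 if it should be dropped. */
    def sample(): Int =
      if (countForDropping > 0) { countForDropping -= 1; 0 }
      else { advance(); 1 }

    /** Draw the geometric gap: k = floor(log(u) / log(1 - f)). */
    private def advance(): Unit = {
      val u = math.max(rng.nextDouble(), epsilon)
      countForDropping = (math.log(u) / lnq).toInt
    }

    advance() // position before the first sampled item, as GapSampling does at construction
  }

  def main(args: Array[String]): Unit = {
    val sampler = new SimpleGapSampler(0.01)
    // Filtering with the per-item decision, as the default RandomSampler.sample(items) now does.
    val kept = Iterator.range(0, 1000000).count(_ => sampler.sample() > 0)
    println(s"kept $kept of 1000000 (expected about 10000)")
  }
}
```

Keeping the skip count as sampler state is what lets the trait's default `sample(items)` be a simple `filter`, while `PoissonSampler` can reuse the same per-item decision inside its `flatMap`.
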
diff --git a/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java b/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java
index 449fb45c301e2..84b82f5a4742c 100644
--- a/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java
+++ b/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java
@@ -182,7 +182,7 @@ private static boolean arrayEquals(
   public void emptyMap() {
     BytesToBytesMap map = new BytesToBytesMap(taskMemoryManager, 64, PAGE_SIZE_BYTES);
     try {
-      Assert.assertEquals(0, map.numElements());
+      Assert.assertEquals(0, map.numKeys());
       final int keyLengthInWords = 10;
       final int keyLengthInBytes = keyLengthInWords * 8;
       final byte[] key = getRandomByteArray(keyLengthInWords);
@@ -204,7 +204,7 @@ public void setAndRetrieveAKey() {
       final BytesToBytesMap.Location loc =
         map.lookup(keyData, Platform.BYTE_ARRAY_OFFSET, recordLengthBytes);
       Assert.assertFalse(loc.isDefined());
-      Assert.assertTrue(loc.putNewKey(
+      Assert.assertTrue(loc.append(
         keyData,
         Platform.BYTE_ARRAY_OFFSET,
         recordLengthBytes,
@@ -232,7 +232,7 @@ public void setAndRetrieveAKey() {
         getByteArray(loc.getValueBase(), loc.getValueOffset(), recordLengthBytes));
 
       try {
-        Assert.assertTrue(loc.putNewKey(
+        Assert.assertTrue(loc.append(
           keyData,
           Platform.BYTE_ARRAY_OFFSET,
           recordLengthBytes,
@@ -260,7 +260,7 @@ private void iteratorTestBase(boolean destructive) throws Exception {
         Assert.assertFalse(loc.isDefined());
         // Ensure that we store some zero-length keys
         if (i % 5 == 0) {
-          Assert.assertTrue(loc.putNewKey(
+          Assert.assertTrue(loc.append(
             null,
             Platform.LONG_ARRAY_OFFSET,
             0,
@@ -269,7 +269,7 @@ private void iteratorTestBase(boolean destructive) throws Exception {
             8
           ));
         } else {
-          Assert.assertTrue(loc.putNewKey(
+          Assert.assertTrue(loc.append(
             value,
             Platform.LONG_ARRAY_OFFSET,
             8,
@@ -349,7 +349,7 @@ public void iteratingOverDataPagesWithWastedSpace() throws Exception {
           KEY_LENGTH
         );
         Assert.assertFalse(loc.isDefined());
-        Assert.assertTrue(loc.putNewKey(
+        Assert.assertTrue(loc.append(
           key,
           Platform.LONG_ARRAY_OFFSET,
           KEY_LENGTH,
@@ -417,7 +417,7 @@ public void randomizedStressTest() {
             key.length
           );
           Assert.assertFalse(loc.isDefined());
-          Assert.assertTrue(loc.putNewKey(
+          Assert.assertTrue(loc.append(
             key,
             Platform.BYTE_ARRAY_OFFSET,
             key.length,
@@ -471,7 +471,7 @@ public void randomizedTestWithRecordsLargerThanPageSize() {
             key.length
           );
           Assert.assertFalse(loc.isDefined());
-          Assert.assertTrue(loc.putNewKey(
+          Assert.assertTrue(loc.append(
             key,
             Platform.BYTE_ARRAY_OFFSET,
             key.length,
@@ -514,7 +514,7 @@ public void failureToAllocateFirstPage() {
       final BytesToBytesMap.Location loc =
         map.lookup(emptyArray, Platform.LONG_ARRAY_OFFSET, 0);
       Assert.assertFalse(loc.isDefined());
-      Assert.assertFalse(loc.putNewKey(
+      Assert.assertFalse(loc.append(
         emptyArray, Platform.LONG_ARRAY_OFFSET, 0, emptyArray, Platform.LONG_ARRAY_OFFSET, 0));
     } finally {
       map.free();
@@ -535,7 +535,7 @@ public void failureToGrow() {
         final long[] arr = new long[]{i};
         final BytesToBytesMap.Location loc = map.lookup(arr, Platform.LONG_ARRAY_OFFSET, 8);
         success =
-          loc.putNewKey(arr, Platform.LONG_ARRAY_OFFSET, 8, arr, Platform.LONG_ARRAY_OFFSET, 8);
+          loc.append(arr, Platform.LONG_ARRAY_OFFSET, 8, arr, Platform.LONG_ARRAY_OFFSET, 8);
         if (!success) {
           break;
         }
@@ -556,7 +556,7 @@ public void spillInIterator() throws IOException {
       for (i = 0; i < 1024; i++) {
         final long[] arr = new long[]{i};
         final BytesToBytesMap.Location loc = map.lookup(arr, Platform.LONG_ARRAY_OFFSET, 8);
-        loc.putNewKey(arr, Platform.LONG_ARRAY_OFFSET, 8, arr, Platform.LONG_ARRAY_OFFSET, 8);
+        loc.append(arr, Platform.LONG_ARRAY_OFFSET, 8, arr, Platform.LONG_ARRAY_OFFSET, 8);
       }
       BytesToBytesMap.MapIterator iter = map.iterator();
       for (i = 0; i < 100; i++) {
@@ -586,6 +586,44 @@ public void spillInIterator() throws IOException {
     }
   }
 
+  @Test
+  public void multipleValuesForSameKey() {
+    BytesToBytesMap map =
+      new BytesToBytesMap(taskMemoryManager, blockManager, serializerManager, 1, 0.75, 1024, false);
+    try {
+      int i;
+      for (i = 0; i < 1024; i++) {
+        final long[] arr = new long[]{i};
+        map.lookup(arr, Platform.LONG_ARRAY_OFFSET, 8)
+          .append(arr, Platform.LONG_ARRAY_OFFSET, 8, arr, Platform.LONG_ARRAY_OFFSET, 8);
+      }
+      assert map.numKeys() == 1024;
+      assert map.numValues() == 1024;
+      for (i = 0; i < 1024; i++) {
+        final long[] arr = new long[]{i};
+        map.lookup(arr, Platform.LONG_ARRAY_OFFSET, 8)
+          .append(arr, Platform.LONG_ARRAY_OFFSET, 8, arr, Platform.LONG_ARRAY_OFFSET, 8);
+      }
+      assert map.numKeys() == 1024;
+      assert map.numValues() == 2048;
+      for (i = 0; i < 1024; i++) {
+        final long[] arr = new long[]{i};
+        final BytesToBytesMap.Location loc = map.lookup(arr, Platform.LONG_ARRAY_OFFSET, 8);
+        assert loc.isDefined();
+        assert loc.nextValue();
+        assert !loc.nextValue();
+      }
+      BytesToBytesMap.MapIterator iter = map.iterator();
+      for (i = 0; i < 2048; i++) {
+        assert iter.hasNext();
+        final BytesToBytesMap.Location loc = iter.next();
+        assert loc.isDefined();
+      }
+    } finally {
+      map.free();
+    }
+  }
+
   @Test
   public void initialCapacityBoundsChecking() {
     try {
@@ -608,7 +646,7 @@ public void initialCapacityBoundsChecking() {
 
   @Test
   public void testPeakMemoryUsed() {
-    final long recordLengthBytes = 24;
+    final long recordLengthBytes = 32;
     final long pageSizeBytes = 256 + 8; // 8 bytes for end-of-page marker
     final long numRecordsPerPage = (pageSizeBytes - 8) / recordLengthBytes;
     final BytesToBytesMap map = new BytesToBytesMap(taskMemoryManager, 1024, pageSizeBytes);
@@ -622,7 +660,7 @@ public void testPeakMemoryUsed() {
     try {
       for (long i = 0; i < numRecordsPerPage * 10; i++) {
         final long[] value = new long[]{i};
-        map.lookup(value, Platform.LONG_ARRAY_OFFSET, 8).putNewKey(
+        map.lookup(value, Platform.LONG_ARRAY_OFFSET, 8).append(
           value,
           Platform.LONG_ARRAY_OFFSET,
           8,
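
The `recordLengthBytes` change from 24 to 32 in `testPeakMemoryUsed` above follows directly from the new record layout; a quick check of the arithmetic (illustrative only):

```scala
// 4-byte total length + 4-byte key length + 8-byte key + 8-byte value
// + the new 8-byte pointer to the next value for the same key
val recordLengthBytes = 4 + 4 + 8 + 8 + 8  // = 32 (previously 4 + 4 + 8 + 8 = 24)
```
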
diff --git a/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json
index 4a88eeee747dc..efc865919b0d7 100644
--- a/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json
+++ b/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json
@@ -2,8 +2,8 @@
   "id" : "",
   "hostPort" : "localhost:57971",
   "isActive" : true,
-  "rddBlocks" : 8,
-  "memoryUsed" : 28000128,
+  "rddBlocks" : 0,
+  "memoryUsed" : 0,
   "diskUsed" : 0,
   "totalCores" : 0,
   "maxTasks" : 0,
diff --git a/core/src/test/resources/HistoryServerExpectations/rdd_list_storage_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/rdd_list_storage_json_expectation.json
index f79a31022d214..8878e547a7984 100644
--- a/core/src/test/resources/HistoryServerExpectations/rdd_list_storage_json_expectation.json
+++ b/core/src/test/resources/HistoryServerExpectations/rdd_list_storage_json_expectation.json
@@ -1,9 +1 @@
-[ {
-  "id" : 0,
-  "name" : "0",
-  "numPartitions" : 8,
-  "numCachedPartitions" : 8,
-  "storageLevel" : "Memory Deserialized 1x Replicated",
-  "memoryUsed" : 28000128,
-  "diskUsed" : 0
-} ]
\ No newline at end of file
+[ ]
\ No newline at end of file
diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala
index 5822261d8da75..79e4efb1a84fd 100644
--- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala
@@ -140,8 +140,9 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers
     "stage task list from multi-attempt app json(2)" ->
       "applications/local-1426533911241/2/stages/0/0/taskList",
 
-    "rdd list storage json" -> "applications/local-1422981780767/storage/rdd",
-    "one rdd storage json" -> "applications/local-1422981780767/storage/rdd/0"
+    "rdd list storage json" -> "applications/local-1422981780767/storage/rdd"
+    // TODO: enable this test when logging the event of onBlockUpdated. See: SPARK-13845
+    // "one rdd storage json" -> "applications/local-1422981780767/storage/rdd/0"
   )
 
   // run a bunch of characterization tests -- just verify the behavior is the same as what is saved
diff --git a/core/src/test/scala/org/apache/spark/rdd/PartitionwiseSampledRDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/PartitionwiseSampledRDDSuite.scala
index 132a5fa9a80fb..cb0de1c6beb6b 100644
--- a/core/src/test/scala/org/apache/spark/rdd/PartitionwiseSampledRDDSuite.scala
+++ b/core/src/test/scala/org/apache/spark/rdd/PartitionwiseSampledRDDSuite.scala
@@ -29,6 +29,8 @@ class MockSampler extends RandomSampler[Long, Long] {
     s = seed
   }
 
+  override def sample(): Int = 1
+
   override def sample(items: Iterator[Long]): Iterator[Long] = {
     Iterator(s)
   }
diff --git a/core/src/test/scala/org/apache/spark/storage/StorageStatusListenerSuite.scala b/core/src/test/scala/org/apache/spark/storage/StorageStatusListenerSuite.scala
index 14daa003bc5a6..9835f11a2f7ed 100644
--- a/core/src/test/scala/org/apache/spark/storage/StorageStatusListenerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/storage/StorageStatusListenerSuite.scala
@@ -82,48 +82,51 @@ class StorageStatusListenerSuite extends SparkFunSuite {
     assert(listener.executorIdToStorageStatus("fat").numBlocks === 0)
   }
 
-  test("task end with updated blocks") {
+  test("updated blocks") {
     val listener = new StorageStatusListener(conf)
     listener.onBlockManagerAdded(SparkListenerBlockManagerAdded(1L, bm1, 1000L))
     listener.onBlockManagerAdded(SparkListenerBlockManagerAdded(1L, bm2, 2000L))
-    val taskMetrics1 = new TaskMetrics
-    val taskMetrics2 = new TaskMetrics
-    val block1 = (RDDBlockId(1, 1), BlockStatus(StorageLevel.DISK_ONLY, 0L, 100L))
-    val block2 = (RDDBlockId(1, 2), BlockStatus(StorageLevel.DISK_ONLY, 0L, 200L))
-    val block3 = (RDDBlockId(4, 0), BlockStatus(StorageLevel.DISK_ONLY, 0L, 300L))
-    taskMetrics1.setUpdatedBlockStatuses(Seq(block1, block2))
-    taskMetrics2.setUpdatedBlockStatuses(Seq(block3))
-
-    // Task end with new blocks
+
+    val blockUpdateInfos1 = Seq(
+      BlockUpdatedInfo(bm1, RDDBlockId(1, 1), StorageLevel.DISK_ONLY, 0L, 100L),
+      BlockUpdatedInfo(bm1, RDDBlockId(1, 2), StorageLevel.DISK_ONLY, 0L, 200L)
+    )
+    val blockUpdateInfos2 =
+      Seq(BlockUpdatedInfo(bm2, RDDBlockId(4, 0), StorageLevel.DISK_ONLY, 0L, 300L))
+
+    // Add some new blocks
     assert(listener.executorIdToStorageStatus("big").numBlocks === 0)
     assert(listener.executorIdToStorageStatus("fat").numBlocks === 0)
-    listener.onTaskEnd(SparkListenerTaskEnd(1, 0, "obliteration", Success, taskInfo1, taskMetrics1))
+    postUpdateBlock(listener, blockUpdateInfos1)
     assert(listener.executorIdToStorageStatus("big").numBlocks === 2)
     assert(listener.executorIdToStorageStatus("fat").numBlocks === 0)
     assert(listener.executorIdToStorageStatus("big").containsBlock(RDDBlockId(1, 1)))
     assert(listener.executorIdToStorageStatus("big").containsBlock(RDDBlockId(1, 2)))
     assert(listener.executorIdToStorageStatus("fat").numBlocks === 0)
-    listener.onTaskEnd(SparkListenerTaskEnd(1, 0, "obliteration", Success, taskInfo2, taskMetrics2))
+    postUpdateBlock(listener, blockUpdateInfos2)
     assert(listener.executorIdToStorageStatus("big").numBlocks === 2)
     assert(listener.executorIdToStorageStatus("fat").numBlocks === 1)
     assert(listener.executorIdToStorageStatus("big").containsBlock(RDDBlockId(1, 1)))
     assert(listener.executorIdToStorageStatus("big").containsBlock(RDDBlockId(1, 2)))
     assert(listener.executorIdToStorageStatus("fat").containsBlock(RDDBlockId(4, 0)))
 
-    // Task end with dropped blocks
-    val droppedBlock1 = (RDDBlockId(1, 1), BlockStatus(StorageLevel.NONE, 0L, 0L))
-    val droppedBlock2 = (RDDBlockId(1, 2), BlockStatus(StorageLevel.NONE, 0L, 0L))
-    val droppedBlock3 = (RDDBlockId(4, 0), BlockStatus(StorageLevel.NONE, 0L, 0L))
-    taskMetrics1.setUpdatedBlockStatuses(Seq(droppedBlock1, droppedBlock3))
-    taskMetrics2.setUpdatedBlockStatuses(Seq(droppedBlock2, droppedBlock3))
+    // Drop the blocks
+    val droppedBlockInfo1 = Seq(
+      BlockUpdatedInfo(bm1, RDDBlockId(1, 1), StorageLevel.NONE, 0L, 0L),
+      BlockUpdatedInfo(bm1, RDDBlockId(4, 0), StorageLevel.NONE, 0L, 0L)
+    )
+    val droppedBlockInfo2 = Seq(
+      BlockUpdatedInfo(bm2, RDDBlockId(1, 2), StorageLevel.NONE, 0L, 0L),
+      BlockUpdatedInfo(bm2, RDDBlockId(4, 0), StorageLevel.NONE, 0L, 0L)
+    )
 
-    listener.onTaskEnd(SparkListenerTaskEnd(1, 0, "obliteration", Success, taskInfo1, taskMetrics1))
+    postUpdateBlock(listener, droppedBlockInfo1)
     assert(listener.executorIdToStorageStatus("big").numBlocks === 1)
     assert(listener.executorIdToStorageStatus("fat").numBlocks === 1)
     assert(!listener.executorIdToStorageStatus("big").containsBlock(RDDBlockId(1, 1)))
     assert(listener.executorIdToStorageStatus("big").containsBlock(RDDBlockId(1, 2)))
     assert(listener.executorIdToStorageStatus("fat").containsBlock(RDDBlockId(4, 0)))
-    listener.onTaskEnd(SparkListenerTaskEnd(1, 0, "obliteration", Success, taskInfo2, taskMetrics2))
+    postUpdateBlock(listener, droppedBlockInfo2)
     assert(listener.executorIdToStorageStatus("big").numBlocks === 1)
     assert(listener.executorIdToStorageStatus("fat").numBlocks === 0)
     assert(!listener.executorIdToStorageStatus("big").containsBlock(RDDBlockId(1, 1)))
@@ -134,15 +137,14 @@ class StorageStatusListenerSuite extends SparkFunSuite {
   test("unpersist RDD") {
     val listener = new StorageStatusListener(conf)
     listener.onBlockManagerAdded(SparkListenerBlockManagerAdded(1L, bm1, 1000L))
-    val taskMetrics1 = new TaskMetrics
-    val taskMetrics2 = new TaskMetrics
-    val block1 = (RDDBlockId(1, 1), BlockStatus(StorageLevel.DISK_ONLY, 0L, 100L))
-    val block2 = (RDDBlockId(1, 2), BlockStatus(StorageLevel.DISK_ONLY, 0L, 200L))
-    val block3 = (RDDBlockId(4, 0), BlockStatus(StorageLevel.DISK_ONLY, 0L, 300L))
-    taskMetrics1.setUpdatedBlockStatuses(Seq(block1, block2))
-    taskMetrics2.setUpdatedBlockStatuses(Seq(block3))
-    listener.onTaskEnd(SparkListenerTaskEnd(1, 0, "obliteration", Success, taskInfo1, taskMetrics1))
-    listener.onTaskEnd(SparkListenerTaskEnd(1, 0, "obliteration", Success, taskInfo1, taskMetrics2))
+    val blockUpdateInfos1 = Seq(
+      BlockUpdatedInfo(bm1, RDDBlockId(1, 1), StorageLevel.DISK_ONLY, 0L, 100L),
+      BlockUpdatedInfo(bm1, RDDBlockId(1, 2), StorageLevel.DISK_ONLY, 0L, 200L)
+    )
+    val blockUpdateInfos2 =
+      Seq(BlockUpdatedInfo(bm1, RDDBlockId(4, 0), StorageLevel.DISK_ONLY, 0L, 300L))
+    postUpdateBlock(listener, blockUpdateInfos1)
+    postUpdateBlock(listener, blockUpdateInfos2)
     assert(listener.executorIdToStorageStatus("big").numBlocks === 3)
 
     // Unpersist RDD
@@ -155,4 +157,11 @@ class StorageStatusListenerSuite extends SparkFunSuite {
     listener.onUnpersistRDD(SparkListenerUnpersistRDD(1))
     assert(listener.executorIdToStorageStatus("big").numBlocks === 0)
   }
+
+  private def postUpdateBlock(
+      listener: StorageStatusListener, updateBlockInfos: Seq[BlockUpdatedInfo]): Unit = {
+    updateBlockInfos.foreach { updateBlockInfo =>
+      listener.onBlockUpdated(SparkListenerBlockUpdated(updateBlockInfo))
+    }
+  }
 }
diff --git a/core/src/test/scala/org/apache/spark/ui/storage/StorageTabSuite.scala b/core/src/test/scala/org/apache/spark/ui/storage/StorageTabSuite.scala
index 6b7c538ac8549..7d77deeb60618 100644
--- a/core/src/test/scala/org/apache/spark/ui/storage/StorageTabSuite.scala
+++ b/core/src/test/scala/org/apache/spark/ui/storage/StorageTabSuite.scala
@@ -106,7 +106,7 @@ class StorageTabSuite extends SparkFunSuite with BeforeAndAfter {
     assert(storageListener.rddInfoList.size === 0)
   }
 
-  test("task end") {
+  test("block update") {
     val myRddInfo0 = rddInfo0
     val myRddInfo1 = rddInfo1
     val myRddInfo2 = rddInfo2
@@ -120,19 +120,13 @@ class StorageTabSuite extends SparkFunSuite with BeforeAndAfter {
     assert(!storageListener._rddInfoMap(1).isCached)
     assert(!storageListener._rddInfoMap(2).isCached)
 
-    // Task end with no updated blocks. This should not change anything.
-    bus.postToAll(SparkListenerTaskEnd(0, 0, "obliteration", Success, taskInfo, new TaskMetrics))
-    assert(storageListener._rddInfoMap.size === 3)
-    assert(storageListener.rddInfoList.size === 0)
-
-    // Task end with a few new persisted blocks, some from the same RDD
-    val metrics1 = new TaskMetrics
-    metrics1.setUpdatedBlockStatuses(Seq(
-      (RDDBlockId(0, 100), BlockStatus(memAndDisk, 400L, 0L)),
-      (RDDBlockId(0, 101), BlockStatus(memAndDisk, 0L, 400L)),
-      (RDDBlockId(1, 20), BlockStatus(memAndDisk, 0L, 240L))
-    ))
-    bus.postToAll(SparkListenerTaskEnd(1, 0, "obliteration", Success, taskInfo, metrics1))
+    // Some blocks updated
+    val blockUpdateInfos = Seq(
+      BlockUpdatedInfo(bm1, RDDBlockId(0, 100), memAndDisk, 400L, 0L),
+      BlockUpdatedInfo(bm1, RDDBlockId(0, 101), memAndDisk, 0L, 400L),
+      BlockUpdatedInfo(bm1, RDDBlockId(1, 20), memAndDisk, 0L, 240L)
+    )
+    postUpdateBlocks(bus, blockUpdateInfos)
     assert(storageListener._rddInfoMap(0).memSize === 400L)
     assert(storageListener._rddInfoMap(0).diskSize === 400L)
     assert(storageListener._rddInfoMap(0).numCachedPartitions === 2)
@@ -144,15 +138,14 @@ class StorageTabSuite extends SparkFunSuite with BeforeAndAfter {
     assert(!storageListener._rddInfoMap(2).isCached)
     assert(storageListener._rddInfoMap(2).numCachedPartitions === 0)
 
-    // Task end with a few dropped blocks
-    val metrics2 = new TaskMetrics
-    metrics2.setUpdatedBlockStatuses(Seq(
-      (RDDBlockId(0, 100), BlockStatus(none, 0L, 0L)),
-      (RDDBlockId(1, 20), BlockStatus(none, 0L, 0L)),
-      (RDDBlockId(2, 40), BlockStatus(none, 0L, 0L)), // doesn't actually exist
-      (RDDBlockId(4, 80), BlockStatus(none, 0L, 0L)) // doesn't actually exist
-    ))
-    bus.postToAll(SparkListenerTaskEnd(2, 0, "obliteration", Success, taskInfo, metrics2))
+    // Drop some blocks
+    val blockUpdateInfos2 = Seq(
+      BlockUpdatedInfo(bm1, RDDBlockId(0, 100), none, 0L, 0L),
+      BlockUpdatedInfo(bm1, RDDBlockId(1, 20), none, 0L, 0L),
+      BlockUpdatedInfo(bm1, RDDBlockId(2, 40), none, 0L, 0L), // doesn't actually exist
+      BlockUpdatedInfo(bm1, RDDBlockId(4, 80), none, 0L, 0L) // doesn't actually exist
+    )
+    postUpdateBlocks(bus, blockUpdateInfos2)
     assert(storageListener._rddInfoMap(0).memSize === 0L)
     assert(storageListener._rddInfoMap(0).diskSize === 400L)
     assert(storageListener._rddInfoMap(0).numCachedPartitions === 1)
@@ -169,24 +162,27 @@ class StorageTabSuite extends SparkFunSuite with BeforeAndAfter {
     val rddInfo1 = new RDDInfo(1, "rdd1", 1, memOnly, Seq(4))
     val stageInfo0 = new StageInfo(0, 0, "stage0", 1, Seq(rddInfo0), Seq.empty, "details")
     val stageInfo1 = new StageInfo(1, 0, "stage1", 1, Seq(rddInfo1), Seq.empty, "details")
-    val taskMetrics0 = new TaskMetrics
-    val taskMetrics1 = new TaskMetrics
-    val block0 = (RDDBlockId(0, 1), BlockStatus(memOnly, 100L, 0L))
-    val block1 = (RDDBlockId(1, 1), BlockStatus(memOnly, 200L, 0L))
-    taskMetrics0.setUpdatedBlockStatuses(Seq(block0))
-    taskMetrics1.setUpdatedBlockStatuses(Seq(block1))
+    val blockUpdateInfos1 = Seq(BlockUpdatedInfo(bm1, RDDBlockId(0, 1), memOnly, 100L, 0L))
+    val blockUpdateInfos2 = Seq(BlockUpdatedInfo(bm1, RDDBlockId(1, 1), memOnly, 200L, 0L))
     bus.postToAll(SparkListenerBlockManagerAdded(1L, bm1, 1000L))
     bus.postToAll(SparkListenerStageSubmitted(stageInfo0))
     assert(storageListener.rddInfoList.size === 0)
-    bus.postToAll(SparkListenerTaskEnd(0, 0, "big", Success, taskInfo, taskMetrics0))
+    postUpdateBlocks(bus, blockUpdateInfos1)
     assert(storageListener.rddInfoList.size === 1)
     bus.postToAll(SparkListenerStageSubmitted(stageInfo1))
     assert(storageListener.rddInfoList.size === 1)
     bus.postToAll(SparkListenerStageCompleted(stageInfo0))
     assert(storageListener.rddInfoList.size === 1)
-    bus.postToAll(SparkListenerTaskEnd(1, 0, "small", Success, taskInfo1, taskMetrics1))
+    postUpdateBlocks(bus, blockUpdateInfos2)
     assert(storageListener.rddInfoList.size === 2)
     bus.postToAll(SparkListenerStageCompleted(stageInfo1))
     assert(storageListener.rddInfoList.size === 2)
   }
+
+  private def postUpdateBlocks(
+      bus: SparkListenerBus, blockUpdateInfos: Seq[BlockUpdatedInfo]): Unit = {
+    blockUpdateInfos.foreach { blockUpdateInfo =>
+      bus.postToAll(SparkListenerBlockUpdated(blockUpdateInfo))
+    }
+  }
 }
diff --git a/core/src/test/scala/org/apache/spark/util/UninterruptibleThreadSuite.scala b/core/src/test/scala/org/apache/spark/util/UninterruptibleThreadSuite.scala
new file mode 100644
index 0000000000000..39b31f8ddeaba
--- /dev/null
+++ b/core/src/test/scala/org/apache/spark/util/UninterruptibleThreadSuite.scala
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.util
+
+import java.util.concurrent.{CountDownLatch, TimeUnit}
+
+import scala.util.Random
+
+import com.google.common.util.concurrent.Uninterruptibles
+
+import org.apache.spark.SparkFunSuite
+
+class UninterruptibleThreadSuite extends SparkFunSuite {
+
+  /** Sleep millis and return true if it's interrupted */
+  private def sleep(millis: Long): Boolean = {
+    try {
+      Thread.sleep(millis)
+      false
+    } catch {
+      case _: InterruptedException =>
+        true
+    }
+  }
+
+  test("interrupt when runUninterruptibly is running") {
+    val enterRunUninterruptibly = new CountDownLatch(1)
+    @volatile var hasInterruptedException = false
+    @volatile var interruptStatusBeforeExit = false
+    val t = new UninterruptibleThread("test") {
+      override def run(): Unit = {
+        runUninterruptibly {
+          enterRunUninterruptibly.countDown()
+          hasInterruptedException = sleep(1000)
+        }
+        interruptStatusBeforeExit = Thread.interrupted()
+      }
+    }
+    t.start()
+    assert(enterRunUninterruptibly.await(10, TimeUnit.SECONDS), "await timeout")
+    t.interrupt()
+    t.join()
+    assert(hasInterruptedException === false)
+    assert(interruptStatusBeforeExit === true)
+  }
+
+  test("interrupt before runUninterruptibly runs") {
+    val interruptLatch = new CountDownLatch(1)
+    @volatile var hasInterruptedException = false
+    @volatile var interruptStatusBeforeExit = false
+    val t = new UninterruptibleThread("test") {
+      override def run(): Unit = {
+        Uninterruptibles.awaitUninterruptibly(interruptLatch, 10, TimeUnit.SECONDS)
+        try {
+          runUninterruptibly {
+            assert(false, "Should not reach here")
+          }
+        } catch {
+          case _: InterruptedException => hasInterruptedException = true
+        }
+        interruptStatusBeforeExit = Thread.interrupted()
+      }
+    }
+    t.start()
+    t.interrupt()
+    interruptLatch.countDown()
+    t.join()
+    assert(hasInterruptedException === true)
+    assert(interruptStatusBeforeExit === false)
+  }
+
+  test("nested runUninterruptibly") {
+    val enterRunUninterruptibly = new CountDownLatch(1)
+    val interruptLatch = new CountDownLatch(1)
+    @volatile var hasInterruptedException = false
+    @volatile var interruptStatusBeforeExit = false
+    val t = new UninterruptibleThread("test") {
+      override def run(): Unit = {
+        runUninterruptibly {
+          enterRunUninterruptibly.countDown()
+          Uninterruptibles.awaitUninterruptibly(interruptLatch, 10, TimeUnit.SECONDS)
+          hasInterruptedException = sleep(1)
+          runUninterruptibly {
+            if (sleep(1)) {
+              hasInterruptedException = true
+            }
+          }
+          if (sleep(1)) {
+            hasInterruptedException = true
+          }
+        }
+        interruptStatusBeforeExit = Thread.interrupted()
+      }
+    }
+    t.start()
+    assert(enterRunUninterruptibly.await(10, TimeUnit.SECONDS), "await timeout")
+    t.interrupt()
+    interruptLatch.countDown()
+    t.join()
+    assert(hasInterruptedException === false)
+    assert(interruptStatusBeforeExit === true)
+  }
+
+  test("stress test") {
+    @volatile var hasInterruptedException = false
+    val t = new UninterruptibleThread("test") {
+      override def run(): Unit = {
+        for (i <- 0 until 100) {
+          try {
+            runUninterruptibly {
+              if (sleep(Random.nextInt(10))) {
+                hasInterruptedException = true
+              }
+              runUninterruptibly {
+                if (sleep(Random.nextInt(10))) {
+                  hasInterruptedException = true
+                }
+              }
+              if (sleep(Random.nextInt(10))) {
+                hasInterruptedException = true
+              }
+            }
+            Uninterruptibles.sleepUninterruptibly(Random.nextInt(10), TimeUnit.MILLISECONDS)
+            // 50% chance to clear the interrupted status
+            if (Random.nextBoolean()) {
+              Thread.interrupted()
+            }
+          } catch {
+            case _: InterruptedException =>
+              // The first runUninterruptibly may throw InterruptedException if the interrupt status
+              // is set before running `f`.
+          }
+        }
+      }
+    }
+    t.start()
+    for (i <- 0 until 400) {
+      Thread.sleep(Random.nextInt(10))
+      t.interrupt()
+    }
+    t.join()
+    assert(hasInterruptedException === false)
+  }
+}
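The new suite above pins down the contract of UninterruptibleThread: an interrupt() delivered while runUninterruptibly is executing is deferred until the block exits, after which it reappears as the thread's interrupt status. A minimal usage sketch of that behaviour, assuming only the API exercised by the tests (the thread name and sleep duration are illustrative):

    import org.apache.spark.util.UninterruptibleThread

    val worker = new UninterruptibleThread("example") {
      override def run(): Unit = {
        runUninterruptibly {
          // An interrupt() arriving here is deferred, so this sleep completes
          // instead of throwing InterruptedException.
          Thread.sleep(1000)
        }
        // The deferred interrupt is now visible as the thread's interrupt status.
        println(s"interrupted after block: ${Thread.interrupted()}")
      }
    }
    worker.start()
    worker.interrupt()
    worker.join()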
diff --git a/core/src/test/scala/org/apache/spark/util/random/RandomSamplerSuite.scala b/core/src/test/scala/org/apache/spark/util/random/RandomSamplerSuite.scala
index 791491daf0817..7eb2f56c20585 100644
--- a/core/src/test/scala/org/apache/spark/util/random/RandomSamplerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/random/RandomSamplerSuite.scala
@@ -129,6 +129,13 @@ class RandomSamplerSuite extends SparkFunSuite with Matchers {
     t(m / 2)
   }
 
+  def replacementSampling(data: Iterator[Int], sampler: PoissonSampler[Int]): Iterator[Int] = {
+    data.flatMap { item =>
+      val count = sampler.sample()
+      if (count == 0) Iterator.empty else Iterator.fill(count)(item)
+    }
+  }
+
   test("utilities") {
     val s1 = Array(0, 1, 1, 0, 2)
     val s2 = Array(1, 0, 3, 2, 1)
@@ -189,6 +196,36 @@ class RandomSamplerSuite extends SparkFunSuite with Matchers {
     d should be > D
   }
 
+  test("bernoulli sampling without iterator") {
+    // Tests expect maximum gap sampling fraction to be this value
+    RandomSampler.defaultMaxGapSamplingFraction should be (0.4)
+
+    var d: Double = 0.0
+
+    val data = Iterator.from(0)
+
+    var sampler: RandomSampler[Int, Int] = new BernoulliSampler[Int](0.5)
+    sampler.setSeed(rngSeed.nextLong)
+    d = medianKSD(gaps(data.filter(_ => sampler.sample() > 0)), gaps(sample(Iterator.from(0), 0.5)))
+    d should be < D
+
+    sampler = new BernoulliSampler[Int](0.7)
+    sampler.setSeed(rngSeed.nextLong)
+    d = medianKSD(gaps(data.filter(_ => sampler.sample() > 0)), gaps(sample(Iterator.from(0), 0.7)))
+    d should be < D
+
+    sampler = new BernoulliSampler[Int](0.9)
+    sampler.setSeed(rngSeed.nextLong)
+    d = medianKSD(gaps(data.filter(_ => sampler.sample() > 0)), gaps(sample(Iterator.from(0), 0.9)))
+    d should be < D
+
+    // sampling at different frequencies should show up as statistically different:
+    sampler = new BernoulliSampler[Int](0.5)
+    sampler.setSeed(rngSeed.nextLong)
+    d = medianKSD(gaps(data.filter(_ => sampler.sample() > 0)), gaps(sample(Iterator.from(0), 0.6)))
+    d should be > D
+  }
+
   test("bernoulli sampling with gap sampling optimization") {
     // Tests expect maximum gap sampling fraction to be this value
     RandomSampler.defaultMaxGapSamplingFraction should be (0.4)
@@ -217,6 +254,37 @@ class RandomSamplerSuite extends SparkFunSuite with Matchers {
     d should be > D
   }
 
+  test("bernoulli sampling (without iterator) with gap sampling optimization") {
+    // Tests expect maximum gap sampling fraction to be this value
+    RandomSampler.defaultMaxGapSamplingFraction should be (0.4)
+
+    var d: Double = 0.0
+
+    val data = Iterator.from(0)
+
+    var sampler: RandomSampler[Int, Int] = new BernoulliSampler[Int](0.01)
+    sampler.setSeed(rngSeed.nextLong)
+    d = medianKSD(gaps(data.filter(_ => sampler.sample() > 0)),
+      gaps(sample(Iterator.from(0), 0.01)))
+    d should be < D
+
+    sampler = new BernoulliSampler[Int](0.1)
+    sampler.setSeed(rngSeed.nextLong)
+    d = medianKSD(gaps(data.filter(_ => sampler.sample() > 0)), gaps(sample(Iterator.from(0), 0.1)))
+    d should be < D
+
+    sampler = new BernoulliSampler[Int](0.3)
+    sampler.setSeed(rngSeed.nextLong)
+    d = medianKSD(gaps(data.filter(_ => sampler.sample() > 0)), gaps(sample(Iterator.from(0), 0.3)))
+    d should be < D
+
+    // sampling at different frequencies should show up as statistically different:
+    sampler = new BernoulliSampler[Int](0.3)
+    sampler.setSeed(rngSeed.nextLong)
+    d = medianKSD(gaps(data.filter(_ => sampler.sample() > 0)), gaps(sample(Iterator.from(0), 0.4)))
+    d should be > D
+  }
+
   test("bernoulli boundary cases") {
     val data = (1 to 100).toArray
 
@@ -233,6 +301,22 @@ class RandomSamplerSuite extends SparkFunSuite with Matchers {
     sampler.sample(data.iterator).toArray should be (data)
   }
 
+  test("bernoulli (without iterator) boundary cases") {
+    val data = (1 to 100).toArray
+
+    var sampler = new BernoulliSampler[Int](0.0)
+    data.filter(_ => sampler.sample() > 0) should be (Array.empty[Int])
+
+    sampler = new BernoulliSampler[Int](1.0)
+    data.filter(_ => sampler.sample() > 0) should be (data)
+
+    sampler = new BernoulliSampler[Int](0.0 - (RandomSampler.roundingEpsilon / 2.0))
+    data.filter(_ => sampler.sample() > 0) should be (Array.empty[Int])
+
+    sampler = new BernoulliSampler[Int](1.0 + (RandomSampler.roundingEpsilon / 2.0))
+    data.filter(_ => sampler.sample() > 0) should be (data)
+  }
+
   test("bernoulli data types") {
     // Tests expect maximum gap sampling fraction to be this value
     RandomSampler.defaultMaxGapSamplingFraction should be (0.4)
@@ -341,6 +425,36 @@ class RandomSamplerSuite extends SparkFunSuite with Matchers {
     d should be > D
   }
 
+  test("replacement sampling without iterator") {
+    // Tests expect maximum gap sampling fraction to be this value
+    RandomSampler.defaultMaxGapSamplingFraction should be (0.4)
+
+    var d: Double = 0.0
+
+    val data = Iterator.from(0)
+
+    var sampler = new PoissonSampler[Int](0.5)
+    sampler.setSeed(rngSeed.nextLong)
+    d = medianKSD(gaps(replacementSampling(data, sampler)), gaps(sampleWR(Iterator.from(0), 0.5)))
+    d should be < D
+
+    sampler = new PoissonSampler[Int](0.7)
+    sampler.setSeed(rngSeed.nextLong)
+    d = medianKSD(gaps(replacementSampling(data, sampler)), gaps(sampleWR(Iterator.from(0), 0.7)))
+    d should be < D
+
+    sampler = new PoissonSampler[Int](0.9)
+    sampler.setSeed(rngSeed.nextLong)
+    d = medianKSD(gaps(replacementSampling(data, sampler)), gaps(sampleWR(Iterator.from(0), 0.9)))
+    d should be < D
+
+    // sampling at different frequencies should show up as statistically different:
+    sampler = new PoissonSampler[Int](0.5)
+    sampler.setSeed(rngSeed.nextLong)
+    d = medianKSD(gaps(replacementSampling(data, sampler)), gaps(sampleWR(Iterator.from(0), 0.6)))
+    d should be > D
+  }
+
   test("replacement sampling with gap sampling") {
     // Tests expect maximum gap sampling fraction to be this value
     RandomSampler.defaultMaxGapSamplingFraction should be (0.4)
@@ -369,6 +483,36 @@ class RandomSamplerSuite extends SparkFunSuite with Matchers {
     d should be > D
   }
 
+  test("replacement sampling (without iterator) with gap sampling") {
+    // Tests expect maximum gap sampling fraction to be this value
+    RandomSampler.defaultMaxGapSamplingFraction should be (0.4)
+
+    var d: Double = 0.0
+
+    val data = Iterator.from(0)
+
+    var sampler = new PoissonSampler[Int](0.01)
+    sampler.setSeed(rngSeed.nextLong)
+    d = medianKSD(gaps(replacementSampling(data, sampler)), gaps(sampleWR(Iterator.from(0), 0.01)))
+    d should be < D
+
+    sampler = new PoissonSampler[Int](0.1)
+    sampler.setSeed(rngSeed.nextLong)
+    d = medianKSD(gaps(replacementSampling(data, sampler)), gaps(sampleWR(Iterator.from(0), 0.1)))
+    d should be < D
+
+    sampler = new PoissonSampler[Int](0.3)
+    sampler.setSeed(rngSeed.nextLong)
+    d = medianKSD(gaps(replacementSampling(data, sampler)), gaps(sampleWR(Iterator.from(0), 0.3)))
+    d should be < D
+
+    // sampling at different frequencies should show up as statistically different:
+    sampler = new PoissonSampler[Int](0.3)
+    sampler.setSeed(rngSeed.nextLong)
+    d = medianKSD(gaps(replacementSampling(data, sampler)), gaps(sampleWR(Iterator.from(0), 0.4)))
+    d should be > D
+  }
+
   test("replacement boundary cases") {
     val data = (1 to 100).toArray
 
@@ -383,6 +527,20 @@ class RandomSamplerSuite extends SparkFunSuite with Matchers {
     sampler.sample(data.iterator).length should be > (data.length)
   }
 
+  test("replacement (without) boundary cases") {
+    val data = (1 to 100).toArray
+
+    var sampler = new PoissonSampler[Int](0.0)
+    replacementSampling(data.iterator, sampler).toArray should be (Array.empty[Int])
+
+    sampler = new PoissonSampler[Int](0.0 - (RandomSampler.roundingEpsilon / 2.0))
+    replacementSampling(data.iterator, sampler).toArray should be (Array.empty[Int])
+
+    // sampling with replacement has no upper bound on sampling fraction
+    sampler = new PoissonSampler[Int](2.0)
+    replacementSampling(data.iterator, sampler).length should be > (data.length)
+  }
+
   test("replacement data types") {
     // Tests expect maximum gap sampling fraction to be this value
     RandomSampler.defaultMaxGapSamplingFraction should be (0.4)
@@ -477,6 +635,22 @@ class RandomSamplerSuite extends SparkFunSuite with Matchers {
     d should be < D
   }
 
+  test("bernoulli partitioning sampling without iterator") {
+    var d: Double = 0.0
+
+    val data = Iterator.from(0)
+
+    var sampler = new BernoulliCellSampler[Int](0.1, 0.2)
+    sampler.setSeed(rngSeed.nextLong)
+    d = medianKSD(gaps(data.filter(_ => sampler.sample() > 0)), gaps(sample(Iterator.from(0), 0.1)))
+    d should be < D
+
+    sampler = new BernoulliCellSampler[Int](0.1, 0.2, true)
+    sampler.setSeed(rngSeed.nextLong)
+    d = medianKSD(gaps(data.filter(_ => sampler.sample() > 0)), gaps(sample(Iterator.from(0), 0.9)))
+    d should be < D
+  }
+
   test("bernoulli partitioning boundary cases") {
     val data = (1 to 100).toArray
     val d = RandomSampler.roundingEpsilon / 2.0
@@ -500,6 +674,29 @@ class RandomSamplerSuite extends SparkFunSuite with Matchers {
     sampler.sample(data.iterator).toArray should be (Array.empty[Int])
   }
 
+  test("bernoulli partitioning (without iterator) boundary cases") {
+    val data = (1 to 100).toArray
+    val d = RandomSampler.roundingEpsilon / 2.0
+
+    var sampler = new BernoulliCellSampler[Int](0.0, 0.0)
+    data.filter(_ => sampler.sample() > 0).toArray should be (Array.empty[Int])
+
+    sampler = new BernoulliCellSampler[Int](0.5, 0.5)
+    data.filter(_ => sampler.sample() > 0).toArray should be (Array.empty[Int])
+
+    sampler = new BernoulliCellSampler[Int](1.0, 1.0)
+    data.filter(_ => sampler.sample() > 0).toArray should be (Array.empty[Int])
+
+    sampler = new BernoulliCellSampler[Int](0.0, 1.0)
+    data.filter(_ => sampler.sample() > 0).toArray should be (data)
+
+    sampler = new BernoulliCellSampler[Int](0.0 - d, 1.0 + d)
+    data.filter(_ => sampler.sample() > 0).toArray should be (data)
+
+    sampler = new BernoulliCellSampler[Int](0.5, 0.5 - d)
+    data.filter(_ => sampler.sample() > 0).toArray should be (Array.empty[Int])
+  }
+
   test("bernoulli partitioning data") {
     val seed = rngSeed.nextLong
     val data = (1 to 100).toArray
diff --git a/dev/deps/spark-deps-hadoop-2.2 b/dev/deps/spark-deps-hadoop-2.2
index 512675a599b3c..7c2f88bdb13e2 100644
--- a/dev/deps/spark-deps-hadoop-2.2
+++ b/dev/deps/spark-deps-hadoop-2.2
@@ -3,6 +3,7 @@ RoaringBitmap-0.5.11.jar
 ST4-4.0.4.jar
 activation-1.1.jar
 antlr-runtime-3.5.2.jar
+antlr4-runtime-4.5.2-1.jar
 aopalliance-1.0.jar
 apache-log4j-extras-1.2.17.jar
 arpack_combined_all-0.1.jar
diff --git a/dev/deps/spark-deps-hadoop-2.3 b/dev/deps/spark-deps-hadoop-2.3
index 31f8694fedfcd..f4d600038d204 100644
--- a/dev/deps/spark-deps-hadoop-2.3
+++ b/dev/deps/spark-deps-hadoop-2.3
@@ -3,6 +3,7 @@ RoaringBitmap-0.5.11.jar
 ST4-4.0.4.jar
 activation-1.1.1.jar
 antlr-runtime-3.5.2.jar
+antlr4-runtime-4.5.2-1.jar
 aopalliance-1.0.jar
 apache-log4j-extras-1.2.17.jar
 arpack_combined_all-0.1.jar
diff --git a/dev/deps/spark-deps-hadoop-2.4 b/dev/deps/spark-deps-hadoop-2.4
index 0fa8bccab0317..7c5e2c35bd1f4 100644
--- a/dev/deps/spark-deps-hadoop-2.4
+++ b/dev/deps/spark-deps-hadoop-2.4
@@ -3,6 +3,7 @@ RoaringBitmap-0.5.11.jar
 ST4-4.0.4.jar
 activation-1.1.1.jar
 antlr-runtime-3.5.2.jar
+antlr4-runtime-4.5.2-1.jar
 aopalliance-1.0.jar
 apache-log4j-extras-1.2.17.jar
 arpack_combined_all-0.1.jar
diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6
index 8d2f6e6e32ab9..03d9a51057369 100644
--- a/dev/deps/spark-deps-hadoop-2.6
+++ b/dev/deps/spark-deps-hadoop-2.6
@@ -3,6 +3,7 @@ RoaringBitmap-0.5.11.jar
 ST4-4.0.4.jar
 activation-1.1.1.jar
 antlr-runtime-3.5.2.jar
+antlr4-runtime-4.5.2-1.jar
 aopalliance-1.0.jar
 apache-log4j-extras-1.2.17.jar
 apacheds-i18n-2.0.0-M15.jar
diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7
index a114c4ae8dba0..5765071a1c400 100644
--- a/dev/deps/spark-deps-hadoop-2.7
+++ b/dev/deps/spark-deps-hadoop-2.7
@@ -3,6 +3,7 @@ RoaringBitmap-0.5.11.jar
 ST4-4.0.4.jar
 activation-1.1.1.jar
 antlr-runtime-3.5.2.jar
+antlr4-runtime-4.5.2-1.jar
 aopalliance-1.0.jar
 apache-log4j-extras-1.2.17.jar
 apacheds-i18n-2.0.0-M15.jar
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
index fcb1b5999fae7..a783fe305f904 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
@@ -276,7 +276,7 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) extends Seriali
         if (Random.nextDouble() < probability) { Some(vidVvals._1) }
         else { None }
       }
-      if (selectedVertices.count > 1) {
+      if (selectedVertices.count > 0) {
         found = true
         val collectedVertices = selectedVertices.collect()
         retVal = collectedVertices(Random.nextInt(collectedVertices.length))
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala
index cb981797d3239..96aa262a395c8 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala
@@ -404,4 +404,13 @@ class GraphSuite extends SparkFunSuite with LocalSparkContext {
       assert(sc.getPersistentRDDs.isEmpty)
     }
   }
+
+  test("SPARK-14219: pickRandomVertex") {
+    withSpark { sc =>
+      val vert = sc.parallelize(List((1L, "a")), 1)
+      val edges = sc.parallelize(List(Edge[Long](1L, 1L)), 1)
+      val g0 = Graph(vert, edges)
+      assert(g0.pickRandomVertex() === 1L)
+    }
+  }
 }
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala
index 963f81cb3ec39..040b0093b9495 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala
@@ -19,25 +19,19 @@ package org.apache.spark.ml.tuning
 
 import com.github.fommil.netlib.F2jBLAS
 import org.apache.hadoop.fs.Path
-import org.json4s.{DefaultFormats, JObject}
-import org.json4s.jackson.JsonMethods._
+import org.json4s.DefaultFormats
 
-import org.apache.spark.SparkContext
 import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.internal.Logging
 import org.apache.spark.ml._
-import org.apache.spark.ml.classification.OneVsRestParams
 import org.apache.spark.ml.evaluation.Evaluator
-import org.apache.spark.ml.feature.RFormulaModel
 import org.apache.spark.ml.param._
 import org.apache.spark.ml.param.shared.HasSeed
 import org.apache.spark.ml.util._
-import org.apache.spark.ml.util.DefaultParamsReader.Metadata
 import org.apache.spark.mllib.util.MLUtils
 import org.apache.spark.sql.DataFrame
 import org.apache.spark.sql.types.StructType
 
-
 /**
  * Params for [[CrossValidator]] and [[CrossValidatorModel]].
  */
@@ -45,6 +39,7 @@ private[ml] trait CrossValidatorParams extends ValidatorParams with HasSeed {
   /**
    * Param for number of folds for cross validation.  Must be >= 2.
    * Default: 3
+   *
    * @group param
    */
   val numFolds: IntParam = new IntParam(this, "numFolds",
@@ -163,10 +158,10 @@ object CrossValidator extends MLReadable[CrossValidator] {
 
   private[CrossValidator] class CrossValidatorWriter(instance: CrossValidator) extends MLWriter {
 
-    SharedReadWrite.validateParams(instance)
+    ValidatorParams.validateParams(instance)
 
     override protected def saveImpl(path: String): Unit =
-      SharedReadWrite.saveImpl(path, instance, sc)
+      ValidatorParams.saveImpl(path, instance, sc)
   }
 
   private class CrossValidatorReader extends MLReader[CrossValidator] {
@@ -175,8 +170,11 @@ object CrossValidator extends MLReadable[CrossValidator] {
     private val className = classOf[CrossValidator].getName
 
     override def load(path: String): CrossValidator = {
-      val (metadata, estimator, evaluator, estimatorParamMaps, numFolds) =
-        SharedReadWrite.load(path, sc, className)
+      implicit val format = DefaultFormats
+
+      val (metadata, estimator, evaluator, estimatorParamMaps) =
+        ValidatorParams.loadImpl(path, sc, className)
+      val numFolds = (metadata.params \ "numFolds").extract[Int]
       new CrossValidator(metadata.uid)
         .setEstimator(estimator)
         .setEvaluator(evaluator)
@@ -184,123 +182,6 @@ object CrossValidator extends MLReadable[CrossValidator] {
         .setNumFolds(numFolds)
     }
   }
-
-  private object CrossValidatorReader {
-    /**
-     * Examine the given estimator (which may be a compound estimator) and extract a mapping
-     * from UIDs to corresponding [[Params]] instances.
-     */
-    def getUidMap(instance: Params): Map[String, Params] = {
-      val uidList = getUidMapImpl(instance)
-      val uidMap = uidList.toMap
-      if (uidList.size != uidMap.size) {
-        throw new RuntimeException("CrossValidator.load found a compound estimator with stages" +
-          s" with duplicate UIDs.  List of UIDs: ${uidList.map(_._1).mkString(", ")}")
-      }
-      uidMap
-    }
-
-    def getUidMapImpl(instance: Params): List[(String, Params)] = {
-      val subStages: Array[Params] = instance match {
-        case p: Pipeline => p.getStages.asInstanceOf[Array[Params]]
-        case pm: PipelineModel => pm.stages.asInstanceOf[Array[Params]]
-        case v: ValidatorParams => Array(v.getEstimator, v.getEvaluator)
-        case ovr: OneVsRestParams =>
-          // TODO: SPARK-11892: This case may require special handling.
-          throw new UnsupportedOperationException("CrossValidator write will fail because it" +
-            " cannot yet handle an estimator containing type: ${ovr.getClass.getName}")
-        case rformModel: RFormulaModel => Array(rformModel.pipelineModel)
-        case _: Params => Array()
-      }
-      val subStageMaps = subStages.map(getUidMapImpl).foldLeft(List.empty[(String, Params)])(_ ++ _)
-      List((instance.uid, instance)) ++ subStageMaps
-    }
-  }
-
-  private[tuning] object SharedReadWrite {
-
-    /**
-     * Check that [[CrossValidator.evaluator]] and [[CrossValidator.estimator]] are Writable.
-     * This does not check [[CrossValidator.estimatorParamMaps]].
-     */
-    def validateParams(instance: ValidatorParams): Unit = {
-      def checkElement(elem: Params, name: String): Unit = elem match {
-        case stage: MLWritable => // good
-        case other =>
-          throw new UnsupportedOperationException("CrossValidator write will fail " +
-            s" because it contains $name which does not implement Writable." +
-            s" Non-Writable $name: ${other.uid} of type ${other.getClass}")
-      }
-      checkElement(instance.getEvaluator, "evaluator")
-      checkElement(instance.getEstimator, "estimator")
-      // Check to make sure all Params apply to this estimator.  Throw an error if any do not.
-      // Extraneous Params would cause problems when loading the estimatorParamMaps.
-      val uidToInstance: Map[String, Params] = CrossValidatorReader.getUidMap(instance)
-      instance.getEstimatorParamMaps.foreach { case pMap: ParamMap =>
-        pMap.toSeq.foreach { case ParamPair(p, v) =>
-          require(uidToInstance.contains(p.parent), s"CrossValidator save requires all Params in" +
-            s" estimatorParamMaps to apply to this CrossValidator, its Estimator, or its" +
-            s" Evaluator.  An extraneous Param was found: $p")
-        }
-      }
-    }
-
-    private[tuning] def saveImpl(
-        path: String,
-        instance: CrossValidatorParams,
-        sc: SparkContext,
-        extraMetadata: Option[JObject] = None): Unit = {
-      import org.json4s.JsonDSL._
-
-      val estimatorParamMapsJson = compact(render(
-        instance.getEstimatorParamMaps.map { case paramMap =>
-          paramMap.toSeq.map { case ParamPair(p, v) =>
-            Map("parent" -> p.parent, "name" -> p.name, "value" -> p.jsonEncode(v))
-          }
-        }.toSeq
-      ))
-      val jsonParams = List(
-        "numFolds" -> parse(instance.numFolds.jsonEncode(instance.getNumFolds)),
-        "estimatorParamMaps" -> parse(estimatorParamMapsJson)
-      )
-      DefaultParamsWriter.saveMetadata(instance, path, sc, extraMetadata, Some(jsonParams))
-
-      val evaluatorPath = new Path(path, "evaluator").toString
-      instance.getEvaluator.asInstanceOf[MLWritable].save(evaluatorPath)
-      val estimatorPath = new Path(path, "estimator").toString
-      instance.getEstimator.asInstanceOf[MLWritable].save(estimatorPath)
-    }
-
-    private[tuning] def load[M <: Model[M]](
-        path: String,
-        sc: SparkContext,
-        expectedClassName: String): (Metadata, Estimator[M], Evaluator, Array[ParamMap], Int) = {
-
-      val metadata = DefaultParamsReader.loadMetadata(path, sc, expectedClassName)
-
-      implicit val format = DefaultFormats
-      val evaluatorPath = new Path(path, "evaluator").toString
-      val evaluator = DefaultParamsReader.loadParamsInstance[Evaluator](evaluatorPath, sc)
-      val estimatorPath = new Path(path, "estimator").toString
-      val estimator = DefaultParamsReader.loadParamsInstance[Estimator[M]](estimatorPath, sc)
-
-      val uidToParams = Map(evaluator.uid -> evaluator) ++ CrossValidatorReader.getUidMap(estimator)
-
-      val numFolds = (metadata.params \ "numFolds").extract[Int]
-      val estimatorParamMaps: Array[ParamMap] =
-        (metadata.params \ "estimatorParamMaps").extract[Seq[Seq[Map[String, String]]]].map {
-          pMap =>
-            val paramPairs = pMap.map { case pInfo: Map[String, String] =>
-              val est = uidToParams(pInfo("parent"))
-              val param = est.getParam(pInfo("name"))
-              val value = param.jsonDecode(pInfo("value"))
-              param -> value
-            }
-            ParamMap(paramPairs: _*)
-        }.toArray
-      (metadata, estimator, evaluator, estimatorParamMaps, numFolds)
-    }
-  }
 }
 
 /**
@@ -346,8 +227,6 @@ class CrossValidatorModel private[ml] (
 @Since("1.6.0")
 object CrossValidatorModel extends MLReadable[CrossValidatorModel] {
 
-  import CrossValidator.SharedReadWrite
-
   @Since("1.6.0")
   override def read: MLReader[CrossValidatorModel] = new CrossValidatorModelReader
 
@@ -357,12 +236,12 @@ object CrossValidatorModel extends MLReadable[CrossValidatorModel] {
   private[CrossValidatorModel]
   class CrossValidatorModelWriter(instance: CrossValidatorModel) extends MLWriter {
 
-    SharedReadWrite.validateParams(instance)
+    ValidatorParams.validateParams(instance)
 
     override protected def saveImpl(path: String): Unit = {
       import org.json4s.JsonDSL._
       val extraMetadata = "avgMetrics" -> instance.avgMetrics.toSeq
-      SharedReadWrite.saveImpl(path, instance, sc, Some(extraMetadata))
+      ValidatorParams.saveImpl(path, instance, sc, Some(extraMetadata))
       val bestModelPath = new Path(path, "bestModel").toString
       instance.bestModel.asInstanceOf[MLWritable].save(bestModelPath)
     }
@@ -376,8 +255,9 @@ object CrossValidatorModel extends MLReadable[CrossValidatorModel] {
     override def load(path: String): CrossValidatorModel = {
       implicit val format = DefaultFormats
 
-      val (metadata, estimator, evaluator, estimatorParamMaps, numFolds) =
-        SharedReadWrite.load(path, sc, className)
+      val (metadata, estimator, evaluator, estimatorParamMaps) =
+        ValidatorParams.loadImpl(path, sc, className)
+      val numFolds = (metadata.params \ "numFolds").extract[Int]
       val bestModelPath = new Path(path, "bestModel").toString
       val bestModel = DefaultParamsReader.loadParamsInstance[Model[_]](bestModelPath, sc)
       val avgMetrics = (metadata.metadata \ "avgMetrics").extract[Seq[Double]].toArray
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala
index 70fa5f0234753..4d1d6364d7ad8 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala
@@ -17,12 +17,15 @@
 
 package org.apache.spark.ml.tuning
 
+import org.apache.hadoop.fs.Path
+import org.json4s.DefaultFormats
+
 import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.internal.Logging
 import org.apache.spark.ml.{Estimator, Model}
 import org.apache.spark.ml.evaluation.Evaluator
 import org.apache.spark.ml.param.{DoubleParam, ParamMap, ParamValidators}
-import org.apache.spark.ml.util.Identifiable
+import org.apache.spark.ml.util._
 import org.apache.spark.sql.DataFrame
 import org.apache.spark.sql.types.StructType
 
@@ -33,6 +36,7 @@ private[ml] trait TrainValidationSplitParams extends ValidatorParams {
   /**
    * Param for ratio between train and validation data. Must be between 0 and 1.
    * Default: 0.75
+   *
    * @group param
    */
   val trainRatio: DoubleParam = new DoubleParam(this, "trainRatio",
@@ -55,7 +59,7 @@ private[ml] trait TrainValidationSplitParams extends ValidatorParams {
 @Experimental
 class TrainValidationSplit @Since("1.5.0") (@Since("1.5.0") override val uid: String)
   extends Estimator[TrainValidationSplitModel]
-  with TrainValidationSplitParams with Logging {
+  with TrainValidationSplitParams with MLWritable with Logging {
 
   @Since("1.5.0")
   def this() = this(Identifiable.randomUID("tvs"))
@@ -130,6 +134,47 @@ class TrainValidationSplit @Since("1.5.0") (@Since("1.5.0") override val uid: St
     }
     copied
   }
+
+  @Since("2.0.0")
+  override def write: MLWriter = new TrainValidationSplit.TrainValidationSplitWriter(this)
+}
+
+@Since("2.0.0")
+object TrainValidationSplit extends MLReadable[TrainValidationSplit] {
+
+  @Since("2.0.0")
+  override def read: MLReader[TrainValidationSplit] = new TrainValidationSplitReader
+
+  @Since("2.0.0")
+  override def load(path: String): TrainValidationSplit = super.load(path)
+
+  private[TrainValidationSplit] class TrainValidationSplitWriter(instance: TrainValidationSplit)
+    extends MLWriter {
+
+    ValidatorParams.validateParams(instance)
+
+    override protected def saveImpl(path: String): Unit =
+      ValidatorParams.saveImpl(path, instance, sc)
+  }
+
+  private class TrainValidationSplitReader extends MLReader[TrainValidationSplit] {
+
+    /** Checked against metadata when loading model */
+    private val className = classOf[TrainValidationSplit].getName
+
+    override def load(path: String): TrainValidationSplit = {
+      implicit val format = DefaultFormats
+
+      val (metadata, estimator, evaluator, estimatorParamMaps) =
+        ValidatorParams.loadImpl(path, sc, className)
+      val trainRatio = (metadata.params \ "trainRatio").extract[Double]
+      new TrainValidationSplit(metadata.uid)
+        .setEstimator(estimator)
+        .setEvaluator(evaluator)
+        .setEstimatorParamMaps(estimatorParamMaps)
+        .setTrainRatio(trainRatio)
+    }
+  }
 }
 
 /**
@@ -146,7 +191,7 @@ class TrainValidationSplitModel private[ml] (
     @Since("1.5.0") override val uid: String,
     @Since("1.5.0") val bestModel: Model[_],
     @Since("1.5.0") val validationMetrics: Array[Double])
-  extends Model[TrainValidationSplitModel] with TrainValidationSplitParams {
+  extends Model[TrainValidationSplitModel] with TrainValidationSplitParams with MLWritable {
 
   @Since("1.5.0")
   override def transform(dataset: DataFrame): DataFrame = {
@@ -167,4 +212,53 @@ class TrainValidationSplitModel private[ml] (
       validationMetrics.clone())
     copyValues(copied, extra)
   }
+
+  @Since("2.0.0")
+  override def write: MLWriter = new TrainValidationSplitModel.TrainValidationSplitModelWriter(this)
+}
+
+@Since("2.0.0")
+object TrainValidationSplitModel extends MLReadable[TrainValidationSplitModel] {
+
+  @Since("2.0.0")
+  override def read: MLReader[TrainValidationSplitModel] = new TrainValidationSplitModelReader
+
+  @Since("2.0.0")
+  override def load(path: String): TrainValidationSplitModel = super.load(path)
+
+  private[TrainValidationSplitModel]
+  class TrainValidationSplitModelWriter(instance: TrainValidationSplitModel) extends MLWriter {
+
+    ValidatorParams.validateParams(instance)
+
+    override protected def saveImpl(path: String): Unit = {
+      import org.json4s.JsonDSL._
+      val extraMetadata = "validationMetrics" -> instance.validationMetrics.toSeq
+      ValidatorParams.saveImpl(path, instance, sc, Some(extraMetadata))
+      val bestModelPath = new Path(path, "bestModel").toString
+      instance.bestModel.asInstanceOf[MLWritable].save(bestModelPath)
+    }
+  }
+
+  private class TrainValidationSplitModelReader extends MLReader[TrainValidationSplitModel] {
+
+    /** Checked against metadata when loading model */
+    private val className = classOf[TrainValidationSplitModel].getName
+
+    override def load(path: String): TrainValidationSplitModel = {
+      implicit val format = DefaultFormats
+
+      val (metadata, estimator, evaluator, estimatorParamMaps) =
+        ValidatorParams.loadImpl(path, sc, className)
+      val trainRatio = (metadata.params \ "trainRatio").extract[Double]
+      val bestModelPath = new Path(path, "bestModel").toString
+      val bestModel = DefaultParamsReader.loadParamsInstance[Model[_]](bestModelPath, sc)
+      val validationMetrics = (metadata.metadata \ "validationMetrics").extract[Seq[Double]].toArray
+      val tvs = new TrainValidationSplitModel(metadata.uid, bestModel, validationMetrics)
+      tvs.set(tvs.estimator, estimator)
+        .set(tvs.evaluator, evaluator)
+        .set(tvs.estimatorParamMaps, estimatorParamMaps)
+        .set(tvs.trainRatio, trainRatio)
+    }
+  }
 }
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/ValidatorParams.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/ValidatorParams.scala
index 953456e8f0dca..7a4e106aeb999 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tuning/ValidatorParams.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/ValidatorParams.scala
@@ -17,9 +17,17 @@
 
 package org.apache.spark.ml.tuning
 
-import org.apache.spark.ml.Estimator
+import org.apache.hadoop.fs.Path
+import org.json4s.{DefaultFormats, _}
+import org.json4s.jackson.JsonMethods._
+
+import org.apache.spark.SparkContext
+import org.apache.spark.ml.{Estimator, Model}
 import org.apache.spark.ml.evaluation.Evaluator
-import org.apache.spark.ml.param.{Param, ParamMap, Params}
+import org.apache.spark.ml.param.{Param, ParamMap, ParamPair, Params}
+import org.apache.spark.ml.util.{DefaultParamsReader, DefaultParamsWriter, MetaAlgorithmReadWrite,
+  MLWritable}
+import org.apache.spark.ml.util.DefaultParamsReader.Metadata
 import org.apache.spark.sql.types.StructType
 
 /**
@@ -69,3 +77,108 @@ private[ml] trait ValidatorParams extends Params {
     est.copy(firstEstimatorParamMap).transformSchema(schema)
   }
 }
+
+private[ml] object ValidatorParams {
+  /**
+   * Check that [[ValidatorParams.evaluator]] and [[ValidatorParams.estimator]] are Writable.
+   * This does not check [[ValidatorParams.estimatorParamMaps]].
+   */
+  def validateParams(instance: ValidatorParams): Unit = {
+    def checkElement(elem: Params, name: String): Unit = elem match {
+      case stage: MLWritable => // good
+      case other =>
+        throw new UnsupportedOperationException(instance.getClass.getName + " write will fail " +
+          s" because it contains $name which does not implement Writable." +
+          s" Non-Writable $name: ${other.uid} of type ${other.getClass}")
+    }
+    checkElement(instance.getEvaluator, "evaluator")
+    checkElement(instance.getEstimator, "estimator")
+    // Check to make sure all Params apply to this estimator.  Throw an error if any do not.
+    // Extraneous Params would cause problems when loading the estimatorParamMaps.
+    val uidToInstance: Map[String, Params] = MetaAlgorithmReadWrite.getUidMap(instance)
+    instance.getEstimatorParamMaps.foreach { case pMap: ParamMap =>
+      pMap.toSeq.foreach { case ParamPair(p, v) =>
+        require(uidToInstance.contains(p.parent), s"ValidatorParams save requires all Params in" +
+          s" estimatorParamMaps to apply to this ValidatorParams, its Estimator, or its" +
+          s" Evaluator. An extraneous Param was found: $p")
+      }
+    }
+  }
+
+  /**
+   * Generic implementation of save for [[ValidatorParams]] types.
+   * This handles all [[ValidatorParams]] fields and saves [[Param]] values, but the implementing
+   * class needs to handle model data.
+   */
+  def saveImpl(
+      path: String,
+      instance: ValidatorParams,
+      sc: SparkContext,
+      extraMetadata: Option[JObject] = None): Unit = {
+    import org.json4s.JsonDSL._
+
+    val estimatorParamMapsJson = compact(render(
+      instance.getEstimatorParamMaps.map { case paramMap =>
+        paramMap.toSeq.map { case ParamPair(p, v) =>
+          Map("parent" -> p.parent, "name" -> p.name, "value" -> p.jsonEncode(v))
+        }
+      }.toSeq
+    ))
+
+    val validatorSpecificParams = instance match {
+      case cv: CrossValidatorParams =>
+        List("numFolds" -> parse(cv.numFolds.jsonEncode(cv.getNumFolds)))
+      case tvs: TrainValidationSplitParams =>
+        List("trainRatio" -> parse(tvs.trainRatio.jsonEncode(tvs.getTrainRatio)))
+      case _ =>
+        // This should not happen.
+        throw new NotImplementedError("ValidatorParams.saveImpl does not handle type: " +
+          instance.getClass.getCanonicalName)
+    }
+
+    val jsonParams = validatorSpecificParams ++ List(
+      "estimatorParamMaps" -> parse(estimatorParamMapsJson))
+
+    DefaultParamsWriter.saveMetadata(instance, path, sc, extraMetadata, Some(jsonParams))
+
+    val evaluatorPath = new Path(path, "evaluator").toString
+    instance.getEvaluator.asInstanceOf[MLWritable].save(evaluatorPath)
+    val estimatorPath = new Path(path, "estimator").toString
+    instance.getEstimator.asInstanceOf[MLWritable].save(estimatorPath)
+  }
+
+  /**
+   * Generic implementation of load for [[ValidatorParams]] types.
+   * This handles all [[ValidatorParams]] fields, but the implementing
+   * class needs to handle model data and special [[Param]] values.
+   */
+  def loadImpl[M <: Model[M]](
+      path: String,
+      sc: SparkContext,
+      expectedClassName: String): (Metadata, Estimator[M], Evaluator, Array[ParamMap]) = {
+
+    val metadata = DefaultParamsReader.loadMetadata(path, sc, expectedClassName)
+
+    implicit val format = DefaultFormats
+    val evaluatorPath = new Path(path, "evaluator").toString
+    val evaluator = DefaultParamsReader.loadParamsInstance[Evaluator](evaluatorPath, sc)
+    val estimatorPath = new Path(path, "estimator").toString
+    val estimator = DefaultParamsReader.loadParamsInstance[Estimator[M]](estimatorPath, sc)
+
+    val uidToParams = Map(evaluator.uid -> evaluator) ++ MetaAlgorithmReadWrite.getUidMap(estimator)
+
+    val estimatorParamMaps: Array[ParamMap] =
+      (metadata.params \ "estimatorParamMaps").extract[Seq[Seq[Map[String, String]]]].map {
+        pMap =>
+          val paramPairs = pMap.map { case pInfo: Map[String, String] =>
+            val est = uidToParams(pInfo("parent"))
+            val param = est.getParam(pInfo("name"))
+            val value = param.jsonDecode(pInfo("value"))
+            param -> value
+          }
+          ParamMap(paramPairs: _*)
+      }.toArray
+
+    (metadata, estimator, evaluator, estimatorParamMaps)
+  }
+}
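With the shared saveImpl/loadImpl above, CrossValidator and TrainValidationSplit persist the same layout: JSON metadata carrying the validator-specific param (numFolds or trainRatio) plus the encoded estimatorParamMaps, alongside "estimator" and "evaluator" subdirectories written via their own MLWritable implementations. A minimal round-trip sketch using the CrossValidator wiring from this patch (the path and grid values are illustrative):

    import org.apache.spark.ml.classification.LogisticRegression
    import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator
    import org.apache.spark.ml.tuning.{CrossValidator, ParamGridBuilder}

    val lr = new LogisticRegression()
    val grid = new ParamGridBuilder().addGrid(lr.regParam, Array(0.1, 0.01)).build()
    val cv = new CrossValidator()
      .setEstimator(lr)                              // must be MLWritable (see validateParams)
      .setEvaluator(new BinaryClassificationEvaluator)
      .setEstimatorParamMaps(grid)
      .setNumFolds(3)

    cv.write.overwrite().save("/tmp/cv-example")     // delegates to ValidatorParams.saveImpl
    val restored = CrossValidator.load("/tmp/cv-example")
    assert(restored.getNumFolds == 3)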
diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala b/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala
index c95e536abd10b..5a596cad060e5 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala
@@ -21,13 +21,18 @@ import java.io.IOException
 
 import org.apache.hadoop.fs.Path
 import org.json4s._
-import org.json4s.jackson.JsonMethods._
+import org.json4s.{DefaultFormats, JObject}
 import org.json4s.JsonDSL._
+import org.json4s.jackson.JsonMethods._
 
 import org.apache.spark.SparkContext
 import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.internal.Logging
+import org.apache.spark.ml._
+import org.apache.spark.ml.classification.OneVsRestParams
+import org.apache.spark.ml.feature.RFormulaModel
 import org.apache.spark.ml.param.{ParamPair, Params}
+import org.apache.spark.ml.tuning.ValidatorParams
 import org.apache.spark.sql.SQLContext
 import org.apache.spark.util.Utils
 
@@ -352,3 +357,38 @@ private[ml] object DefaultParamsReader {
     cls.getMethod("read").invoke(null).asInstanceOf[MLReader[T]].load(path)
   }
 }
+
+/**
+ * Default Meta-Algorithm read and write implementation.
+ */
+private[ml] object MetaAlgorithmReadWrite {
+  /**
+   * Examine the given estimator (which may be a compound estimator) and extract a mapping
+   * from UIDs to corresponding [[Params]] instances.
+   */
+  def getUidMap(instance: Params): Map[String, Params] = {
+    val uidList = getUidMapImpl(instance)
+    val uidMap = uidList.toMap
+    if (uidList.size != uidMap.size) {
+      throw new RuntimeException(s"${instance.getClass.getName}.load found a compound estimator" +
+        s" with stages with duplicate UIDs. List of UIDs: ${uidList.map(_._1).mkString(", ")}.")
+    }
+    uidMap
+  }
+
+  private def getUidMapImpl(instance: Params): List[(String, Params)] = {
+    val subStages: Array[Params] = instance match {
+      case p: Pipeline => p.getStages.asInstanceOf[Array[Params]]
+      case pm: PipelineModel => pm.stages.asInstanceOf[Array[Params]]
+      case v: ValidatorParams => Array(v.getEstimator, v.getEvaluator)
+      case ovr: OneVsRestParams =>
+        // TODO: SPARK-11892: This case may require special handling.
+        throw new UnsupportedOperationException(s"${instance.getClass.getName} write will fail" +
+          s" because it cannot yet handle an estimator containing type: ${ovr.getClass.getName}.")
+      case rformModel: RFormulaModel => Array(rformModel.pipelineModel)
+      case _: Params => Array()
+    }
+    val subStageMaps = subStages.map(getUidMapImpl).foldLeft(List.empty[(String, Params)])(_ ++ _)
+    List((instance.uid, instance)) ++ subStageMaps
+  }
+}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
index a7beb81980299..37a21cd879bfd 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
@@ -390,6 +390,8 @@ class KMeans private (
     // Initialize each run's first center to a random point.
     val seed = new XORShiftRandom(this.seed).nextInt()
     val sample = data.takeSample(true, runs, seed).toSeq
+    // Could be empty if data is empty; fail with a better message early:
+    require(sample.size >= runs, s"Required $runs samples but got ${sample.size} from $data")
     val newCenters = Array.tabulate(runs)(r => ArrayBuffer(sample(r).toDense))
 
     /** Merges new centers to centers. */
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
index c6de7751f57f4..a09bc65cf3c3d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
@@ -613,7 +613,7 @@ class SparseMatrix @Since("1.3.0") (
 
   private[mllib] def update(i: Int, j: Int, v: Double): Unit = {
     val ind = index(i, j)
-    if (ind == -1) {
+    if (ind < 0) {
       throw new NoSuchElementException("The given row and column indices correspond to a zero " +
         "value. Only non-zero elements in Sparse Matrices can be updated.")
     } else {
diff --git a/mllib/src/test/scala/org/apache/spark/ml/tuning/TrainValidationSplitSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/tuning/TrainValidationSplitSuite.scala
index cf8dcefebc3aa..7cf7b3e087590 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/tuning/TrainValidationSplitSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/tuning/TrainValidationSplitSuite.scala
@@ -19,17 +19,20 @@ package org.apache.spark.ml.tuning
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.ml.{Estimator, Model}
-import org.apache.spark.ml.classification.LogisticRegression
+import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressionModel}
 import org.apache.spark.ml.evaluation.{BinaryClassificationEvaluator, Evaluator, RegressionEvaluator}
 import org.apache.spark.ml.param.ParamMap
 import org.apache.spark.ml.param.shared.HasInputCol
 import org.apache.spark.ml.regression.LinearRegression
+import org.apache.spark.ml.util.DefaultReadWriteTest
 import org.apache.spark.mllib.classification.LogisticRegressionSuite.generateLogisticInput
+import org.apache.spark.mllib.linalg.Vectors
 import org.apache.spark.mllib.util.{LinearDataGenerator, MLlibTestSparkContext}
 import org.apache.spark.sql.DataFrame
 import org.apache.spark.sql.types.StructType
 
-class TrainValidationSplitSuite extends SparkFunSuite with MLlibTestSparkContext {
+class TrainValidationSplitSuite
+  extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
   test("train validation with logistic regression") {
     val dataset = sqlContext.createDataFrame(
       sc.parallelize(generateLogisticInput(1.0, 1.0, 100, 42), 2))
@@ -105,6 +108,44 @@ class TrainValidationSplitSuite extends SparkFunSuite with MLlibTestSparkContext
       cv.transformSchema(new StructType())
     }
   }
+
+  test("read/write: TrainValidationSplit") {
+    val lr = new LogisticRegression().setMaxIter(3)
+    val evaluator = new BinaryClassificationEvaluator()
+    val paramMaps = new ParamGridBuilder()
+        .addGrid(lr.regParam, Array(0.1, 0.2))
+        .build()
+    val tvs = new TrainValidationSplit()
+      .setEstimator(lr)
+      .setEvaluator(evaluator)
+      .setTrainRatio(0.5)
+      .setEstimatorParamMaps(paramMaps)
+
+    val tvs2 = testDefaultReadWrite(tvs, testParams = false)
+
+    assert(tvs.getTrainRatio === tvs2.getTrainRatio)
+  }
+
+  test("read/write: TrainValidationSplitModel") {
+    val lr = new LogisticRegression()
+      .setThreshold(0.6)
+    val lrModel = new LogisticRegressionModel(lr.uid, Vectors.dense(1.0, 2.0), 1.2)
+      .setThreshold(0.6)
+    val evaluator = new BinaryClassificationEvaluator()
+    val paramMaps = new ParamGridBuilder()
+        .addGrid(lr.regParam, Array(0.1, 0.2))
+        .build()
+    val tvs = new TrainValidationSplitModel("cvUid", lrModel, Array(0.3, 0.6))
+    tvs.set(tvs.estimator, lr)
+      .set(tvs.evaluator, evaluator)
+      .set(tvs.trainRatio, 0.5)
+      .set(tvs.estimatorParamMaps, paramMaps)
+
+    val tvs2 = testDefaultReadWrite(tvs, testParams = false)
+
+    assert(tvs.getTrainRatio === tvs2.getTrainRatio)
+    assert(tvs.validationMetrics === tvs2.validationMetrics)
+  }
 }
 
 object TrainValidationSplitSuite {
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala
index a02b8c9635ba6..57907f415cd0f 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala
@@ -150,6 +150,10 @@ class MatricesSuite extends SparkFunSuite {
       sparseMat.update(0, 0, 10.0)
     }
 
+    intercept[NoSuchElementException] {
+      sparseMat.update(2, 1, 10.0)
+    }
+
     sparseMat.update(0, 1, 10.0)
     assert(sparseMat(0, 1) === 10.0)
     assert(sparseMat.values(2) === 10.0)
diff --git a/pom.xml b/pom.xml
index b4cfa3a59873e..1513a18b715b2 100644
--- a/pom.xml
+++ b/pom.xml
@@ -178,6 +178,7 @@
     1.3.9
     0.9.2
     <antlr.version>3.5.2</antlr.version>
+    <antlr4.version>4.5.2-1</antlr4.version>
 
     ${java.home}
     
@@ -1759,6 +1760,11 @@
         <artifactId>antlr-runtime</artifactId>
         <version>${antlr.version}</version>
       </dependency>
+      <dependency>
+        <groupId>org.antlr</groupId>
+        <artifactId>antlr4-runtime</artifactId>
+        <version>${antlr4.version}</version>
+      </dependency>
     </dependencies>
   </dependencyManagement>
 
@@ -1885,6 +1891,11 @@
           <artifactId>antlr3-maven-plugin</artifactId>
           <version>3.5.2</version>
         </plugin>
+        <plugin>
+          <groupId>org.antlr</groupId>
+          <artifactId>antlr4-maven-plugin</artifactId>
+          <version>${antlr4.version}</version>
+        </plugin>
         
         
           <groupId>org.apache.maven.plugins</groupId>
diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index 208c7a28cf9bc..94621d7fa3723 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -589,6 +589,9 @@ object MimaExcludes {
         ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.util.MLUtils.loadLabeledData"),
         ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.optimization.LBFGS.setMaxNumIterations"),
         ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.evaluation.BinaryClassificationEvaluator.setScoreCol")
+      ) ++ Seq(
+        // [SPARK-14205][SQL] remove trait Queryable
+        ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.sql.Dataset")
       )
     case v if v.startsWith("1.6") =>
       Seq(
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index fb229b979d41b..39a9e16f7e182 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -25,6 +25,7 @@ import sbt._
 import sbt.Classpaths.publishTask
 import sbt.Keys._
 import sbtunidoc.Plugin.UnidocKeys.unidocGenjavadocVersion
+import com.simplytyped.Antlr4Plugin._
 import com.typesafe.sbt.pom.{PomBuild, SbtPomKeys}
 import com.typesafe.tools.mima.plugin.MimaKeys
 
@@ -401,7 +402,10 @@ object OldDeps {
 }
 
 object Catalyst {
-  lazy val settings = Seq(
+  lazy val settings = antlr4Settings ++ Seq(
+    antlr4PackageName in Antlr4 := Some("org.apache.spark.sql.catalyst.parser.ng"),
+    antlr4GenListener in Antlr4 := true,
+    antlr4GenVisitor in Antlr4 := true,
     // ANTLR code-generation step.
     //
     // This has been heavily inspired by com.github.stefri.sbt-antlr (0.5.3). It fixes a number of
@@ -414,7 +418,7 @@ object Catalyst {
         "SparkSqlLexer.g",
         "SparkSqlParser.g")
       val sourceDir = (sourceDirectory in Compile).value / "antlr3"
-      val targetDir = (sourceManaged in Compile).value
+      val targetDir = (sourceManaged in Compile).value / "antlr3"
 
       // Create default ANTLR Tool.
       val antlr = new org.antlr.Tool
diff --git a/project/plugins.sbt b/project/plugins.sbt
index eeca94a47ce79..d9ed7962bf611 100644
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -23,3 +23,9 @@ libraryDependencies += "org.ow2.asm"  % "asm" % "5.0.3"
 libraryDependencies += "org.ow2.asm"  % "asm-commons" % "5.0.3"
 
 libraryDependencies += "org.antlr" % "antlr" % "3.5.2"
+
+
+// TODO I am not sure we want such a dep.
+resolvers += "simplytyped" at "http://simplytyped.github.io/repo/releases"
+
+addSbtPlugin("com.simplytyped" % "sbt-antlr4" % "0.7.10")
diff --git a/python/pyspark/join.py b/python/pyspark/join.py
index 94df3990164d6..c1f5362648f6e 100644
--- a/python/pyspark/join.py
+++ b/python/pyspark/join.py
@@ -93,7 +93,7 @@ def dispatch(seq):
             vbuf.append(None)
         if not wbuf:
             wbuf.append(None)
-        return [(v, w) for v in vbuf for w in wbuf]
+        return ((v, w) for v in vbuf for w in wbuf)
     return _do_python_join(rdd, other, numPartitions, dispatch)
 
 
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index 83ef76c13cee3..1a5d422af9535 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -51,7 +51,7 @@
 from pyspark.tests import ReusedPySparkTestCase
 from pyspark.sql.functions import UserDefinedFunction, sha2
 from pyspark.sql.window import Window
-from pyspark.sql.utils import AnalysisException, IllegalArgumentException
+from pyspark.sql.utils import AnalysisException, ParseException, IllegalArgumentException
 
 
 class UTCOffsetTimezone(datetime.tzinfo):
@@ -1130,7 +1130,9 @@ def test_replace(self):
     def test_capture_analysis_exception(self):
         self.assertRaises(AnalysisException, lambda: self.sqlCtx.sql("select abc"))
         self.assertRaises(AnalysisException, lambda: self.df.selectExpr("a + b"))
-        self.assertRaises(AnalysisException, lambda: self.sqlCtx.sql("abc"))
+
+    def test_capture_parse_exception(self):
+        self.assertRaises(ParseException, lambda: self.sqlCtx.sql("abc"))
 
     def test_capture_illegalargument_exception(self):
         self.assertRaisesRegexp(IllegalArgumentException, "Setting negative mapred.reduce.tasks",
diff --git a/python/pyspark/sql/utils.py b/python/pyspark/sql/utils.py
index b0a0373372d20..b89ea8c6e05db 100644
--- a/python/pyspark/sql/utils.py
+++ b/python/pyspark/sql/utils.py
@@ -33,6 +33,12 @@ class AnalysisException(CapturedException):
     """
 
 
+class ParseException(CapturedException):
+    """
+    Failed to parse a SQL command.
+    """
+
+
 class IllegalArgumentException(CapturedException):
     """
     Passed an illegal or inappropriate argument.
@@ -49,6 +55,8 @@ def deco(*a, **kw):
                                              e.java_exception.getStackTrace()))
             if s.startswith('org.apache.spark.sql.AnalysisException: '):
                 raise AnalysisException(s.split(': ', 1)[1], stackTrace)
+            if s.startswith('org.apache.spark.sql.catalyst.parser.ng.ParseException: '):
+                raise ParseException(s.split(': ', 1)[1], stackTrace)
             if s.startswith('java.lang.IllegalArgumentException: '):
                 raise IllegalArgumentException(s.split(': ', 1)[1], stackTrace)
             raise
diff --git a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala
index 7ed6d3b1f991d..db09d6ace1c65 100644
--- a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala
+++ b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala
@@ -19,12 +19,11 @@ package org.apache.spark.repl
 
 import java.io.BufferedReader
 
-import Predef.{println => _, _}
-import scala.util.Properties.{javaVersion, versionString, javaVmName}
-
-import scala.tools.nsc.interpreter.{JPrintWriter, ILoop}
+import scala.Predef.{println => _, _}
 import scala.tools.nsc.Settings
+import scala.tools.nsc.interpreter.{ILoop, JPrintWriter}
 import scala.tools.nsc.util.stringFromStream
+import scala.util.Properties.{javaVersion, javaVmName, versionString}
 
 /**
  *  A Spark-specific interactive shell.
@@ -75,11 +74,9 @@ class SparkILoop(in0: Option[BufferedReader], out: JPrintWriter)
     echo("Type :help for more information.")
   }
 
-  import LoopCommand.{ cmd, nullary }
-
-  private val blockedCommands = Set("implicits", "javap", "power", "type", "kind")
+  private val blockedCommands = Set("implicits", "javap", "power", "type", "kind", "reset")
 
-  /** Standard commands **/
+  /** Standard commands */
   lazy val sparkStandardCommands: List[SparkILoop.this.LoopCommand] =
     standardCommands.filter(cmd => !blockedCommands(cmd.name))
 
@@ -112,9 +109,9 @@ object SparkILoop {
         val output = new JPrintWriter(new OutputStreamWriter(ostream), true)
         val repl = new SparkILoop(input, output)
 
-        if (sets.classpath.isDefault)
+        if (sets.classpath.isDefault) {
           sets.classpath.value = sys.props("java.class.path")
-
+        }
         repl process sets
       }
     }
diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml
index 5d1d9edd251c6..9bfe495e9078c 100644
--- a/sql/catalyst/pom.xml
+++ b/sql/catalyst/pom.xml
@@ -75,6 +75,10 @@
       <groupId>org.antlr</groupId>
       <artifactId>antlr-runtime</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.antlr</groupId>
+      <artifactId>antlr4-runtime</artifactId>
+    </dependency>
     <dependency>
       <groupId>commons-codec</groupId>
       <artifactId>commons-codec</artifactId>
@@ -129,6 +133,21 @@
           
         
       
+      <plugin>
+        <groupId>org.antlr</groupId>
+        <artifactId>antlr4-maven-plugin</artifactId>
+        <executions>
+          <execution>
+            <goals>
+              <goal>antlr4</goal>
+            </goals>
+          </execution>
+        </executions>
+        <configuration>
+          <visitor>true</visitor>
+          <sourceDirectory>../catalyst/src/main/antlr4</sourceDirectory>
+        </configuration>
+      </plugin>
     </plugins>
   </build>
 
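The Maven plugin configuration above and the sbt-antlr4 settings in SparkBuild.scala both generate a parser from the SqlBase.g4 grammar that follows, placing the generated classes in the org.apache.spark.sql.catalyst.parser.ng package. A short sketch of driving those classes directly, assuming ANTLR's standard naming for a grammar called SqlBase (SqlBaseLexer/SqlBaseParser); it is illustrative only, not part of the patch:

    import org.antlr.v4.runtime.{ANTLRInputStream, CommonTokenStream}
    import org.apache.spark.sql.catalyst.parser.ng.{SqlBaseLexer, SqlBaseParser}

    val lexer = new SqlBaseLexer(new ANTLRInputStream("SELECT 1"))
    val parser = new SqlBaseParser(new CommonTokenStream(lexer))
    // singleStatement is one of the entry rules declared at the top of SqlBase.g4.
    val tree = parser.singleStatement()
    println(tree.toStringTree(parser))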
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/ng/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/ng/SqlBase.g4
new file mode 100644
index 0000000000000..e46fd9bed5d03
--- /dev/null
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/ng/SqlBase.g4
@@ -0,0 +1,911 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This file is an adaptation of Presto's presto-parser/src/main/antlr4/com/facebook/presto/sql/parser/SqlBase.g4 grammar.
+ */
+
+grammar SqlBase;
+
+tokens {
+    DELIMITER
+}
+
+singleStatement
+    : statement EOF
+    ;
+
+singleExpression
+    : namedExpression EOF
+    ;
+
+singleTableIdentifier
+    : tableIdentifier EOF
+    ;
+
+singleDataType
+    : dataType EOF
+    ;
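+// Illustrative example inputs for the entry-point rules above:
+//   singleStatement:       SELECT a, b FROM db.tbl WHERE a > 1
+//   singleExpression:      a + 1 AS result
+//   singleTableIdentifier: db.tbl
+//   singleDataType:        STRUCT<name: STRING, age: INT>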
+
+statement
+    : query                                                            #statementDefault
+    | USE db=identifier                                                #use
+    | CREATE DATABASE (IF NOT EXISTS)? identifier
+        (COMMENT comment=STRING)? locationSpec?
+        (WITH DBPROPERTIES tablePropertyList)?                         #createDatabase
+    | ALTER DATABASE identifier SET DBPROPERTIES tablePropertyList     #setDatabaseProperties
+    | DROP DATABASE (IF EXISTS)? identifier (RESTRICT | CASCADE)?      #dropDatabase
+    | createTableHeader ('(' colTypeList ')')? tableProvider
+        (OPTIONS tablePropertyList)?                                   #createTableUsing
+    | createTableHeader tableProvider
+        (OPTIONS tablePropertyList)? AS? query                         #createTableUsing
+    | createTableHeader ('(' colTypeList ')')? (COMMENT STRING)?
+        (PARTITIONED BY identifierList)? bucketSpec? skewSpec?
+        rowFormat?  createFileFormat? locationSpec?
+        (TBLPROPERTIES tablePropertyList)?
+        (AS? query)?                                                   #createTable
+    | ANALYZE TABLE tableIdentifier partitionSpec? COMPUTE STATISTICS
+        (identifier | FOR COLUMNS identifierSeq?)                      #analyze
+    | ALTER TABLE from=tableIdentifier RENAME TO to=tableIdentifier    #renameTable
+    | ALTER TABLE tableIdentifier
+        SET TBLPROPERTIES tablePropertyList                            #setTableProperties
+    | ALTER TABLE tableIdentifier
+        UNSET TBLPROPERTIES (IF EXISTS)? tablePropertyList             #unsetTableProperties
+    | ALTER TABLE tableIdentifier (partitionSpec)?
+        SET SERDE STRING (WITH SERDEPROPERTIES tablePropertyList)?     #setTableSerDe
+    | ALTER TABLE tableIdentifier (partitionSpec)?
+        SET SERDEPROPERTIES tablePropertyList                          #setTableSerDe
+    | ALTER TABLE tableIdentifier bucketSpec                           #bucketTable
+    | ALTER TABLE tableIdentifier NOT CLUSTERED                        #unclusterTable
+    | ALTER TABLE tableIdentifier NOT SORTED                           #unsortTable
+    | ALTER TABLE tableIdentifier skewSpec                             #skewTable
+    | ALTER TABLE tableIdentifier NOT SKEWED                           #unskewTable
+    | ALTER TABLE tableIdentifier NOT STORED AS DIRECTORIES            #unstoreTable
+    | ALTER TABLE tableIdentifier
+        SET SKEWED LOCATION skewedLocationList                         #setTableSkewLocations
+    | ALTER TABLE tableIdentifier ADD (IF NOT EXISTS)?
+        partitionSpecLocation+                                         #addTablePartition
+    | ALTER TABLE tableIdentifier
+        from=partitionSpec RENAME TO to=partitionSpec                  #renameTablePartition
+    | ALTER TABLE from=tableIdentifier
+        EXCHANGE partitionSpec WITH TABLE to=tableIdentifier           #exchangeTablePartition
+    | ALTER TABLE tableIdentifier
+        DROP (IF EXISTS)? partitionSpec (',' partitionSpec)* PURGE?    #dropTablePartitions
+    | ALTER TABLE tableIdentifier ARCHIVE partitionSpec                #archiveTablePartition
+    | ALTER TABLE tableIdentifier UNARCHIVE partitionSpec              #unarchiveTablePartition
+    | ALTER TABLE tableIdentifier partitionSpec?
+        SET FILEFORMAT fileFormat                                      #setTableFileFormat
+    | ALTER TABLE tableIdentifier partitionSpec? SET locationSpec      #setTableLocation
+    | ALTER TABLE tableIdentifier TOUCH partitionSpec?                 #touchTable
+    | ALTER TABLE tableIdentifier partitionSpec? COMPACT STRING        #compactTable
+    | ALTER TABLE tableIdentifier partitionSpec? CONCATENATE           #concatenateTable
+    | ALTER TABLE tableIdentifier partitionSpec?
+        CHANGE COLUMN? oldName=identifier colType
+        (FIRST | AFTER after=identifier)? (CASCADE | RESTRICT)?        #changeColumn
+    | ALTER TABLE tableIdentifier partitionSpec?
+        ADD COLUMNS '(' colTypeList ')' (CASCADE | RESTRICT)?          #addColumns
+    | ALTER TABLE tableIdentifier partitionSpec?
+        REPLACE COLUMNS '(' colTypeList ')' (CASCADE | RESTRICT)?      #replaceColumns
+    | DROP TABLE (IF EXISTS)? tableIdentifier PURGE?
+        (FOR METADATA? REPLICATION '(' STRING ')')?                    #dropTable
+    | CREATE (OR REPLACE)? VIEW (IF NOT EXISTS)? tableIdentifier
+        identifierCommentList? (COMMENT STRING)?
+        (PARTITIONED ON identifierList)?
+        (TBLPROPERTIES tablePropertyList)? AS query                    #createView
+    | ALTER VIEW tableIdentifier AS? query                             #alterViewQuery
+    | CREATE TEMPORARY? FUNCTION qualifiedName AS className=STRING
+        (USING resource (',' resource)*)?                              #createFunction
+    | DROP TEMPORARY? FUNCTION (IF EXISTS)? qualifiedName              #dropFunction
+    | EXPLAIN explainOption* statement                                 #explain
+    | SHOW TABLES ((FROM | IN) db=identifier)?
+        (LIKE (qualifiedName | pattern=STRING))?                       #showTables
+    | SHOW FUNCTIONS (LIKE? (qualifiedName | pattern=STRING))?         #showFunctions
+    | (DESC | DESCRIBE) FUNCTION EXTENDED? qualifiedName               #describeFunction
+    | (DESC | DESCRIBE) option=(EXTENDED | FORMATTED)?
+        tableIdentifier partitionSpec? describeColName?                #describeTable
+    | (DESC | DESCRIBE) DATABASE EXTENDED? identifier                  #describeDatabase
+    | REFRESH TABLE tableIdentifier                                    #refreshTable
+    | CACHE LAZY? TABLE identifier (AS? query)?                        #cacheTable
+    | UNCACHE TABLE identifier                                         #uncacheTable
+    | CLEAR CACHE                                                      #clearCache
+    | ADD identifier .*?                                               #addResource
+    | SET .*?                                                          #setConfiguration
+    | hiveNativeCommands                                               #executeNativeCommand
+    ;
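+// A few statements matched by the rule above (illustrative):
+//   CREATE DATABASE IF NOT EXISTS mydb COMMENT 'test' LOCATION '/tmp/mydb'
+//   ALTER TABLE db.tbl RENAME TO db.tbl2
+//   SHOW TABLES IN mydb LIKE 'page*'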
+
+hiveNativeCommands
+    : createTableHeader LIKE tableIdentifier
+        rowFormat?  createFileFormat? locationSpec?
+        (TBLPROPERTIES tablePropertyList)?
+    | DELETE FROM tableIdentifier (WHERE booleanExpression)?
+    | TRUNCATE TABLE tableIdentifier partitionSpec?
+        (COLUMNS identifierList)?
+    | ALTER VIEW from=tableIdentifier AS? RENAME TO to=tableIdentifier
+    | ALTER VIEW from=tableIdentifier AS?
+        SET TBLPROPERTIES tablePropertyList
+    | ALTER VIEW from=tableIdentifier AS?
+        UNSET TBLPROPERTIES (IF EXISTS)? tablePropertyList
+    | ALTER VIEW from=tableIdentifier AS?
+        ADD (IF NOT EXISTS)? partitionSpecLocation+
+    | ALTER VIEW from=tableIdentifier AS?
+        DROP (IF EXISTS)? partitionSpec (',' partitionSpec)* PURGE?
+    | DROP VIEW (IF EXISTS)? qualifiedName
+    | SHOW COLUMNS (FROM | IN) tableIdentifier ((FROM|IN) identifier)?
+    | START TRANSACTION (transactionMode (',' transactionMode)*)?
+    | COMMIT WORK?
+    | ROLLBACK WORK?
+    | SHOW PARTITIONS tableIdentifier partitionSpec?
+    | DFS .*?
+    | (CREATE | ALTER | DROP | SHOW | DESC | DESCRIBE | REVOKE | GRANT | LOCK | UNLOCK | MSCK | EXPORT | IMPORT | LOAD) .*?
+    ;
+
+createTableHeader
+    : CREATE TEMPORARY? EXTERNAL? TABLE (IF NOT EXISTS)? tableIdentifier
+    ;
+
+bucketSpec
+    : CLUSTERED BY identifierList
+      (SORTED BY orderedIdentifierList)?
+      INTO INTEGER_VALUE BUCKETS
+    ;
+
+skewSpec
+    : SKEWED BY identifierList
+      ON (constantList | nestedConstantList)
+      (STORED AS DIRECTORIES)?
+    ;
+
+locationSpec
+    : LOCATION STRING
+    ;
+
+query
+    : ctes? queryNoWith
+    ;
+
+insertInto
+    : INSERT OVERWRITE TABLE tableIdentifier partitionSpec? (IF NOT EXISTS)?
+    | INSERT INTO TABLE? tableIdentifier partitionSpec?
+    ;
+
+partitionSpecLocation
+    : partitionSpec locationSpec?
+    ;
+
+partitionSpec
+    : PARTITION '(' partitionVal (',' partitionVal)* ')'
+    ;
+
+partitionVal
+    : identifier (EQ constant)?
+    ;
+
+describeColName
+    : identifier ('.' (identifier | STRING))*
+    ;
+
+ctes
+    : WITH namedQuery (',' namedQuery)*
+    ;
+
+namedQuery
+    : name=identifier AS? '(' queryNoWith ')'
+    ;
+
+tableProvider
+    : USING qualifiedName
+    ;
+
+tablePropertyList
+    : '(' tableProperty (',' tableProperty)* ')'
+    ;
+
+tableProperty
+    : key=tablePropertyKey (EQ? value=STRING)?
+    ;
+
+tablePropertyKey
+    : looseIdentifier ('.' looseIdentifier)*
+    | STRING
+    ;
+
+constantList
+    : '(' constant (',' constant)* ')'
+    ;
+
+nestedConstantList
+    : '(' constantList (',' constantList)* ')'
+    ;
+
+skewedLocation
+    : (constant | constantList) EQ STRING
+    ;
+
+skewedLocationList
+    : '(' skewedLocation (',' skewedLocation)* ')'
+    ;
+
+createFileFormat
+    : STORED AS fileFormat
+    | STORED BY storageHandler
+    ;
+
+fileFormat
+    : INPUTFORMAT inFmt=STRING OUTPUTFORMAT outFmt=STRING (SERDE serdeCls=STRING)?
+      (INPUTDRIVER inDriver=STRING OUTPUTDRIVER outDriver=STRING)?                         #tableFileFormat
+    | identifier                                                                           #genericFileFormat
+    ;
+
+storageHandler
+    : STRING (WITH SERDEPROPERTIES tablePropertyList)?
+    ;
+
+resource
+    : identifier STRING
+    ;
+
+queryNoWith
+    : insertInto? queryTerm queryOrganization                                              #singleInsertQuery
+    | fromClause multiInsertQueryBody+                                                     #multiInsertQuery
+    ;
+
+queryOrganization
+    : (ORDER BY order+=sortItem (',' order+=sortItem)*)?
+      (CLUSTER BY clusterBy+=expression (',' clusterBy+=expression)*)?
+      (DISTRIBUTE BY distributeBy+=expression (',' distributeBy+=expression)*)?
+      (SORT BY sort+=sortItem (',' sort+=sortItem)*)?
+      windows?
+      (LIMIT limit=expression)?
+    ;
+
+multiInsertQueryBody
+    : insertInto?
+      querySpecification
+      queryOrganization
+    ;
+
+queryTerm
+    : queryPrimary                                                                         #queryTermDefault
+    | left=queryTerm operator=(INTERSECT | UNION | EXCEPT) setQuantifier? right=queryTerm  #setOperation
+    ;
+
+queryPrimary
+    : querySpecification                                                    #queryPrimaryDefault
+    | TABLE tableIdentifier                                                 #table
+    | inlineTable                                                           #inlineTableDefault1
+    | '(' queryNoWith  ')'                                                  #subquery
+    ;
+
+sortItem
+    : expression ordering=(ASC | DESC)?
+    ;
+
+querySpecification
+    : (((SELECT kind=TRANSFORM '(' namedExpressionSeq ')'
+        | kind=MAP namedExpressionSeq
+        | kind=REDUCE namedExpressionSeq))
+       inRowFormat=rowFormat?
+       (RECORDWRITER recordWriter=STRING)?
+       USING script=STRING
+       (AS (identifierSeq | colTypeList | ('(' (identifierSeq | colTypeList) ')')))?
+       outRowFormat=rowFormat?
+       (RECORDREADER recordReader=STRING)?
+       fromClause?
+       (WHERE where=booleanExpression)?)
+    | ((kind=SELECT setQuantifier? namedExpressionSeq fromClause?
+       | fromClause (kind=SELECT setQuantifier? namedExpressionSeq)?)
+       lateralView*
+       (WHERE where=booleanExpression)?
+       aggregation?
+       (HAVING having=booleanExpression)?
+       windows?)
+    ;
+
+fromClause
+    : FROM relation (',' relation)* lateralView*
+    ;
+
+aggregation
+    : GROUP BY groupingExpressions+=expression (',' groupingExpressions+=expression)* (
+      WITH kind=ROLLUP
+    | WITH kind=CUBE
+    | kind=GROUPING SETS '(' groupingSet (',' groupingSet)* ')')?
+    ;
+
+groupingSet
+    : '(' (expression (',' expression)*)? ')'
+    | expression
+    ;
+
+lateralView
+    : LATERAL VIEW (OUTER)? qualifiedName '(' (expression (',' expression)*)? ')' tblName=identifier (AS? colName+=identifier (',' colName+=identifier)*)?
+    ;
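+// Example (illustrative): LATERAL VIEW explode(kv_map) t AS key, value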
+
+setQuantifier
+    : DISTINCT
+    | ALL
+    ;
+
+relation
+    : left=relation
+      ((CROSS | joinType) JOIN right=relation joinCriteria?
+      | NATURAL joinType JOIN right=relation
+      )                                           #joinRelation
+    | relationPrimary                             #relationDefault
+    ;
+
+joinType
+    : INNER?
+    | LEFT OUTER?
+    | LEFT SEMI
+    | RIGHT OUTER?
+    | FULL OUTER?
+    ;
+
+joinCriteria
+    : ON booleanExpression
+    | USING '(' identifier (',' identifier)* ')'
+    ;
+
+sample
+    : TABLESAMPLE '('
+      ( (percentage=(INTEGER_VALUE | DECIMAL_VALUE) sampleType=PERCENTLIT)
+      | (expression sampleType=ROWS)
+      | (sampleType=BUCKET numerator=INTEGER_VALUE OUT OF denominator=INTEGER_VALUE (ON identifier)?))
+      ')'
+    ;
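+// Examples (illustrative): TABLESAMPLE (10 PERCENT), TABLESAMPLE (BUCKET 3 OUT OF 16 ON id)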
+
+identifierList
+    : '(' identifierSeq ')'
+    ;
+
+identifierSeq
+    : identifier (',' identifier)*
+    ;
+
+orderedIdentifierList
+    : '(' orderedIdentifier (',' orderedIdentifier)* ')'
+    ;
+
+orderedIdentifier
+    : identifier ordering=(ASC | DESC)?
+    ;
+
+identifierCommentList
+    : '(' identifierComment (',' identifierComment)* ')'
+    ;
+
+identifierComment
+    : identifier (COMMENT STRING)?
+    ;
+
+relationPrimary
+    : tableIdentifier sample? (AS? identifier)?                     #tableName
+    | '(' queryNoWith ')' sample? (AS? identifier)?                 #aliasedQuery
+    | '(' relation ')' sample? (AS? identifier)?                    #aliasedRelation
+    | inlineTable                                                   #inlineTableDefault2
+    ;
+
+inlineTable
+    : VALUES expression (',' expression)*  (AS? identifier identifierList?)?
+    ;
+
+rowFormat
+    : ROW FORMAT SERDE name=STRING (WITH SERDEPROPERTIES props=tablePropertyList)?  #rowFormatSerde
+    | ROW FORMAT DELIMITED
+      (FIELDS TERMINATED BY fieldsTerminatedBy=STRING (ESCAPED BY escapedBy=STRING)?)?
+      (COLLECTION ITEMS TERMINATED BY collectionItemsTerminatedBy=STRING)?
+      (MAP KEYS TERMINATED BY keysTerminatedBy=STRING)?
+      (LINES TERMINATED BY linesSeparatedBy=STRING)?
+      (NULL DEFINED AS nullDefinedAs=STRING)?                                       #rowFormatDelimited
+    ;
+
+tableIdentifier
+    : (db=identifier '.')? table=identifier
+    ;
+
+namedExpression
+    : expression (AS? (identifier | identifierList))?
+    ;
+
+namedExpressionSeq
+    : namedExpression (',' namedExpression)*
+    ;
+
+expression
+    : booleanExpression
+    ;
+
+booleanExpression
+    : predicated                                                   #booleanDefault
+    | NOT booleanExpression                                        #logicalNot
+    | left=booleanExpression operator=AND right=booleanExpression  #logicalBinary
+    | left=booleanExpression operator=OR right=booleanExpression   #logicalBinary
+    | EXISTS '(' query ')'                                         #exists
+    ;
+
+// workaround for:
+//  https://github.com/antlr/antlr4/issues/780
+//  https://github.com/antlr/antlr4/issues/781
+predicated
+    : valueExpression predicate[$valueExpression.ctx]?
+    ;
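+// e.g. in `a BETWEEN 1 AND 2` the optional predicate is applied to the valueExpression parsed as `a`.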
+
+predicate[ParserRuleContext value]
+    : NOT? BETWEEN lower=valueExpression AND upper=valueExpression        #between
+    | NOT? IN '(' expression (',' expression)* ')'                        #inList
+    | NOT? IN '(' query ')'                                               #inSubquery
+    | NOT? like=(RLIKE | LIKE) pattern=valueExpression                    #like
+    | IS NOT? NULL                                                        #nullPredicate
+    ;
+
+valueExpression
+    : primaryExpression                                                                      #valueExpressionDefault
+    | operator=(MINUS | PLUS | TILDE) valueExpression                                        #arithmeticUnary
+    | left=valueExpression operator=(ASTERISK | SLASH | PERCENT | DIV) right=valueExpression #arithmeticBinary
+    | left=valueExpression operator=(PLUS | MINUS) right=valueExpression                     #arithmeticBinary
+    | left=valueExpression operator=AMPERSAND right=valueExpression                          #arithmeticBinary
+    | left=valueExpression operator=HAT right=valueExpression                                #arithmeticBinary
+    | left=valueExpression operator=PIPE right=valueExpression                               #arithmeticBinary
+    | left=valueExpression comparisonOperator right=valueExpression                          #comparison
+    ;
+
+primaryExpression
+    : constant                                                                                 #constantDefault
+    | ASTERISK                                                                                 #star
+    | qualifiedName '.' ASTERISK                                                               #star
+    | '(' expression (',' expression)+ ')'                                                     #rowConstructor
+    | qualifiedName '(' (setQuantifier? expression (',' expression)*)? ')' (OVER windowSpec)?  #functionCall
+    | '(' query ')'                                                                            #subqueryExpression
+    | CASE valueExpression whenClause+ (ELSE elseExpression=expression)? END                   #simpleCase
+    | CASE whenClause+ (ELSE elseExpression=expression)? END                                   #searchedCase
+    | CAST '(' expression AS dataType ')'                                                      #cast
+    | value=primaryExpression '[' index=valueExpression ']'                                    #subscript
+    | identifier                                                                               #columnReference
+    | base=primaryExpression '.' fieldName=identifier                                          #dereference
+    | '(' expression ')'                                                                       #parenthesizedExpression
+    ;
+
+constant
+    : NULL                                                                                     #nullLiteral
+    | interval                                                                                 #intervalLiteral
+    | identifier STRING                                                                        #typeConstructor
+    | number                                                                                   #numericLiteral
+    | booleanValue                                                                             #booleanLiteral
+    | STRING+                                                                                  #stringLiteral
+    ;
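+// `identifier STRING` (#typeConstructor) covers typed literals such as DATE '2016-03-11', and
+// `STRING+` (#stringLiteral) parses adjacent string literals like 'hello' ' world' as one literal.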
+
+comparisonOperator
+    : EQ | NEQ | NEQJ | LT | LTE | GT | GTE | NSEQ
+    ;
+
+booleanValue
+    : TRUE | FALSE
+    ;
+
+interval
+    : INTERVAL intervalField*
+    ;
+
+intervalField
+    : value=intervalValue unit=identifier (TO to=identifier)?
+    ;
+
+intervalValue
+    : (PLUS | MINUS)? (INTEGER_VALUE | DECIMAL_VALUE)
+    | STRING
+    ;
+
+dataType
+    : complex=ARRAY '<' dataType '>'                            #complexDataType
+    | complex=MAP '<' dataType ',' dataType '>'                 #complexDataType
+    | complex=STRUCT ('<' colTypeList? '>' | NEQ)               #complexDataType
+    | identifier ('(' INTEGER_VALUE (',' INTEGER_VALUE)* ')')?  #primitiveDataType
+    ;
+
+colTypeList
+    : colType (',' colType)*
+    ;
+
+colType
+    : identifier ':'? dataType (COMMENT STRING)?
+    ;
+
+whenClause
+    : WHEN condition=expression THEN result=expression
+    ;
+
+windows
+    : WINDOW namedWindow (',' namedWindow)*
+    ;
+
+namedWindow
+    : identifier AS windowSpec
+    ;
+
+windowSpec
+    : name=identifier  #windowRef
+    | '('
+      ( CLUSTER BY partition+=expression (',' partition+=expression)*
+      | ((PARTITION | DISTRIBUTE) BY partition+=expression (',' partition+=expression)*)?
+        ((ORDER | SORT) BY sortItem (',' sortItem)*)?)
+      windowFrame?
+      ')'              #windowDef
+    ;
+
+windowFrame
+    : frameType=RANGE start=frameBound
+    | frameType=ROWS start=frameBound
+    | frameType=RANGE BETWEEN start=frameBound AND end=frameBound
+    | frameType=ROWS BETWEEN start=frameBound AND end=frameBound
+    ;
+
+frameBound
+    : UNBOUNDED boundType=(PRECEDING | FOLLOWING)
+    | boundType=CURRENT ROW
+    | expression boundType=(PRECEDING | FOLLOWING)
+    ;
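+// Examples (illustrative): ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW,
+//                          RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING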
+
+
+explainOption
+    : LOGICAL | FORMATTED | EXTENDED
+    ;
+
+transactionMode
+    : ISOLATION LEVEL SNAPSHOT            #isolationLevel
+    | READ accessMode=(ONLY | WRITE)      #transactionAccessMode
+    ;
+
+qualifiedName
+    : identifier ('.' identifier)*
+    ;
+
+// Identifier that also allows the use of a number of SQL keywords (mainly for backwards compatibility).
+looseIdentifier
+    : identifier
+    | FROM
+    | TO
+    | TABLE
+    | WITH
+    ;
+
+identifier
+    : IDENTIFIER             #unquotedIdentifier
+    | quotedIdentifier       #quotedIdentifierAlternative
+    | nonReserved            #unquotedIdentifier
+    ;
+
+quotedIdentifier
+    : BACKQUOTED_IDENTIFIER
+    ;
+
+number
+    : DECIMAL_VALUE            #decimalLiteral
+    | SCIENTIFIC_DECIMAL_VALUE #scientificDecimalLiteral
+    | INTEGER_VALUE            #integerLiteral
+    | BIGINT_LITERAL           #bigIntLiteral
+    | SMALLINT_LITERAL         #smallIntLiteral
+    | TINYINT_LITERAL          #tinyIntLiteral
+    | DOUBLE_LITERAL           #doubleLiteral
+    ;
+
+nonReserved
+    : SHOW | TABLES | COLUMNS | COLUMN | PARTITIONS | FUNCTIONS
+    | ADD
+    | OVER | PARTITION | RANGE | ROWS | PRECEDING | FOLLOWING | CURRENT | ROW | MAP | ARRAY | STRUCT
+    | LATERAL | WINDOW | REDUCE | TRANSFORM | USING | SERDE | SERDEPROPERTIES | RECORDREADER
+    | DELIMITED | FIELDS | TERMINATED | COLLECTION | ITEMS | KEYS | ESCAPED | LINES | SEPARATED
+    | EXTENDED | REFRESH | CLEAR | CACHE | UNCACHE | LAZY | TEMPORARY | OPTIONS
+    | GROUPING | CUBE | ROLLUP
+    | EXPLAIN | FORMAT | LOGICAL | FORMATTED
+    | TABLESAMPLE | USE | TO | BUCKET | PERCENTLIT | OUT | OF
+    | SET
+    | VIEW | REPLACE
+    | IF
+    | NO | DATA
+    | START | TRANSACTION | COMMIT | ROLLBACK | WORK | ISOLATION | LEVEL
+    | SNAPSHOT | READ | WRITE | ONLY
+    | SORT | CLUSTER | DISTRIBUTE | UNSET | TBLPROPERTIES | SKEWED | STORED | DIRECTORIES | LOCATION
+    | EXCHANGE | ARCHIVE | UNARCHIVE | FILEFORMAT | TOUCH | COMPACT | CONCATENATE | CHANGE | FIRST
+    | AFTER | CASCADE | RESTRICT | BUCKETS | CLUSTERED | SORTED | PURGE | INPUTFORMAT | OUTPUTFORMAT
+    | INPUTDRIVER | OUTPUTDRIVER | DBPROPERTIES | DFS | TRUNCATE | METADATA | REPLICATION | COMPUTE
+    | STATISTICS | ANALYZE | PARTITIONED | EXTERNAL | DEFINED | RECORDWRITER
+    | REVOKE | GRANT | LOCK | UNLOCK | MSCK | EXPORT | IMPORT | LOAD | VALUES | COMMENT
+    ;
+
+SELECT: 'SELECT';
+FROM: 'FROM';
+ADD: 'ADD';
+AS: 'AS';
+ALL: 'ALL';
+DISTINCT: 'DISTINCT';
+WHERE: 'WHERE';
+GROUP: 'GROUP';
+BY: 'BY';
+GROUPING: 'GROUPING';
+SETS: 'SETS';
+CUBE: 'CUBE';
+ROLLUP: 'ROLLUP';
+ORDER: 'ORDER';
+HAVING: 'HAVING';
+LIMIT: 'LIMIT';
+AT: 'AT';
+OR: 'OR';
+AND: 'AND';
+IN: 'IN';
+NOT: 'NOT' | '!';
+NO: 'NO';
+EXISTS: 'EXISTS';
+BETWEEN: 'BETWEEN';
+LIKE: 'LIKE';
+RLIKE: 'RLIKE' | 'REGEXP';
+IS: 'IS';
+NULL: 'NULL';
+TRUE: 'TRUE';
+FALSE: 'FALSE';
+NULLS: 'NULLS';
+ASC: 'ASC';
+DESC: 'DESC';
+FOR: 'FOR';
+INTERVAL: 'INTERVAL';
+CASE: 'CASE';
+WHEN: 'WHEN';
+THEN: 'THEN';
+ELSE: 'ELSE';
+END: 'END';
+JOIN: 'JOIN';
+CROSS: 'CROSS';
+OUTER: 'OUTER';
+INNER: 'INNER';
+LEFT: 'LEFT';
+SEMI: 'SEMI';
+RIGHT: 'RIGHT';
+FULL: 'FULL';
+NATURAL: 'NATURAL';
+ON: 'ON';
+LATERAL: 'LATERAL';
+WINDOW: 'WINDOW';
+OVER: 'OVER';
+PARTITION: 'PARTITION';
+RANGE: 'RANGE';
+ROWS: 'ROWS';
+UNBOUNDED: 'UNBOUNDED';
+PRECEDING: 'PRECEDING';
+FOLLOWING: 'FOLLOWING';
+CURRENT: 'CURRENT';
+ROW: 'ROW';
+WITH: 'WITH';
+VALUES: 'VALUES';
+CREATE: 'CREATE';
+TABLE: 'TABLE';
+VIEW: 'VIEW';
+REPLACE: 'REPLACE';
+INSERT: 'INSERT';
+DELETE: 'DELETE';
+INTO: 'INTO';
+DESCRIBE: 'DESCRIBE';
+EXPLAIN: 'EXPLAIN';
+FORMAT: 'FORMAT';
+LOGICAL: 'LOGICAL';
+CAST: 'CAST';
+SHOW: 'SHOW';
+TABLES: 'TABLES';
+COLUMNS: 'COLUMNS';
+COLUMN: 'COLUMN';
+USE: 'USE';
+PARTITIONS: 'PARTITIONS';
+FUNCTIONS: 'FUNCTIONS';
+DROP: 'DROP';
+UNION: 'UNION';
+EXCEPT: 'EXCEPT';
+INTERSECT: 'INTERSECT';
+TO: 'TO';
+TABLESAMPLE: 'TABLESAMPLE';
+STRATIFY: 'STRATIFY';
+ALTER: 'ALTER';
+RENAME: 'RENAME';
+ARRAY: 'ARRAY';
+MAP: 'MAP';
+STRUCT: 'STRUCT';
+COMMENT: 'COMMENT';
+SET: 'SET';
+DATA: 'DATA';
+START: 'START';
+TRANSACTION: 'TRANSACTION';
+COMMIT: 'COMMIT';
+ROLLBACK: 'ROLLBACK';
+WORK: 'WORK';
+ISOLATION: 'ISOLATION';
+LEVEL: 'LEVEL';
+SNAPSHOT: 'SNAPSHOT';
+READ: 'READ';
+WRITE: 'WRITE';
+ONLY: 'ONLY';
+
+IF: 'IF';
+
+EQ  : '=' | '==';
+NSEQ: '<=>';
+NEQ : '<>';
+NEQJ: '!=';
+LT  : '<';
+LTE : '<=';
+GT  : '>';
+GTE : '>=';
+
+PLUS: '+';
+MINUS: '-';
+ASTERISK: '*';
+SLASH: '/';
+PERCENT: '%';
+DIV: 'DIV';
+TILDE: '~';
+AMPERSAND: '&';
+PIPE: '|';
+HAT: '^';
+
+PERCENTLIT: 'PERCENT';
+BUCKET: 'BUCKET';
+OUT: 'OUT';
+OF: 'OF';
+
+SORT: 'SORT';
+CLUSTER: 'CLUSTER';
+DISTRIBUTE: 'DISTRIBUTE';
+OVERWRITE: 'OVERWRITE';
+TRANSFORM: 'TRANSFORM';
+REDUCE: 'REDUCE';
+USING: 'USING';
+SERDE: 'SERDE';
+SERDEPROPERTIES: 'SERDEPROPERTIES';
+RECORDREADER: 'RECORDREADER';
+RECORDWRITER: 'RECORDWRITER';
+DELIMITED: 'DELIMITED';
+FIELDS: 'FIELDS';
+TERMINATED: 'TERMINATED';
+COLLECTION: 'COLLECTION';
+ITEMS: 'ITEMS';
+KEYS: 'KEYS';
+ESCAPED: 'ESCAPED';
+LINES: 'LINES';
+SEPARATED: 'SEPARATED';
+FUNCTION: 'FUNCTION';
+EXTENDED: 'EXTENDED';
+REFRESH: 'REFRESH';
+CLEAR: 'CLEAR';
+CACHE: 'CACHE';
+UNCACHE: 'UNCACHE';
+LAZY: 'LAZY';
+FORMATTED: 'FORMATTED';
+TEMPORARY: 'TEMPORARY' | 'TEMP';
+OPTIONS: 'OPTIONS';
+UNSET: 'UNSET';
+TBLPROPERTIES: 'TBLPROPERTIES';
+DBPROPERTIES: 'DBPROPERTIES';
+BUCKETS: 'BUCKETS';
+SKEWED: 'SKEWED';
+STORED: 'STORED';
+DIRECTORIES: 'DIRECTORIES';
+LOCATION: 'LOCATION';
+EXCHANGE: 'EXCHANGE';
+ARCHIVE: 'ARCHIVE';
+UNARCHIVE: 'UNARCHIVE';
+FILEFORMAT: 'FILEFORMAT';
+TOUCH: 'TOUCH';
+COMPACT: 'COMPACT';
+CONCATENATE: 'CONCATENATE';
+CHANGE: 'CHANGE';
+FIRST: 'FIRST';
+AFTER: 'AFTER';
+CASCADE: 'CASCADE';
+RESTRICT: 'RESTRICT';
+CLUSTERED: 'CLUSTERED';
+SORTED: 'SORTED';
+PURGE: 'PURGE';
+INPUTFORMAT: 'INPUTFORMAT';
+OUTPUTFORMAT: 'OUTPUTFORMAT';
+INPUTDRIVER: 'INPUTDRIVER';
+OUTPUTDRIVER: 'OUTPUTDRIVER';
+DATABASE: 'DATABASE' | 'SCHEMA';
+DFS: 'DFS';
+TRUNCATE: 'TRUNCATE';
+METADATA: 'METADATA';
+REPLICATION: 'REPLICATION';
+ANALYZE: 'ANALYZE';
+COMPUTE: 'COMPUTE';
+STATISTICS: 'STATISTICS';
+PARTITIONED: 'PARTITIONED';
+EXTERNAL: 'EXTERNAL';
+DEFINED: 'DEFINED';
+REVOKE: 'REVOKE';
+GRANT: 'GRANT';
+LOCK: 'LOCK';
+UNLOCK: 'UNLOCK';
+MSCK: 'MSCK';
+EXPORT: 'EXPORT';
+IMPORT: 'IMPORT';
+LOAD: 'LOAD';
+
+STRING
+    : '\'' ( ~('\''|'\\') | ('\\' .) )* '\''
+    | '\"' ( ~('\"'|'\\') | ('\\' .) )* '\"'
+    ;
+
+BIGINT_LITERAL
+    : DIGIT+ 'L'
+    ;
+
+SMALLINT_LITERAL
+    : DIGIT+ 'S'
+    ;
+
+TINYINT_LITERAL
+    : DIGIT+ 'Y'
+    ;
+
+INTEGER_VALUE
+    : DIGIT+
+    ;
+
+DECIMAL_VALUE
+    : DIGIT+ '.' DIGIT*
+    | '.' DIGIT+
+    ;
+
+SCIENTIFIC_DECIMAL_VALUE
+    : DIGIT+ ('.' DIGIT*)? EXPONENT
+    | '.' DIGIT+ EXPONENT
+    ;
+
+DOUBLE_LITERAL
+    :
+    (INTEGER_VALUE | DECIMAL_VALUE | SCIENTIFIC_DECIMAL_VALUE) 'D'
+    ;
+
+IDENTIFIER
+    : (LETTER | DIGIT | '_')+
+    ;
+
+BACKQUOTED_IDENTIFIER
+    : '`' ( ~'`' | '``' )* '`'
+    ;
+
+fragment EXPONENT
+    : 'E' [+-]? DIGIT+
+    ;
+
+fragment DIGIT
+    : [0-9]
+    ;
+
+fragment LETTER
+    : [A-Z]
+    ;
+
+SIMPLE_COMMENT
+    : '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN)
+    ;
+
+BRACKETED_COMMENT
+    : '/*' .*? '*/' -> channel(HIDDEN)
+    ;
+
+WS
+    : [ \r\n\t]+ -> channel(HIDDEN)
+    ;
+
+// Catch-all for anything we can't recognize.
+// We use this to be able to ignore and recover all the text
+// when splitting statements with DelimiterLexer
+UNRECOGNIZED
+    : .
+    ;
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index bf275c88344e2..418771144310a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -42,9 +42,15 @@ import org.apache.spark.sql.types._
  * to resolve attribute references.
  */
 object SimpleAnalyzer
-  extends SimpleAnalyzer(new SimpleCatalystConf(caseSensitiveAnalysis = true))
-class SimpleAnalyzer(conf: CatalystConf)
-  extends Analyzer(new SessionCatalog(new InMemoryCatalog, conf), EmptyFunctionRegistry, conf)
+  extends SimpleAnalyzer(
+    EmptyFunctionRegistry,
+    new SimpleCatalystConf(caseSensitiveAnalysis = true))
+
+class SimpleAnalyzer(functionRegistry: FunctionRegistry, conf: CatalystConf)
+  extends Analyzer(
+    new SessionCatalog(new InMemoryCatalog, functionRegistry, conf),
+    functionRegistry,
+    conf)
 
 /**
  * Provides a logical query plan analyzer, which translates [[UnresolvedAttribute]]s and
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index f584a4b73a007..e9788b7e4dd51 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -45,6 +45,13 @@ trait FunctionRegistry {
 
   /* Get the class of the registered function by specified name. */
   def lookupFunction(name: String): Option[ExpressionInfo]
+
+  /* Get the builder of the registered function by specified name. */
+  def lookupFunctionBuilder(name: String): Option[FunctionBuilder]
+
+  /** Drop a function and return whether the function existed. */
+  def dropFunction(name: String): Boolean
+
 }
 
 class SimpleFunctionRegistry extends FunctionRegistry {
@@ -76,6 +83,14 @@ class SimpleFunctionRegistry extends FunctionRegistry {
     functionBuilders.get(name).map(_._1)
   }
 
+  override def lookupFunctionBuilder(name: String): Option[FunctionBuilder] = synchronized {
+    functionBuilders.get(name).map(_._2)
+  }
+
+  override def dropFunction(name: String): Boolean = synchronized {
+    functionBuilders.remove(name).isDefined
+  }
+
   def copy(): SimpleFunctionRegistry = synchronized {
     val registry = new SimpleFunctionRegistry
     functionBuilders.iterator.foreach { case (name, (info, builder)) =>
@@ -106,6 +121,15 @@ object EmptyFunctionRegistry extends FunctionRegistry {
   override def lookupFunction(name: String): Option[ExpressionInfo] = {
     throw new UnsupportedOperationException
   }
+
+  override def lookupFunctionBuilder(name: String): Option[FunctionBuilder] = {
+    throw new UnsupportedOperationException
+  }
+
+  override def dropFunction(name: String): Boolean = {
+    throw new UnsupportedOperationException
+  }
+
 }
 
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
index e216fa552804b..2bbb970ec92e6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
@@ -155,7 +155,7 @@ class InMemoryCatalog extends ExternalCatalog {
       tableDefinition: CatalogTable,
       ignoreIfExists: Boolean): Unit = synchronized {
     requireDbExists(db)
-    val table = tableDefinition.name.table
+    val table = tableDefinition.identifier.table
     if (tableExists(db, table)) {
       if (!ignoreIfExists) {
         throw new AnalysisException(s"Table '$table' already exists in database '$db'")
@@ -182,14 +182,14 @@ class InMemoryCatalog extends ExternalCatalog {
   override def renameTable(db: String, oldName: String, newName: String): Unit = synchronized {
     requireTableExists(db, oldName)
     val oldDesc = catalog(db).tables(oldName)
-    oldDesc.table = oldDesc.table.copy(name = TableIdentifier(newName, Some(db)))
+    oldDesc.table = oldDesc.table.copy(identifier = TableIdentifier(newName, Some(db)))
     catalog(db).tables.put(newName, oldDesc)
     catalog(db).tables.remove(oldName)
   }
 
   override def alterTable(db: String, tableDefinition: CatalogTable): Unit = synchronized {
-    requireTableExists(db, tableDefinition.name.table)
-    catalog(db).tables(tableDefinition.name.table).table = tableDefinition
+    requireTableExists(db, tableDefinition.identifier.table)
+    catalog(db).tables(tableDefinition.identifier.table).table = tableDefinition
   }
 
   override def getTable(db: String, table: String): CatalogTable = synchronized {
@@ -296,10 +296,10 @@ class InMemoryCatalog extends ExternalCatalog {
 
   override def createFunction(db: String, func: CatalogFunction): Unit = synchronized {
     requireDbExists(db)
-    if (functionExists(db, func.name.funcName)) {
+    if (functionExists(db, func.identifier.funcName)) {
       throw new AnalysisException(s"Function '$func' already exists in '$db' database")
     } else {
-      catalog(db).functions.put(func.name.funcName, func)
+      catalog(db).functions.put(func.identifier.funcName, func)
     }
   }
 
@@ -310,14 +310,14 @@ class InMemoryCatalog extends ExternalCatalog {
 
   override def renameFunction(db: String, oldName: String, newName: String): Unit = synchronized {
     requireFunctionExists(db, oldName)
-    val newFunc = getFunction(db, oldName).copy(name = FunctionIdentifier(newName, Some(db)))
+    val newFunc = getFunction(db, oldName).copy(identifier = FunctionIdentifier(newName, Some(db)))
     catalog(db).functions.remove(oldName)
     catalog(db).functions.put(newName, newFunc)
   }
 
   override def alterFunction(db: String, funcDefinition: CatalogFunction): Unit = synchronized {
-    requireFunctionExists(db, funcDefinition.name.funcName)
-    catalog(db).functions.put(funcDefinition.name.funcName, funcDefinition)
+    requireFunctionExists(db, funcDefinition.identifier.funcName)
+    catalog(db).functions.put(funcDefinition.identifier.funcName, funcDefinition)
   }
 
   override def getFunction(db: String, funcName: String): CatalogFunction = synchronized {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 34265faa74399..7165db1d5d2ad 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -17,13 +17,14 @@
 
 package org.apache.spark.sql.catalyst.catalog
 
-import java.util.concurrent.ConcurrentHashMap
-
-import scala.collection.JavaConverters._
+import scala.collection.mutable
 
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.{CatalystConf, SimpleCatalystConf}
 import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
+import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, SimpleFunctionRegistry}
+import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo}
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
 
 
@@ -31,16 +32,25 @@ import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
  * An internal catalog that is used by a Spark Session. This internal catalog serves as a
  * proxy to the underlying metastore (e.g. Hive Metastore) and it also manages temporary
  * tables and functions of the Spark Session that it belongs to.
+ *
+ * This class is not thread-safe.
  */
-class SessionCatalog(externalCatalog: ExternalCatalog, conf: CatalystConf) {
+class SessionCatalog(
+    externalCatalog: ExternalCatalog,
+    functionRegistry: FunctionRegistry,
+    conf: CatalystConf) {
   import ExternalCatalog._
 
+  def this(externalCatalog: ExternalCatalog, functionRegistry: FunctionRegistry) {
+    this(externalCatalog, functionRegistry, new SimpleCatalystConf(true))
+  }
+
+  // For testing only.
   def this(externalCatalog: ExternalCatalog) {
-    this(externalCatalog, new SimpleCatalystConf(true))
+    this(externalCatalog, new SimpleFunctionRegistry)
   }
 
-  protected[this] val tempTables = new ConcurrentHashMap[String, LogicalPlan]
-  protected[this] val tempFunctions = new ConcurrentHashMap[String, CatalogFunction]
+  protected[this] val tempTables = new mutable.HashMap[String, LogicalPlan]
 
   // Note: we track current database here because certain operations do not explicitly
   // specify the database (e.g. DROP TABLE my_table). In these cases we must first
@@ -122,9 +132,9 @@ class SessionCatalog(externalCatalog: ExternalCatalog, conf: CatalystConf) {
    * If no such database is specified, create it in the current database.
    */
   def createTable(tableDefinition: CatalogTable, ignoreIfExists: Boolean): Unit = {
-    val db = tableDefinition.name.database.getOrElse(currentDb)
-    val table = formatTableName(tableDefinition.name.table)
-    val newTableDefinition = tableDefinition.copy(name = TableIdentifier(table, Some(db)))
+    val db = tableDefinition.identifier.database.getOrElse(currentDb)
+    val table = formatTableName(tableDefinition.identifier.table)
+    val newTableDefinition = tableDefinition.copy(identifier = TableIdentifier(table, Some(db)))
     externalCatalog.createTable(db, newTableDefinition, ignoreIfExists)
   }
 
@@ -138,9 +148,9 @@ class SessionCatalog(externalCatalog: ExternalCatalog, conf: CatalystConf) {
    * this becomes a no-op.
    */
   def alterTable(tableDefinition: CatalogTable): Unit = {
-    val db = tableDefinition.name.database.getOrElse(currentDb)
-    val table = formatTableName(tableDefinition.name.table)
-    val newTableDefinition = tableDefinition.copy(name = TableIdentifier(table, Some(db)))
+    val db = tableDefinition.identifier.database.getOrElse(currentDb)
+    val table = formatTableName(tableDefinition.identifier.table)
+    val newTableDefinition = tableDefinition.copy(identifier = TableIdentifier(table, Some(db)))
     externalCatalog.alterTable(db, newTableDefinition)
   }
 
@@ -164,9 +174,9 @@ class SessionCatalog(externalCatalog: ExternalCatalog, conf: CatalystConf) {
   def createTempTable(
       name: String,
       tableDefinition: LogicalPlan,
-      ignoreIfExists: Boolean): Unit = {
+      overrideIfExists: Boolean): Unit = {
     val table = formatTableName(name)
-    if (tempTables.containsKey(table) && !ignoreIfExists) {
+    if (tempTables.contains(table) && !overrideIfExists) {
       throw new AnalysisException(s"Temporary table '$name' already exists.")
     }
     tempTables.put(table, tableDefinition)
@@ -188,10 +198,11 @@ class SessionCatalog(externalCatalog: ExternalCatalog, conf: CatalystConf) {
     val db = oldName.database.getOrElse(currentDb)
     val oldTableName = formatTableName(oldName.table)
     val newTableName = formatTableName(newName.table)
-    if (oldName.database.isDefined || !tempTables.containsKey(oldTableName)) {
+    if (oldName.database.isDefined || !tempTables.contains(oldTableName)) {
       externalCatalog.renameTable(db, oldTableName, newTableName)
     } else {
-      val table = tempTables.remove(oldTableName)
+      val table = tempTables(oldTableName)
+      tempTables.remove(oldTableName)
       tempTables.put(newTableName, table)
     }
   }
@@ -206,7 +217,7 @@ class SessionCatalog(externalCatalog: ExternalCatalog, conf: CatalystConf) {
   def dropTable(name: TableIdentifier, ignoreIfNotExists: Boolean): Unit = {
     val db = name.database.getOrElse(currentDb)
     val table = formatTableName(name.table)
-    if (name.database.isDefined || !tempTables.containsKey(table)) {
+    if (name.database.isDefined || !tempTables.contains(table)) {
       externalCatalog.dropTable(db, table, ignoreIfNotExists)
     } else {
       tempTables.remove(table)
@@ -224,11 +235,11 @@ class SessionCatalog(externalCatalog: ExternalCatalog, conf: CatalystConf) {
     val db = name.database.getOrElse(currentDb)
     val table = formatTableName(name.table)
     val relation =
-      if (name.database.isDefined || !tempTables.containsKey(table)) {
+      if (name.database.isDefined || !tempTables.contains(table)) {
         val metadata = externalCatalog.getTable(db, table)
         CatalogRelation(db, metadata, alias)
       } else {
-        tempTables.get(table)
+        tempTables(table)
       }
     val qualifiedTable = SubqueryAlias(table, relation)
     // If an alias was specified by the lookup, wrap the plan in a subquery so that
@@ -247,7 +258,7 @@ class SessionCatalog(externalCatalog: ExternalCatalog, conf: CatalystConf) {
   def tableExists(name: TableIdentifier): Boolean = {
     val db = name.database.getOrElse(currentDb)
     val table = formatTableName(name.table)
-    if (name.database.isDefined || !tempTables.containsKey(table)) {
+    if (name.database.isDefined || !tempTables.contains(table)) {
       externalCatalog.tableExists(db, table)
     } else {
       true // it's a temporary table
@@ -266,7 +277,7 @@ class SessionCatalog(externalCatalog: ExternalCatalog, conf: CatalystConf) {
     val dbTables =
       externalCatalog.listTables(db, pattern).map { t => TableIdentifier(t, Some(db)) }
     val regex = pattern.replaceAll("\\*", ".*").r
-    val _tempTables = tempTables.keys().asScala
+    val _tempTables = tempTables.keys.toSeq
       .filter { t => regex.pattern.matcher(t).matches() }
       .map { t => TableIdentifier(t) }
     dbTables ++ _tempTables
@@ -290,7 +301,7 @@ class SessionCatalog(externalCatalog: ExternalCatalog, conf: CatalystConf) {
    * For testing only.
    */
   private[catalog] def getTempTable(name: String): Option[LogicalPlan] = {
-    Option(tempTables.get(name))
+    tempTables.get(name)
   }
 
   // ----------------------------------------------------------------------------
@@ -399,9 +410,9 @@ class SessionCatalog(externalCatalog: ExternalCatalog, conf: CatalystConf) {
    * If no such database is specified, create it in the current database.
    */
   def createFunction(funcDefinition: CatalogFunction): Unit = {
-    val db = funcDefinition.name.database.getOrElse(currentDb)
+    val db = funcDefinition.identifier.database.getOrElse(currentDb)
     val newFuncDefinition = funcDefinition.copy(
-      name = FunctionIdentifier(funcDefinition.name.funcName, Some(db)))
+      identifier = FunctionIdentifier(funcDefinition.identifier.funcName, Some(db)))
     externalCatalog.createFunction(db, newFuncDefinition)
   }
 
@@ -424,12 +435,24 @@ class SessionCatalog(externalCatalog: ExternalCatalog, conf: CatalystConf) {
    * this becomes a no-op.
    */
   def alterFunction(funcDefinition: CatalogFunction): Unit = {
-    val db = funcDefinition.name.database.getOrElse(currentDb)
+    val db = funcDefinition.identifier.database.getOrElse(currentDb)
     val newFuncDefinition = funcDefinition.copy(
-      name = FunctionIdentifier(funcDefinition.name.funcName, Some(db)))
+      identifier = FunctionIdentifier(funcDefinition.identifier.funcName, Some(db)))
     externalCatalog.alterFunction(db, newFuncDefinition)
   }
 
+  /**
+   * Retrieve the metadata of a metastore function.
+   *
+   * If a database is specified in `name`, this will return the function in that database.
+   * If no database is specified, this will return the function in the current database.
+   */
+  def getFunction(name: FunctionIdentifier): CatalogFunction = {
+    val db = name.database.getOrElse(currentDb)
+    externalCatalog.getFunction(db, name.funcName)
+  }
+
+
   // ----------------------------------------------------------------
   // | Methods that interact with temporary and metastore functions |
   // ----------------------------------------------------------------
@@ -438,14 +461,14 @@ class SessionCatalog(externalCatalog: ExternalCatalog, conf: CatalystConf) {
    * Create a temporary function.
    * This assumes no database is specified in `funcDefinition`.
    */
-  def createTempFunction(funcDefinition: CatalogFunction, ignoreIfExists: Boolean): Unit = {
-    require(funcDefinition.name.database.isEmpty,
-      "attempted to create a temporary function while specifying a database")
-    val name = funcDefinition.name.funcName
-    if (tempFunctions.containsKey(name) && !ignoreIfExists) {
+  def createTempFunction(
+      name: String,
+      funcDefinition: FunctionBuilder,
+      ignoreIfExists: Boolean): Unit = {
+    if (functionRegistry.lookupFunctionBuilder(name).isDefined && !ignoreIfExists) {
       throw new AnalysisException(s"Temporary function '$name' already exists.")
     }
-    tempFunctions.put(name, funcDefinition)
+    functionRegistry.registerFunction(name, funcDefinition)
   }
 
   /**
@@ -455,11 +478,10 @@ class SessionCatalog(externalCatalog: ExternalCatalog, conf: CatalystConf) {
   // Hive has DROP FUNCTION and DROP TEMPORARY FUNCTION. We may want to consolidate
   // dropFunction and dropTempFunction.
   def dropTempFunction(name: String, ignoreIfNotExists: Boolean): Unit = {
-    if (!tempFunctions.containsKey(name) && !ignoreIfNotExists) {
+    if (!functionRegistry.dropFunction(name) && !ignoreIfNotExists) {
       throw new AnalysisException(
         s"Temporary function '$name' cannot be dropped because it does not exist!")
     }
-    tempFunctions.remove(name)
   }
 
   /**
@@ -476,33 +498,29 @@ class SessionCatalog(externalCatalog: ExternalCatalog, conf: CatalystConf) {
       throw new AnalysisException("rename does not support moving functions across databases")
     }
     val db = oldName.database.getOrElse(currentDb)
-    if (oldName.database.isDefined || !tempFunctions.containsKey(oldName.funcName)) {
+    val oldBuilder = functionRegistry.lookupFunctionBuilder(oldName.funcName)
+    if (oldName.database.isDefined || oldBuilder.isEmpty) {
       externalCatalog.renameFunction(db, oldName.funcName, newName.funcName)
     } else {
-      val func = tempFunctions.remove(oldName.funcName)
-      val newFunc = func.copy(name = func.name.copy(funcName = newName.funcName))
-      tempFunctions.put(newName.funcName, newFunc)
+      val oldExpressionInfo = functionRegistry.lookupFunction(oldName.funcName).get
+      val newExpressionInfo = new ExpressionInfo(
+        oldExpressionInfo.getClassName,
+        newName.funcName,
+        oldExpressionInfo.getUsage,
+        oldExpressionInfo.getExtended)
+      functionRegistry.dropFunction(oldName.funcName)
+      functionRegistry.registerFunction(newName.funcName, newExpressionInfo, oldBuilder.get)
     }
   }
 
   /**
-   * Retrieve the metadata of an existing function.
-   *
-   * If a database is specified in `name`, this will return the function in that database.
-   * If no database is specified, this will first attempt to return a temporary function with
-   * the same name, then, if that does not exist, return the function in the current database.
+   * Return an [[Expression]] that represents the specified function, assuming it exists.
+   * Note: This is currently only used for temporary functions.
    */
-  def getFunction(name: FunctionIdentifier): CatalogFunction = {
-    val db = name.database.getOrElse(currentDb)
-    if (name.database.isDefined || !tempFunctions.containsKey(name.funcName)) {
-      externalCatalog.getFunction(db, name.funcName)
-    } else {
-      tempFunctions.get(name.funcName)
-    }
+  def lookupFunction(name: String, children: Seq[Expression]): Expression = {
+    functionRegistry.lookupFunction(name, children)
   }
 
-  // TODO: implement lookupFunction that returns something from the registry itself
-
   /**
    * List all matching functions in the specified database, including temporary functions.
    */
@@ -510,7 +528,7 @@ class SessionCatalog(externalCatalog: ExternalCatalog, conf: CatalystConf) {
     val dbFunctions =
       externalCatalog.listFunctions(db, pattern).map { f => FunctionIdentifier(f, Some(db)) }
     val regex = pattern.replaceAll("\\*", ".*").r
-    val _tempFunctions = tempFunctions.keys().asScala
+    val _tempFunctions = functionRegistry.listFunction()
       .filter { f => regex.pattern.matcher(f).matches() }
       .map { f => FunctionIdentifier(f) }
     dbFunctions ++ _tempFunctions
@@ -519,8 +537,8 @@ class SessionCatalog(externalCatalog: ExternalCatalog, conf: CatalystConf) {
   /**
    * Return a temporary function. For testing only.
    */
-  private[catalog] def getTempFunction(name: String): Option[CatalogFunction] = {
-    Option(tempFunctions.get(name))
+  private[catalog] def getTempFunction(name: String): Option[FunctionBuilder] = {
+    functionRegistry.lookupFunctionBuilder(name)
   }
 
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index 34803133f6a61..8bb8e09a28600 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -169,10 +169,10 @@ abstract class ExternalCatalog {
 /**
  * A function defined in the catalog.
  *
- * @param name name of the function
+ * @param identifier name of the function
  * @param className fully qualified class name, e.g. "org.apache.spark.util.MyFunc"
  */
-case class CatalogFunction(name: FunctionIdentifier, className: String)
+case class CatalogFunction(identifier: FunctionIdentifier, className: String)
 
 
 /**
@@ -216,7 +216,7 @@ case class CatalogTablePartition(
  * future once we have a better understanding of how we want to handle skewed columns.
  */
 case class CatalogTable(
-    name: TableIdentifier,
+    identifier: TableIdentifier,
     tableType: CatalogTableType,
     storage: CatalogStorageFormat,
     schema: Seq[CatalogColumn],
@@ -230,12 +230,12 @@ case class CatalogTable(
     viewText: Option[String] = None) {
 
   /** Return the database this table was specified to belong to, assuming it exists. */
-  def database: String = name.database.getOrElse {
-    throw new AnalysisException(s"table $name did not specify database")
+  def database: String = identifier.database.getOrElse {
+    throw new AnalysisException(s"table $identifier did not specify database")
   }
 
   /** Return the fully qualified name of this table, assuming the database was specified. */
-  def qualifiedName: String = name.unquotedString
+  def qualifiedName: String = identifier.unquotedString
 
   /** Syntactic sugar to update a field in `storage`. */
   def withNewStorage(
@@ -290,6 +290,6 @@ case class CatalogRelation(
   // TODO: implement this
   override def output: Seq[Attribute] = Seq.empty
 
-  require(metadata.name.database == Some(db),
+  require(metadata.identifier.database == Some(db),
     "provided database does not much the one specified in the table definition")
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
index 3540014c3e99c..105947028d931 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
@@ -21,7 +21,7 @@ import java.sql.{Date, Timestamp}
 
 import scala.language.implicitConversions
 
-import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, UnresolvedAttribute, UnresolvedExtractValue}
+import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate._
 import org.apache.spark.sql.catalyst.plans.{Inner, JoinType}
@@ -161,6 +161,10 @@ package object dsl {
     def lower(e: Expression): Expression = Lower(e)
     def sqrt(e: Expression): Expression = Sqrt(e)
     def abs(e: Expression): Expression = Abs(e)
+    def star(names: String*): Expression = names match {
+      case Seq() => UnresolvedStar(None)
+      case target => UnresolvedStar(Option(target))
+    }
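+    // e.g. star() corresponds to a bare `*` projection and star("t") to `t.*` (illustrative usage).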
 
     implicit class DslSymbol(sym: Symbol) extends ImplicitAttribute { def s: String = sym.name }
     // TODO more implicit class for literal?
@@ -231,6 +235,12 @@ package object dsl {
         AttributeReference(s, structType, nullable = true)()
       def struct(attrs: AttributeReference*): AttributeReference =
         struct(StructType.fromAttributes(attrs))
+
+      /** Create a function. */
+      def function(exprs: Expression*): UnresolvedFunction =
+        UnresolvedFunction(s, exprs, isDistinct = false)
+      def distinctFunction(exprs: Expression*): UnresolvedFunction =
+        UnresolvedFunction(s, exprs, isDistinct = true)
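+      // Illustrative usage: 'sum.function('a) builds sum(a) and
+      // 'count.distinctFunction('a) builds count(DISTINCT a).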
     }
 
     implicit class DslAttribute(a: AttributeReference) {
@@ -243,8 +253,20 @@ package object dsl {
   object expressions extends ExpressionConversions  // scalastyle:ignore
 
   object plans {  // scalastyle:ignore
+    def table(ref: String): LogicalPlan =
+      UnresolvedRelation(TableIdentifier(ref), None)
+
+    def table(db: String, ref: String): LogicalPlan =
+      UnresolvedRelation(TableIdentifier(ref, Option(db)), None)
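+    // Illustrative usage: table("db", "t").select('a) builds an unresolved
+    // plan for SELECT a FROM db.t.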
+
     implicit class DslLogicalPlan(val logicalPlan: LogicalPlan) {
-      def select(exprs: NamedExpression*): LogicalPlan = Project(exprs, logicalPlan)
+      def select(exprs: Expression*): LogicalPlan = {
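+        // Wrap non-named expressions in UnresolvedAlias so that e.g.
+        // .select('a + 'b) works without an explicit alias.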
+        val namedExpressions = exprs.map {
+          case e: NamedExpression => e
+          case e => UnresolvedAlias(e)
+        }
+        Project(namedExpressions, logicalPlan)
+      }
 
       def where(condition: Expression): LogicalPlan = Filter(condition, logicalPlan)
 
@@ -296,6 +318,14 @@ package object dsl {
           analysis.UnresolvedRelation(TableIdentifier(tableName)),
           Map.empty, logicalPlan, overwrite, false)
 
+      def as(alias: String): LogicalPlan = logicalPlan match {
+        case UnresolvedRelation(tbl, _) => UnresolvedRelation(tbl, Option(alias))
+        case plan => SubqueryAlias(alias, plan)
+      }
+
+      def distribute(exprs: Expression*): LogicalPlan =
+        RepartitionByExpression(exprs, logicalPlan)
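+      // Illustrative usage: table("t").as("x") aliases the relation, and
+      // plan.distribute('a) corresponds to DISTRIBUTE BY a.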
+
       def analyze: LogicalPlan =
         EliminateSubqueryAliases(analysis.SimpleAnalyzer.execute(logicalPlan))
     }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
index ed812e06799a9..1e9c9718006c5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
@@ -237,21 +237,35 @@ case class Divide(left: Expression, right: Expression) extends BinaryArithmetic
     } else {
       s"($javaType)(${eval1.value} $symbol ${eval2.value})"
     }
-    s"""
-      ${eval2.code}
-      boolean ${ev.isNull} = false;
-      $javaType ${ev.value} = ${ctx.defaultValue(javaType)};
-      if (${eval2.isNull} || $isZero) {
-        ${ev.isNull} = true;
-      } else {
-        ${eval1.code}
-        if (${eval1.isNull}) {
+    if (!left.nullable && !right.nullable) {
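+      // Neither operand is nullable, so division by zero is the only case that
+      // can produce null and the per-operand null checks can be skipped.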
+      s"""
+        ${eval2.code}
+        boolean ${ev.isNull} = false;
+        $javaType ${ev.value} = ${ctx.defaultValue(javaType)};
+        if ($isZero) {
           ${ev.isNull} = true;
         } else {
+          ${eval1.code}
           ${ev.value} = $divide;
         }
-      }
-    """
+      """
+    } else {
+      s"""
+        ${eval2.code}
+        boolean ${ev.isNull} = false;
+        $javaType ${ev.value} = ${ctx.defaultValue(javaType)};
+        if (${eval2.isNull} || $isZero) {
+          ${ev.isNull} = true;
+        } else {
+          ${eval1.code}
+          if (${eval1.isNull}) {
+            ${ev.isNull} = true;
+          } else {
+            ${ev.value} = $divide;
+          }
+        }
+      """
+    }
   }
 }
 
@@ -299,21 +313,35 @@ case class Remainder(left: Expression, right: Expression) extends BinaryArithmet
     } else {
       s"($javaType)(${eval1.value} $symbol ${eval2.value})"
     }
-    s"""
-      ${eval2.code}
-      boolean ${ev.isNull} = false;
-      $javaType ${ev.value} = ${ctx.defaultValue(javaType)};
-      if (${eval2.isNull} || $isZero) {
-        ${ev.isNull} = true;
-      } else {
-        ${eval1.code}
-        if (${eval1.isNull}) {
+    if (!left.nullable && !right.nullable) {
+      s"""
+        ${eval2.code}
+        boolean ${ev.isNull} = false;
+        $javaType ${ev.value} = ${ctx.defaultValue(javaType)};
+        if ($isZero) {
           ${ev.isNull} = true;
         } else {
+          ${eval1.code}
           ${ev.value} = $remainder;
         }
-      }
-    """
+      """
+    } else {
+      s"""
+        ${eval2.code}
+        boolean ${ev.isNull} = false;
+        $javaType ${ev.value} = ${ctx.defaultValue(javaType)};
+        if (${eval2.isNull} || $isZero) {
+          ${ev.isNull} = true;
+        } else {
+          ${eval1.code}
+          if (${eval1.isNull}) {
+            ${ev.isNull} = true;
+          } else {
+            ${ev.value} = $remainder;
+          }
+        }
+      """
+    }
   }
 }
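A small, hedged sketch of when the new fast path applies: literal operands are never nullable, so code generation for the division or remainder only needs the divide-by-zero guard. The snippet below merely checks the nullability precondition and interpreted evaluation; it does not invoke codegen.

    import org.apache.spark.sql.catalyst.expressions.{Divide, Literal}

    // Both children are literals and therefore non-nullable, so genCode takes the branch
    // without the per-operand null checks.
    val expr = Divide(Literal(10.0), Literal(4.0))
    assert(!expr.left.nullable && !expr.right.nullable)
    assert(expr.eval() == 2.5)
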
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index b511b4b3a08e0..cd490dd676442 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -661,7 +661,7 @@ object CodeGenerator extends Logging {
     logDebug({
       // Only add extra debugging info to byte code when we are going to print the source code.
       evaluator.setDebuggingInformation(true, true, false)
-      formatted
+      s"\n$formatted"
     })
 
     try {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
index 20818bfb1a514..e23ad5596bb2d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
@@ -274,22 +274,35 @@ case class And(left: Expression, right: Expression) extends BinaryOperator with
     val eval2 = right.gen(ctx)
 
     // The result should be `false`, if any of them is `false` whenever the other is null or not.
-    s"""
-      ${eval1.code}
-      boolean ${ev.isNull} = false;
-      boolean ${ev.value} = false;
+    if (!left.nullable && !right.nullable) {
+      ev.isNull = "false"
+      s"""
+        ${eval1.code}
+        boolean ${ev.value} = false;
 
-      if (!${eval1.isNull} && !${eval1.value}) {
-      } else {
-        ${eval2.code}
-        if (!${eval2.isNull} && !${eval2.value}) {
-        } else if (!${eval1.isNull} && !${eval2.isNull}) {
-          ${ev.value} = true;
+        if (${eval1.value}) {
+          ${eval2.code}
+          ${ev.value} = ${eval2.value};
+        }
+      """
+    } else {
+      s"""
+        ${eval1.code}
+        boolean ${ev.isNull} = false;
+        boolean ${ev.value} = false;
+
+        if (!${eval1.isNull} && !${eval1.value}) {
         } else {
-          ${ev.isNull} = true;
+          ${eval2.code}
+          if (!${eval2.isNull} && !${eval2.value}) {
+          } else if (!${eval1.isNull} && !${eval2.isNull}) {
+            ${ev.value} = true;
+          } else {
+            ${ev.isNull} = true;
+          }
         }
-      }
-     """
+      """
+    }
   }
 }
 
@@ -325,22 +338,35 @@ case class Or(left: Expression, right: Expression) extends BinaryOperator with P
     val eval2 = right.gen(ctx)
 
     // The result should be `true`, if any of them is `true` whenever the other is null or not.
-    s"""
-      ${eval1.code}
-      boolean ${ev.isNull} = false;
-      boolean ${ev.value} = true;
+    if (!left.nullable && !right.nullable) {
+      ev.isNull = "false"
+      s"""
+        ${eval1.code}
+        boolean ${ev.value} = true;
 
-      if (!${eval1.isNull} && ${eval1.value}) {
-      } else {
-        ${eval2.code}
-        if (!${eval2.isNull} && ${eval2.value}) {
-        } else if (!${eval1.isNull} && !${eval2.isNull}) {
-          ${ev.value} = false;
+        if (!${eval1.value}) {
+          ${eval2.code}
+          ${ev.value} = ${eval2.value};
+        }
+      """
+    } else {
+      s"""
+        ${eval1.code}
+        boolean ${ev.isNull} = false;
+        boolean ${ev.value} = true;
+
+        if (!${eval1.isNull} && ${eval1.value}) {
         } else {
-          ${ev.isNull} = true;
+          ${eval2.code}
+          if (!${eval2.isNull} && ${eval2.value}) {
+          } else if (!${eval1.isNull} && !${eval2.isNull}) {
+            ${ev.value} = false;
+          } else {
+            ${ev.isNull} = true;
+          }
         }
-      }
-     """
+      """
+    }
   }
 }
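The same idea for the boolean operators, sketched with literals (which are never nullable): the generated code can collapse to plain short-circuit logic, and interpreted evaluation agrees with the general three-valued path.

    import org.apache.spark.sql.catalyst.expressions.{And, Literal, Or}

    // Non-nullable inputs let And/Or skip the null bookkeeping entirely.
    val cond = Or(And(Literal(true), Literal(false)), Literal(true))
    assert(!cond.left.nullable && !cond.right.nullable)
    assert(cond.eval() == true)
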
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ng/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ng/AstBuilder.scala
new file mode 100644
index 0000000000000..5a64c414fb30d
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ng/AstBuilder.scala
@@ -0,0 +1,1452 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.parser.ng
+
+import java.sql.{Date, Timestamp}
+
+import scala.collection.JavaConverters._
+import scala.collection.mutable.ArrayBuffer
+
+import org.antlr.v4.runtime.{ParserRuleContext, Token}
+import org.antlr.v4.runtime.tree.{ParseTree, TerminalNode}
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier}
+import org.apache.spark.sql.catalyst.analysis._
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.parser.ng.SqlBaseParser._
+import org.apache.spark.sql.catalyst.plans._
+import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.CalendarInterval
+import org.apache.spark.util.random.RandomSampler
+
+/**
+ * The AstBuilder converts an ANTLR4 ParseTree into a catalyst Expression, LogicalPlan or
+ * TableIdentifier.
+ */
+class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
+  import ParserUtils._
+
+  protected def typedVisit[T](ctx: ParseTree): T = {
+    ctx.accept(this).asInstanceOf[T]
+  }
+
+  override def visitSingleStatement(ctx: SingleStatementContext): LogicalPlan = withOrigin(ctx) {
+    visit(ctx.statement).asInstanceOf[LogicalPlan]
+  }
+
+  override def visitSingleExpression(ctx: SingleExpressionContext): Expression = withOrigin(ctx) {
+    visitNamedExpression(ctx.namedExpression)
+  }
+
+  override def visitSingleTableIdentifier(
+      ctx: SingleTableIdentifierContext): TableIdentifier = withOrigin(ctx) {
+    visitTableIdentifier(ctx.tableIdentifier)
+  }
+
+  override def visitSingleDataType(ctx: SingleDataTypeContext): DataType = withOrigin(ctx) {
+    visit(ctx.dataType).asInstanceOf[DataType]
+  }
+
+  /* ********************************************************************************************
+   * Plan parsing
+   * ******************************************************************************************** */
+  protected def plan(tree: ParserRuleContext): LogicalPlan = typedVisit(tree)
+
+  /**
+   * Make sure we do not try to create a plan for a native command.
+   */
+  override def visitExecuteNativeCommand(ctx: ExecuteNativeCommandContext): LogicalPlan = null
+
+  /**
+   * Create a plan for a SHOW FUNCTIONS command.
+   */
+  override def visitShowFunctions(ctx: ShowFunctionsContext): LogicalPlan = withOrigin(ctx) {
+    import ctx._
+    if (qualifiedName != null) {
+      val names = qualifiedName().identifier().asScala.map(_.getText).toList
+      names match {
+        case db :: name :: Nil =>
+          ShowFunctions(Some(db), Some(name))
+        case name :: Nil =>
+          ShowFunctions(None, Some(name))
+        case _ =>
+          throw new ParseException("SHOW FUNCTIONS unsupported name", ctx)
+      }
+    } else if (pattern != null) {
+      ShowFunctions(None, Some(string(pattern)))
+    } else {
+      ShowFunctions(None, None)
+    }
+  }
+
+  /**
+   * Create a plan for a DESCRIBE FUNCTION command.
+   */
+  override def visitDescribeFunction(ctx: DescribeFunctionContext): LogicalPlan = withOrigin(ctx) {
+    val functionName = ctx.qualifiedName().identifier().asScala.map(_.getText).mkString(".")
+    DescribeFunction(functionName, ctx.EXTENDED != null)
+  }
+
+  /**
+   * Create a top-level plan with Common Table Expressions.
+   */
+  override def visitQuery(ctx: QueryContext): LogicalPlan = withOrigin(ctx) {
+    val query = plan(ctx.queryNoWith)
+
+    // Apply CTEs
+    query.optional(ctx.ctes) {
+      val ctes = ctx.ctes.namedQuery.asScala.map {
+        case nCtx =>
+          val namedQuery = visitNamedQuery(nCtx)
+          (namedQuery.alias, namedQuery)
+      }
+
+      // Check for duplicate names.
+      ctes.groupBy(_._1).filter(_._2.size > 1).foreach {
+        case (name, _) =>
+          throw new ParseException(
+            s"Name '$name' is used for multiple common table expressions", ctx)
+      }
+
+      With(query, ctes.toMap)
+    }
+  }
+
+  /**
+   * Create a named logical plan.
+   *
+   * This is only used for Common Table Expressions.
+   */
+  override def visitNamedQuery(ctx: NamedQueryContext): SubqueryAlias = withOrigin(ctx) {
+    SubqueryAlias(ctx.name.getText, plan(ctx.queryNoWith))
+  }
+
+  /**
+   * Create a logical plan which allows for multiple inserts using one 'from' statement. These
+   * queries have the following SQL form:
+   * {{{
+   *   [WITH cte...]?
+   *   FROM src
+   *   [INSERT INTO tbl1 SELECT *]+
+   * }}}
+   * For example:
+   * {{{
+   *   FROM db.tbl1 A
+   *   INSERT INTO dbo.tbl1 SELECT * WHERE A.value = 10 LIMIT 5
+   *   INSERT INTO dbo.tbl2 SELECT * WHERE A.value = 12
+   * }}}
+   * This (Hive) feature cannot be combined with set-operators.
+   */
+  override def visitMultiInsertQuery(ctx: MultiInsertQueryContext): LogicalPlan = withOrigin(ctx) {
+    val from = visitFromClause(ctx.fromClause)
+
+    // Build the insert clauses.
+    val inserts = ctx.multiInsertQueryBody.asScala.map {
+      body =>
+        assert(body.querySpecification.fromClause == null,
+          "Multi-Insert queries cannot have a FROM clause in their individual SELECT statements",
+          body)
+
+        withQuerySpecification(body.querySpecification, from).
+          // Add organization statements.
+          optionalMap(body.queryOrganization)(withQueryResultClauses).
+          // Add insert.
+          optionalMap(body.insertInto())(withInsertInto)
+    }
+
+    // If there are multiple INSERTs, just UNION them together into one query.

+    inserts match {
+      case Seq(query) => query
+      case queries => Union(queries)
+    }
+  }
+
+  /**
+   * Create a logical plan for a regular (single-insert) query.
+   */
+  override def visitSingleInsertQuery(
+      ctx: SingleInsertQueryContext): LogicalPlan = withOrigin(ctx) {
+    plan(ctx.queryTerm).
+      // Add organization statements.
+      optionalMap(ctx.queryOrganization)(withQueryResultClauses).
+      // Add insert.
+      optionalMap(ctx.insertInto())(withInsertInto)
+  }
+
+  /**
+   * Add an INSERT INTO [TABLE]/INSERT OVERWRITE TABLE operation to the logical plan.
+   */
+  private def withInsertInto(
+      ctx: InsertIntoContext,
+      query: LogicalPlan): LogicalPlan = withOrigin(ctx) {
+    val tableIdent = visitTableIdentifier(ctx.tableIdentifier)
+    val partitionKeys = Option(ctx.partitionSpec).map(visitPartitionSpec).getOrElse(Map.empty)
+
+    InsertIntoTable(
+      UnresolvedRelation(tableIdent, None),
+      partitionKeys,
+      query,
+      ctx.OVERWRITE != null,
+      ctx.EXISTS != null)
+  }
+
+  /**
+   * Create a partition specification map.
+   */
+  override def visitPartitionSpec(
+      ctx: PartitionSpecContext): Map[String, Option[String]] = withOrigin(ctx) {
+    ctx.partitionVal.asScala.map { pVal =>
+      val name = pVal.identifier.getText.toLowerCase
+      val value = Option(pVal.constant).map(visitStringConstant)
+      name -> value
+    }.toMap
+  }
+
+  /**
+   * Create a partition specification map without optional values.
+   */
+  protected def visitNonOptionalPartitionSpec(
+      ctx: PartitionSpecContext): Map[String, String] = withOrigin(ctx) {
+    visitPartitionSpec(ctx).mapValues(_.orNull).map(identity)
+  }
+
+  /**
+   * Convert a constant of any type into a string. This is typically used in DDL commands, and its
+   * main purpose is to prevent slight differences due to back-to-back conversions, i.e.:
+   * String -> Literal -> String.
+   */
+  protected def visitStringConstant(ctx: ConstantContext): String = withOrigin(ctx) {
+    ctx match {
+      case s: StringLiteralContext => createString(s)
+      case o => o.getText
+    }
+  }
+
+  /**
+   * Add ORDER BY/SORT BY/CLUSTER BY/DISTRIBUTE BY/LIMIT/WINDOWS clauses to the logical plan. These
+   * clauses determine the shape (ordering/partitioning/rows) of the query result.
+   */
+  private def withQueryResultClauses(
+      ctx: QueryOrganizationContext,
+      query: LogicalPlan): LogicalPlan = withOrigin(ctx) {
+    import ctx._
+
+    // Handle ORDER BY, SORT BY, DISTRIBUTE BY, and CLUSTER BY clause.
+    val withOrder = if (
+      !order.isEmpty && sort.isEmpty && distributeBy.isEmpty && clusterBy.isEmpty) {
+      // ORDER BY ...
+      Sort(order.asScala.map(visitSortItem), global = true, query)
+    } else if (order.isEmpty && !sort.isEmpty && distributeBy.isEmpty && clusterBy.isEmpty) {
+      // SORT BY ...
+      Sort(sort.asScala.map(visitSortItem), global = false, query)
+    } else if (order.isEmpty && sort.isEmpty && !distributeBy.isEmpty && clusterBy.isEmpty) {
+      // DISTRIBUTE BY ...
+      RepartitionByExpression(expressionList(distributeBy), query)
+    } else if (order.isEmpty && !sort.isEmpty && !distributeBy.isEmpty && clusterBy.isEmpty) {
+      // SORT BY ... DISTRIBUTE BY ...
+      Sort(
+        sort.asScala.map(visitSortItem),
+        global = false,
+        RepartitionByExpression(expressionList(distributeBy), query))
+    } else if (order.isEmpty && sort.isEmpty && distributeBy.isEmpty && !clusterBy.isEmpty) {
+      // CLUSTER BY ...
+      val expressions = expressionList(clusterBy)
+      Sort(
+        expressions.map(SortOrder(_, Ascending)),
+        global = false,
+        RepartitionByExpression(expressions, query))
+    } else if (order.isEmpty && sort.isEmpty && distributeBy.isEmpty && clusterBy.isEmpty) {
+      // [EMPTY]
+      query
+    } else {
+      throw new ParseException(
+        "Combination of ORDER BY/SORT BY/DISTRIBUTE BY/CLUSTER BY is not supported", ctx)
+    }
+
+    // WINDOWS
+    val withWindow = withOrder.optionalMap(windows)(withWindows)
+
+    // LIMIT
+    withWindow.optional(limit) {
+      Limit(typedVisit(limit), withWindow)
+    }
+  }
+
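For reference, a hedged sketch of the plan shape produced by the CLUSTER BY branch above; the attribute `a` stands in for a single grouping expression and OneRowRelation for an arbitrary child plan.

    import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
    import org.apache.spark.sql.catalyst.expressions.{Ascending, SortOrder}
    import org.apache.spark.sql.catalyst.plans.logical.{OneRowRelation, RepartitionByExpression, Sort}

    val a = UnresolvedAttribute("a")
    // "... CLUSTER BY a" becomes a local (non-global) sort on top of a repartition by `a`.
    val clustered = Sort(Seq(SortOrder(a, Ascending)), global = false,
      RepartitionByExpression(Seq(a), OneRowRelation))
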
+  /**
+   * Create a logical plan using a query specification.
+   */
+  override def visitQuerySpecification(
+      ctx: QuerySpecificationContext): LogicalPlan = withOrigin(ctx) {
+    val from = OneRowRelation.optional(ctx.fromClause) {
+      visitFromClause(ctx.fromClause)
+    }
+    withQuerySpecification(ctx, from)
+  }
+
+  /**
+   * Add a query specification to a logical plan. The query specification is the core of the logical
+   * plan; this is where sourcing (FROM clause), transforming (SELECT TRANSFORM/MAP/REDUCE),
+   * projection (SELECT), aggregation (GROUP BY ... HAVING ...) and filtering (WHERE) take place.
+   *
+   * Note that query hints are ignored (both by the parser and the builder).
+   */
+  private def withQuerySpecification(
+      ctx: QuerySpecificationContext,
+      relation: LogicalPlan): LogicalPlan = withOrigin(ctx) {
+    import ctx._
+
+    // WHERE
+    def filter(ctx: BooleanExpressionContext, plan: LogicalPlan): LogicalPlan = {
+      Filter(expression(ctx), plan)
+    }
+
+    // Expressions.
+    val expressions = Option(namedExpressionSeq).toSeq
+      .flatMap(_.namedExpression.asScala)
+      .map(typedVisit[Expression])
+
+    // Create either a transform or a regular query.
+    val specType = Option(kind).map(_.getType).getOrElse(SqlBaseParser.SELECT)
+    specType match {
+      case SqlBaseParser.MAP | SqlBaseParser.REDUCE | SqlBaseParser.TRANSFORM =>
+        // Transform
+
+        // Add where.
+        val withFilter = relation.optionalMap(where)(filter)
+
+        // Create the attributes.
+        val (attributes, schemaLess) = if (colTypeList != null) {
+          // Typed return columns.
+          (createStructType(colTypeList).toAttributes, false)
+        } else if (identifierSeq != null) {
+          // Untyped return columns.
+          val attrs = visitIdentifierSeq(identifierSeq).map { name =>
+            AttributeReference(name, StringType, nullable = true)()
+          }
+          (attrs, false)
+        } else {
+          (Seq(AttributeReference("key", StringType)(),
+            AttributeReference("value", StringType)()), true)
+        }
+
+        // Create the transform.
+        ScriptTransformation(
+          expressions,
+          string(script),
+          attributes,
+          withFilter,
+          withScriptIOSchema(inRowFormat, recordWriter, outRowFormat, recordReader, schemaLess))
+
+      case SqlBaseParser.SELECT =>
+        // Regular select
+
+        // Add lateral views.
+        val withLateralView = ctx.lateralView.asScala.foldLeft(relation)(withGenerate)
+
+        // Add where.
+        val withFilter = withLateralView.optionalMap(where)(filter)
+
+        // Add aggregation or a project.
+        val namedExpressions = expressions.map {
+          case e: NamedExpression => e
+          case e: Expression => UnresolvedAlias(e)
+        }
+        val withProject = if (aggregation != null) {
+          withAggregation(aggregation, namedExpressions, withFilter)
+        } else if (namedExpressions.nonEmpty) {
+          Project(namedExpressions, withFilter)
+        } else {
+          withFilter
+        }
+
+        // Having
+        val withHaving = withProject.optional(having) {
+          // Note that we added a cast to boolean. If the expression itself is already boolean,
+          // the optimizer will get rid of the unnecessary cast.
+          Filter(Cast(expression(having), BooleanType), withProject)
+        }
+
+        // Distinct
+        val withDistinct = if (setQuantifier() != null && setQuantifier().DISTINCT() != null) {
+          Distinct(withHaving)
+        } else {
+          withHaving
+        }
+
+        // Window
+        withDistinct.optionalMap(windows)(withWindows)
+    }
+  }
+
+  /**
+   * Create a (Hive based) [[ScriptInputOutputSchema]].
+   */
+  protected def withScriptIOSchema(
+      inRowFormat: RowFormatContext,
+      recordWriter: Token,
+      outRowFormat: RowFormatContext,
+      recordReader: Token,
+      schemaLess: Boolean): ScriptInputOutputSchema = null
+
+  /**
+   * Create a logical plan for a given 'FROM' clause. Note that we support multiple (comma
+   * separated) relations here; these get converted into a single plan by condition-less inner join.
+   */
+  override def visitFromClause(ctx: FromClauseContext): LogicalPlan = withOrigin(ctx) {
+    val from = ctx.relation.asScala.map(plan).reduceLeft(Join(_, _, Inner, None))
+    ctx.lateralView.asScala.foldLeft(from)(withGenerate)
+  }
+
+  /**
+   * Connect two queries by a Set operator.
+   *
+   * Supported Set operators are:
+   * - UNION [DISTINCT]
+   * - UNION ALL
+   * - EXCEPT [DISTINCT]
+   * - INTERSECT [DISTINCT]
+   */
+  override def visitSetOperation(ctx: SetOperationContext): LogicalPlan = withOrigin(ctx) {
+    val left = plan(ctx.left)
+    val right = plan(ctx.right)
+    val all = Option(ctx.setQuantifier()).exists(_.ALL != null)
+    ctx.operator.getType match {
+      case SqlBaseParser.UNION if all =>
+        Union(left, right)
+      case SqlBaseParser.UNION =>
+        Distinct(Union(left, right))
+      case SqlBaseParser.INTERSECT if all =>
+        throw new ParseException("INTERSECT ALL is not supported.", ctx)
+      case SqlBaseParser.INTERSECT =>
+        Intersect(left, right)
+      case SqlBaseParser.EXCEPT if all =>
+        throw new ParseException("EXCEPT ALL is not supported.", ctx)
+      case SqlBaseParser.EXCEPT =>
+        Except(left, right)
+    }
+  }
+
+  /**
+   * Add a [[WithWindowDefinition]] operator to a logical plan.
+   */
+  private def withWindows(
+      ctx: WindowsContext,
+      query: LogicalPlan): LogicalPlan = withOrigin(ctx) {
+    // Collect all window specifications defined in the WINDOW clause.
+    val baseWindowMap = ctx.namedWindow.asScala.map {
+      wCtx =>
+        (wCtx.identifier.getText, typedVisit[WindowSpec](wCtx.windowSpec))
+    }.toMap
+
+    // Handle cases like
+    // window w1 as (partition by p_mfgr order by p_name
+    //               range between 2 preceding and 2 following),
+    //        w2 as w1
+    val windowMapView = baseWindowMap.mapValues {
+      case WindowSpecReference(name) =>
+        baseWindowMap.get(name) match {
+          case Some(spec: WindowSpecDefinition) =>
+            spec
+          case Some(ref) =>
+            throw new ParseException(s"Window reference '$name' is not a window specification", ctx)
+          case None =>
+            throw new ParseException(s"Cannot resolve window reference '$name'", ctx)
+        }
+      case spec: WindowSpecDefinition => spec
+    }
+
+    // Note that mapValues creates a view instead of materialized map. We force materialization by
+    // mapping over identity.
+    WithWindowDefinition(windowMapView.map(identity), query)
+  }
+
+  /**
+   * Add an [[Aggregate]] to a logical plan.
+   */
+  private def withAggregation(
+      ctx: AggregationContext,
+      selectExpressions: Seq[NamedExpression],
+      query: LogicalPlan): LogicalPlan = withOrigin(ctx) {
+    import ctx._
+    val groupByExpressions = expressionList(groupingExpressions)
+
+    if (GROUPING != null) {
+      // GROUP BY .... GROUPING SETS (...)
+      val expressionMap = groupByExpressions.zipWithIndex.toMap
+      val numExpressions = expressionMap.size
+      val mask = (1 << numExpressions) - 1
+      val masks = ctx.groupingSet.asScala.map {
+        _.expression.asScala.foldLeft(mask) {
+          case (bitmap, eCtx) =>
+            // Find the index of the expression.
+            val e = typedVisit[Expression](eCtx)
+            val index = expressionMap.find(_._1.semanticEquals(e)).map(_._2).getOrElse(
+              throw new ParseException(
+                s"$e doesn't show up in the GROUP BY list", ctx))
+            // 0 means that the column at the given index is a grouping column, 1 means it is not,
+            // so we unset the bit in bitmap.
+            bitmap & ~(1 << (numExpressions - 1 - index))
+        }
+      }
+      GroupingSets(masks, groupByExpressions, query, selectExpressions)
+    } else {
+      // GROUP BY .... (WITH CUBE | WITH ROLLUP)?
+      val mappedGroupByExpressions = if (CUBE != null) {
+        Seq(Cube(groupByExpressions))
+      } else if (ROLLUP != null) {
+        Seq(Rollup(groupByExpressions))
+      } else {
+        groupByExpressions
+      }
+      Aggregate(mappedGroupByExpressions, selectExpressions, query)
+    }
+  }
+
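A worked example of the bitmask computation above, with illustrative names: for GROUP BY a, b GROUPING SETS ((a), (a, b)) there are two grouping expressions, so the starting mask is 0b11, and a cleared bit means the expression at that position is grouped in that set.

    val numExpressions = 2
    val mask = (1 << numExpressions) - 1                 // 0b11 = 3
    // grouping set (a): clear the bit for expression index 0
    val setA = mask & ~(1 << (numExpressions - 1 - 0))   // 0b01 = 1
    // grouping set (a, b): clear the bits for indices 0 and 1
    val setAB = Seq(0, 1).foldLeft(mask) {
      (bits, i) => bits & ~(1 << (numExpressions - 1 - i))
    }                                                    // 0b00 = 0
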
+  /**
+   * Add a [[Generate]] (Lateral View) to a logical plan.
+   */
+  private def withGenerate(
+      query: LogicalPlan,
+      ctx: LateralViewContext): LogicalPlan = withOrigin(ctx) {
+    val expressions = expressionList(ctx.expression)
+
+    // Create the generator.
+    val generator = ctx.qualifiedName.getText.toLowerCase match {
+      case "explode" if expressions.size == 1 =>
+        Explode(expressions.head)
+      case "json_tuple" =>
+        JsonTuple(expressions)
+      case other =>
+        withGenerator(other, expressions, ctx)
+    }
+
+    Generate(
+      generator,
+      join = true,
+      outer = ctx.OUTER != null,
+      Some(ctx.tblName.getText.toLowerCase),
+      ctx.colName.asScala.map(_.getText).map(UnresolvedAttribute.apply),
+      query)
+  }
+
+  /**
+   * Create a [[Generator]]. Override this method in order to support custom Generators.
+   */
+  protected def withGenerator(
+      name: String,
+      expressions: Seq[Expression],
+      ctx: LateralViewContext): Generator = {
+    throw new ParseException(s"Generator function '$name' is not supported", ctx)
+  }
+
+  /**
+   * Create a join between two or more logical plans.
+   */
+  override def visitJoinRelation(ctx: JoinRelationContext): LogicalPlan = withOrigin(ctx) {
+    /** Build a join between two plans. */
+    def join(ctx: JoinRelationContext, left: LogicalPlan, right: LogicalPlan): Join = {
+      val baseJoinType = ctx.joinType match {
+        case null => Inner
+        case jt if jt.FULL != null => FullOuter
+        case jt if jt.SEMI != null => LeftSemi
+        case jt if jt.LEFT != null => LeftOuter
+        case jt if jt.RIGHT != null => RightOuter
+        case _ => Inner
+      }
+
+      // Resolve the join type and join condition
+      val (joinType, condition) = Option(ctx.joinCriteria) match {
+        case Some(c) if c.USING != null =>
+          val columns = c.identifier.asScala.map { column =>
+            UnresolvedAttribute.quoted(column.getText)
+          }
+          (UsingJoin(baseJoinType, columns), None)
+        case Some(c) if c.booleanExpression != null =>
+          (baseJoinType, Option(expression(c.booleanExpression)))
+        case None if ctx.NATURAL != null =>
+          (NaturalJoin(baseJoinType), None)
+        case None =>
+          (baseJoinType, None)
+      }
+      Join(left, right, joinType, condition)
+    }
+
+    // Handle all consecutive join clauses. ANTLR produces a right-nested tree in which the
+    // first join clause is at the top. However, fields of previously referenced tables can be used
+    // in following join clauses. The tree needs to be reversed in order to make this work.
+    var result = plan(ctx.left)
+    var current = ctx
+    while (current != null) {
+      current.right match {
+        case right: JoinRelationContext =>
+          result = join(current, result, plan(right.left))
+          current = right
+        case right =>
+          result = join(current, result, plan(right))
+          current = null
+      }
+    }
+    result
+  }
+
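A hedged illustration of the reversal: for a query such as FROM a JOIN b JOIN c the plan has to nest to the left, so that columns of earlier relations remain visible to later join conditions. The expected shape is the one built below.

    import org.apache.spark.sql.catalyst.TableIdentifier
    import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
    import org.apache.spark.sql.catalyst.plans.Inner
    import org.apache.spark.sql.catalyst.plans.logical.Join

    val Seq(a, b, c) = Seq("a", "b", "c").map(n => UnresolvedRelation(TableIdentifier(n), None))
    val expected = Join(Join(a, b, Inner, None), c, Inner, None)
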
+  /**
+   * Add a [[Sample]] to a logical plan.
+   *
+   * This currently supports the following sampling methods:
+   * - TABLESAMPLE(x ROWS): Sample the table down to the given number of rows.
+   * - TABLESAMPLE(x PERCENT): Sample the table down to the given percentage. Note that percentages
+   * are defined as a number between 0 and 100.
+   * - TABLESAMPLE(BUCKET x OUT OF y): Sample the table down to a fraction of 'x' divided by 'y'.
+   */
+  private def withSample(ctx: SampleContext, query: LogicalPlan): LogicalPlan = withOrigin(ctx) {
+    // Create a sampled plan if we need one.
+    def sample(fraction: Double): Sample = {
+      // The range of fraction accepted by Sample is [0, 1]. Because Hive's block sampling
+      // function takes X PERCENT as the input and the range of X is [0, 100], we need to
+      // adjust the fraction.
+      val eps = RandomSampler.roundingEpsilon
+      assert(fraction >= 0.0 - eps && fraction <= 1.0 + eps,
+        s"Sampling fraction ($fraction) must be on interval [0, 1]",
+        ctx)
+      Sample(0.0, fraction, withReplacement = false, (math.random * 1000).toInt, query)(true)
+    }
+
+    ctx.sampleType.getType match {
+      case SqlBaseParser.ROWS =>
+        Limit(expression(ctx.expression), query)
+
+      case SqlBaseParser.PERCENTLIT =>
+        val fraction = ctx.percentage.getText.toDouble
+        sample(fraction / 100.0d)
+
+      case SqlBaseParser.BUCKET if ctx.ON != null =>
+        throw new ParseException("TABLESAMPLE(BUCKET x OUT OF y ON id) is not supported", ctx)
+
+      case SqlBaseParser.BUCKET =>
+        sample(ctx.numerator.getText.toDouble / ctx.denominator.getText.toDouble)
+    }
+  }
+
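A quick worked example of the fraction handling above: both percentage and bucket sampling end up as plain fractions on the [0, 1] interval.

    val percentFraction = 30.0 / 100.0d   // TABLESAMPLE(30 PERCENT)        -> 0.3
    val bucketFraction  = 1.0 / 4.0       // TABLESAMPLE(BUCKET 1 OUT OF 4) -> 0.25
    assert(percentFraction == 0.3 && bucketFraction == 0.25)
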
+  /**
+   * Create a logical plan for a sub-query.
+   */
+  override def visitSubquery(ctx: SubqueryContext): LogicalPlan = withOrigin(ctx) {
+    plan(ctx.queryNoWith)
+  }
+
+  /**
+   * Create an un-aliased table reference. This is typically used for top-level table references,
+   * for example:
+   * {{{
+   *   INSERT INTO db.tbl2
+   *   TABLE db.tbl1
+   * }}}
+   */
+  override def visitTable(ctx: TableContext): LogicalPlan = withOrigin(ctx) {
+    UnresolvedRelation(visitTableIdentifier(ctx.tableIdentifier), None)
+  }
+
+  /**
+   * Create an aliased table reference. This is typically used in FROM clauses.
+   */
+  override def visitTableName(ctx: TableNameContext): LogicalPlan = withOrigin(ctx) {
+    val table = UnresolvedRelation(
+      visitTableIdentifier(ctx.tableIdentifier),
+      Option(ctx.identifier).map(_.getText))
+    table.optionalMap(ctx.sample)(withSample)
+  }
+
+  /**
+   * Create an inline table (a virtual table in Hive parlance).
+   */
+  override def visitInlineTable(ctx: InlineTableContext): LogicalPlan = withOrigin(ctx) {
+    // Get the backing expressions.
+    val expressions = ctx.expression.asScala.map { eCtx =>
+      val e = expression(eCtx)
+      assert(e.foldable, "All expressions in an inline table must be constants.", eCtx)
+      e
+    }
+
+    // Validate and evaluate the rows.
+    val (structType, structConstructor) = expressions.head.dataType match {
+      case st: StructType =>
+        (st, (e: Expression) => e)
+      case dt =>
+        val st = CreateStruct(Seq(expressions.head)).dataType
+        (st, (e: Expression) => CreateStruct(Seq(e)))
+    }
+    val rows = expressions.map {
+      case expression =>
+        val safe = Cast(structConstructor(expression), structType)
+        safe.eval().asInstanceOf[InternalRow]
+    }
+
+    // Construct attributes.
+    val baseAttributes = structType.toAttributes.map(_.withNullability(true))
+    val attributes = if (ctx.identifierList != null) {
+      val aliases = visitIdentifierList(ctx.identifierList)
+      assert(aliases.size == baseAttributes.size,
+        "Number of aliases must match the number of fields in an inline table.", ctx)
+      baseAttributes.zip(aliases).map(p => p._1.withName(p._2))
+    } else {
+      baseAttributes
+    }
+
+    // Create plan and add an alias if a name has been defined.
+    LocalRelation(attributes, rows).optionalMap(ctx.identifier)(aliasPlan)
+  }
+
+  /**
+   * Create an alias (SubqueryAlias) for a join relation. This is practically the same as
+   * visitAliasedQuery and visitNamedExpression; ANTLR4, however, requires us to use 3 different
+   * hooks.
+   */
+  override def visitAliasedRelation(ctx: AliasedRelationContext): LogicalPlan = withOrigin(ctx) {
+    plan(ctx.relation).optionalMap(ctx.sample)(withSample).optionalMap(ctx.identifier)(aliasPlan)
+  }
+
+  /**
+   * Create an alias (SubqueryAlias) for a sub-query. This is practically the same as
+   * visitAliasedRelation and visitNamedExpression; ANTLR4, however, requires us to use 3 different
+   * hooks.
+   */
+  override def visitAliasedQuery(ctx: AliasedQueryContext): LogicalPlan = withOrigin(ctx) {
+    plan(ctx.queryNoWith).optionalMap(ctx.sample)(withSample).optionalMap(ctx.identifier)(aliasPlan)
+  }
+
+  /**
+   * Create an alias (SubqueryAlias) for a LogicalPlan.
+   */
+  private def aliasPlan(alias: IdentifierContext, plan: LogicalPlan): LogicalPlan = {
+    SubqueryAlias(alias.getText, plan)
+  }
+
+  /**
+   * Create a Sequence of Strings for a parenthesis enclosed alias list.
+   */
+  override def visitIdentifierList(ctx: IdentifierListContext): Seq[String] = withOrigin(ctx) {
+    visitIdentifierSeq(ctx.identifierSeq)
+  }
+
+  /**
+   * Create a Sequence of Strings for an identifier list.
+   */
+  override def visitIdentifierSeq(ctx: IdentifierSeqContext): Seq[String] = withOrigin(ctx) {
+    ctx.identifier.asScala.map(_.getText)
+  }
+
+  /* ********************************************************************************************
+   * Table Identifier parsing
+   * ******************************************************************************************** */
+  /**
+   * Create a [[TableIdentifier]] from a 'tableName' or 'databaseName'.'tableName' pattern.
+   */
+  override def visitTableIdentifier(
+      ctx: TableIdentifierContext): TableIdentifier = withOrigin(ctx) {
+    TableIdentifier(ctx.table.getText, Option(ctx.db).map(_.getText))
+  }
+
+  /* ********************************************************************************************
+   * Expression parsing
+   * ******************************************************************************************** */
+  /**
+   * Create an expression from the given context. This method just passes the context on to the
+   * visitor and only takes care of typing (we assume that the visitor returns an Expression here).
+   */
+  protected def expression(ctx: ParserRuleContext): Expression = typedVisit(ctx)
+
+  /**
+   * Create sequence of expressions from the given sequence of contexts.
+   */
+  private def expressionList(trees: java.util.List[ExpressionContext]): Seq[Expression] = {
+    trees.asScala.map(expression)
+  }
+
+  /**
+   * Invert a boolean expression if it has a valid NOT clause.
+   */
+  private def invertIfNotDefined(expression: Expression, not: TerminalNode): Expression = {
+    if (not != null) {
+      Not(expression)
+    } else {
+      expression
+    }
+  }
+
+  /**
+   * Create a star (i.e. all) expression; this selects all elements (in the specified object).
+   * Both un-targeted (global) and targeted aliases are supported.
+   */
+  override def visitStar(ctx: StarContext): Expression = withOrigin(ctx) {
+    UnresolvedStar(Option(ctx.qualifiedName()).map(_.identifier.asScala.map(_.getText)))
+  }
+
+  /**
+   * Create an aliased expression if an alias is specified. Both single and multi-aliases are
+   * supported.
+   */
+  override def visitNamedExpression(ctx: NamedExpressionContext): Expression = withOrigin(ctx) {
+    val e = expression(ctx.expression)
+    if (ctx.identifier != null) {
+      Alias(e, ctx.identifier.getText)()
+    } else if (ctx.identifierList != null) {
+      MultiAlias(e, visitIdentifierList(ctx.identifierList))
+    } else {
+      e
+    }
+  }
+
+  /**
+   * Combine a number of boolean expressions into a balanced expression tree. These expressions are
+   * either combined by a logical [[And]] or a logical [[Or]].
+   *
+   * A balanced binary tree is created because regular left-recursive trees cause considerable
+   * performance degradation and can lead to stack overflows.
+   */
+  override def visitLogicalBinary(ctx: LogicalBinaryContext): Expression = withOrigin(ctx) {
+    val expressionType = ctx.operator.getType
+    val expressionCombiner = expressionType match {
+      case SqlBaseParser.AND => And.apply _
+      case SqlBaseParser.OR => Or.apply _
+    }
+
+    // Collect all similar left hand contexts.
+    val contexts = ArrayBuffer(ctx.right)
+    var current = ctx.left
+    def collectContexts: Boolean = current match {
+      case lbc: LogicalBinaryContext if lbc.operator.getType == expressionType =>
+        contexts += lbc.right
+        current = lbc.left
+        true
+      case _ =>
+        contexts += current
+        false
+    }
+    while (collectContexts) {
+      // No body - all updates take place in the collectContexts.
+    }
+
+    // Reverse the contexts to have them in the same sequence as in the SQL statement & turn them
+    // into expressions.
+    val expressions = contexts.reverse.map(expression)
+
+    // Create a balanced tree.
+    def reduceToExpressionTree(low: Int, high: Int): Expression = high - low match {
+      case 0 =>
+        expressions(low)
+      case 1 =>
+        expressionCombiner(expressions(low), expressions(high))
+      case x =>
+        val mid = low + x / 2
+        expressionCombiner(
+          reduceToExpressionTree(low, mid),
+          reduceToExpressionTree(mid + 1, high))
+    }
+    reduceToExpressionTree(0, expressions.size - 1)
+  }
+
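The same balancing strategy, extracted into a standalone, hedged sketch over plain expressions: for four conjuncts a, b, c and d it yields And(And(a, b), And(c, d)) instead of a left-deep chain.

    import org.apache.spark.sql.catalyst.expressions.{And, Expression}

    def balance(exprs: IndexedSeq[Expression]): Expression = {
      def loop(low: Int, high: Int): Expression = high - low match {
        case 0 => exprs(low)
        case 1 => And(exprs(low), exprs(high))
        case x =>
          val mid = low + x / 2
          And(loop(low, mid), loop(mid + 1, high))
      }
      loop(0, exprs.size - 1)
    }
    // balance(Vector(a, b, c, d)) == And(And(a, b), And(c, d))
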
+  /**
+   * Invert a boolean expression.
+   */
+  override def visitLogicalNot(ctx: LogicalNotContext): Expression = withOrigin(ctx) {
+    Not(expression(ctx.booleanExpression()))
+  }
+
+  /**
+   * Create a filtering correlated sub-query. This is not supported yet.
+   */
+  override def visitExists(ctx: ExistsContext): Expression = {
+    throw new ParseException("EXISTS clauses are not supported.", ctx)
+  }
+
+  /**
+   * Create a comparison expression. This compares two expressions. The following comparison
+   * operators are supported:
+   * - Equal: '=' or '=='
+   * - Null-safe Equal: '<=>'
+   * - Not Equal: '<>' or '!='
+   * - Less than: '<'
+   * - Less than or Equal: '<='
+   * - Greater than: '>'
+   * - Greater than or Equal: '>='
+   */
+  override def visitComparison(ctx: ComparisonContext): Expression = withOrigin(ctx) {
+    val left = expression(ctx.left)
+    val right = expression(ctx.right)
+    val operator = ctx.comparisonOperator().getChild(0).asInstanceOf[TerminalNode]
+    operator.getSymbol.getType match {
+      case SqlBaseParser.EQ =>
+        EqualTo(left, right)
+      case SqlBaseParser.NSEQ =>
+        EqualNullSafe(left, right)
+      case SqlBaseParser.NEQ | SqlBaseParser.NEQJ =>
+        Not(EqualTo(left, right))
+      case SqlBaseParser.LT =>
+        LessThan(left, right)
+      case SqlBaseParser.LTE =>
+        LessThanOrEqual(left, right)
+      case SqlBaseParser.GT =>
+        GreaterThan(left, right)
+      case SqlBaseParser.GTE =>
+        GreaterThanOrEqual(left, right)
+    }
+  }
+
+  /**
+   * Create a BETWEEN expression. This tests if an expression lies within the bounds set by two
+   * other expressions. The inverse can also be created.
+   */
+  override def visitBetween(ctx: BetweenContext): Expression = withOrigin(ctx) {
+    val value = expression(ctx.value)
+    val between = And(
+      GreaterThanOrEqual(value, expression(ctx.lower)),
+      LessThanOrEqual(value, expression(ctx.upper)))
+    invertIfNotDefined(between, ctx.NOT)
+  }
+
+  /**
+   * Create an IN expression. This tests if the value of the left hand side expression is
+   * contained by the sequence of expressions on the right hand side.
+   */
+  override def visitInList(ctx: InListContext): Expression = withOrigin(ctx) {
+    val in = In(expression(ctx.value), ctx.expression().asScala.map(expression))
+    invertIfNotDefined(in, ctx.NOT)
+  }
+
+  /**
+   * Create an IN expression where the right hand side is a query. This is unsupported.
+   */
+  override def visitInSubquery(ctx: InSubqueryContext): Expression = {
+    throw new ParseException("IN with a Sub-query is currently not supported.", ctx)
+  }
+
+  /**
+   * Create a (R)LIKE/REGEXP expression.
+   */
+  override def visitLike(ctx: LikeContext): Expression = {
+    val left = expression(ctx.value)
+    val right = expression(ctx.pattern)
+    val like = ctx.like.getType match {
+      case SqlBaseParser.LIKE =>
+        Like(left, right)
+      case SqlBaseParser.RLIKE =>
+        RLike(left, right)
+    }
+    invertIfNotDefined(like, ctx.NOT)
+  }
+
+  /**
+   * Create an IS (NOT) NULL expression.
+   */
+  override def visitNullPredicate(ctx: NullPredicateContext): Expression = withOrigin(ctx) {
+    val value = expression(ctx.value)
+    if (ctx.NOT != null) {
+      IsNotNull(value)
+    } else {
+      IsNull(value)
+    }
+  }
+
+  /**
+   * Create a binary arithmetic expression. The following arithmetic operators are supported:
+   * - Multiplication: '*'
+   * - Division: '/'
+   * - Hive Long Division: 'DIV'
+   * - Modulo: '%'
+   * - Addition: '+'
+   * - Subtraction: '-'
+   * - Binary AND: '&'
+   * - Binary XOR: '^'
+   * - Binary OR: '|'
+   */
+  override def visitArithmeticBinary(ctx: ArithmeticBinaryContext): Expression = withOrigin(ctx) {
+    val left = expression(ctx.left)
+    val right = expression(ctx.right)
+    ctx.operator.getType match {
+      case SqlBaseParser.ASTERISK =>
+        Multiply(left, right)
+      case SqlBaseParser.SLASH =>
+        Divide(left, right)
+      case SqlBaseParser.PERCENT =>
+        Remainder(left, right)
+      case SqlBaseParser.DIV =>
+        Cast(Divide(left, right), LongType)
+      case SqlBaseParser.PLUS =>
+        Add(left, right)
+      case SqlBaseParser.MINUS =>
+        Subtract(left, right)
+      case SqlBaseParser.AMPERSAND =>
+        BitwiseAnd(left, right)
+      case SqlBaseParser.HAT =>
+        BitwiseXor(left, right)
+      case SqlBaseParser.PIPE =>
+        BitwiseOr(left, right)
+    }
+  }
+
+  /**
+   * Create a unary arithmetic expression. The following arithmetic operators are supported:
+   * - Plus: '+'
+   * - Minus: '-'
+   * - Bitwise Not: '~'
+   */
+  override def visitArithmeticUnary(ctx: ArithmeticUnaryContext): Expression = withOrigin(ctx) {
+    val value = expression(ctx.valueExpression)
+    ctx.operator.getType match {
+      case SqlBaseParser.PLUS =>
+        value
+      case SqlBaseParser.MINUS =>
+        UnaryMinus(value)
+      case SqlBaseParser.TILDE =>
+        BitwiseNot(value)
+    }
+  }
+
+  /**
+   * Create a [[Cast]] expression.
+   */
+  override def visitCast(ctx: CastContext): Expression = withOrigin(ctx) {
+    Cast(expression(ctx.expression), typedVisit(ctx.dataType))
+  }
+
+  /**
+   * Create a (windowed) Function expression.
+   */
+  override def visitFunctionCall(ctx: FunctionCallContext): Expression = withOrigin(ctx) {
+    // Create the function call.
+    val name = ctx.qualifiedName.getText
+    val isDistinct = Option(ctx.setQuantifier()).exists(_.DISTINCT != null)
+    val arguments = ctx.expression().asScala.map(expression) match {
+      case Seq(UnresolvedStar(None)) if name.toLowerCase == "count" && !isDistinct =>
+        // Transform COUNT(*) into COUNT(1). Move this to analysis?
+        Seq(Literal(1))
+      case expressions =>
+        expressions
+    }
+    val function = UnresolvedFunction(name, arguments, isDistinct)
+
+    // Check if the function is evaluated in a windowed context.
+    ctx.windowSpec match {
+      case spec: WindowRefContext =>
+        UnresolvedWindowExpression(function, visitWindowRef(spec))
+      case spec: WindowDefContext =>
+        WindowExpression(function, visitWindowDef(spec))
+      case _ => function
+    }
+  }
+
+  /**
+   * Create a reference to a window frame, i.e. [[WindowSpecReference]].
+   */
+  override def visitWindowRef(ctx: WindowRefContext): WindowSpecReference = withOrigin(ctx) {
+    WindowSpecReference(ctx.identifier.getText)
+  }
+
+  /**
+   * Create a window definition, i.e. [[WindowSpecDefinition]].
+   */
+  override def visitWindowDef(ctx: WindowDefContext): WindowSpecDefinition = withOrigin(ctx) {
+    // CLUSTER BY ... | PARTITION BY ... ORDER BY ...
+    val partition = ctx.partition.asScala.map(expression)
+    val order = ctx.sortItem.asScala.map(visitSortItem)
+
+    // RANGE/ROWS BETWEEN ...
+    val frameSpecOption = Option(ctx.windowFrame).map { frame =>
+      val frameType = frame.frameType.getType match {
+        case SqlBaseParser.RANGE => RangeFrame
+        case SqlBaseParser.ROWS => RowFrame
+      }
+
+      SpecifiedWindowFrame(
+        frameType,
+        visitFrameBound(frame.start),
+        Option(frame.end).map(visitFrameBound).getOrElse(CurrentRow))
+    }
+
+    WindowSpecDefinition(
+      partition,
+      order,
+      frameSpecOption.getOrElse(UnspecifiedFrame))
+  }
+
+  /**
+   * Create or resolve a [[FrameBoundary]]. Simple math expressions are allowed for Value
+   * Preceding/Following boundaries. These expressions must be constant (foldable) and return an
+   * integer value.
+   */
+  override def visitFrameBound(ctx: FrameBoundContext): FrameBoundary = withOrigin(ctx) {
+    // We currently only allow foldable integers.
+    def value: Int = {
+      val e = expression(ctx.expression)
+      assert(e.resolved && e.foldable && e.dataType == IntegerType,
+        "Frame bound value must be a constant integer.",
+        ctx)
+      e.eval().asInstanceOf[Int]
+    }
+
+    // Create the FrameBoundary
+    ctx.boundType.getType match {
+      case SqlBaseParser.PRECEDING if ctx.UNBOUNDED != null =>
+        UnboundedPreceding
+      case SqlBaseParser.PRECEDING =>
+        ValuePreceding(value)
+      case SqlBaseParser.CURRENT =>
+        CurrentRow
+      case SqlBaseParser.FOLLOWING if ctx.UNBOUNDED != null =>
+        UnboundedFollowing
+      case SqlBaseParser.FOLLOWING =>
+        ValueFollowing(value)
+    }
+  }
+
+  /**
+   * Create a [[CreateStruct]] expression.
+   */
+  override def visitRowConstructor(ctx: RowConstructorContext): Expression = withOrigin(ctx) {
+    CreateStruct(ctx.expression.asScala.map(expression))
+  }
+
+  /**
+   * Create a [[ScalarSubquery]] expression.
+   */
+  override def visitSubqueryExpression(
+      ctx: SubqueryExpressionContext): Expression = withOrigin(ctx) {
+    ScalarSubquery(plan(ctx.query))
+  }
+
+  /**
+   * Create a value based [[CaseWhen]] expression. This has the following SQL form:
+   * {{{
+   *   CASE [expression]
+   *    WHEN [value] THEN [expression]
+   *    ...
+   *    ELSE [expression]
+   *   END
+   * }}}
+   */
+  override def visitSimpleCase(ctx: SimpleCaseContext): Expression = withOrigin(ctx) {
+    val e = expression(ctx.valueExpression)
+    val branches = ctx.whenClause.asScala.map { wCtx =>
+      (EqualTo(e, expression(wCtx.condition)), expression(wCtx.result))
+    }
+    CaseWhen(branches, Option(ctx.elseExpression).map(expression))
+  }
+
+  /**
+   * Create a condition based [[CaseWhen]] expression. This has the following SQL syntax:
+   * {{{
+   *   CASE
+   *    WHEN [predicate] THEN [expression]
+   *    ...
+   *    ELSE [expression]
+   *   END
+   * }}}
+   *
+   * @param ctx the parse tree
+   */
+  override def visitSearchedCase(ctx: SearchedCaseContext): Expression = withOrigin(ctx) {
+    val branches = ctx.whenClause.asScala.map { wCtx =>
+      (expression(wCtx.condition), expression(wCtx.result))
+    }
+    CaseWhen(branches, Option(ctx.elseExpression).map(expression))
+  }
+
+  /**
+   * Create a dereference expression. The return type depends on the type of the parent: this can
+   * either be a [[UnresolvedAttribute]] (if the parent is an [[UnresolvedAttribute]]), or an
+   * [[UnresolvedExtractValue]] if the parent is some expression.
+   */
+  override def visitDereference(ctx: DereferenceContext): Expression = withOrigin(ctx) {
+    val attr = ctx.fieldName.getText
+    expression(ctx.base) match {
+      case UnresolvedAttribute(nameParts) =>
+        UnresolvedAttribute(nameParts :+ attr)
+      case e =>
+        UnresolvedExtractValue(e, Literal(attr))
+    }
+  }
+
+  /**
+   * Create an [[UnresolvedAttribute]] expression.
+   */
+  override def visitColumnReference(ctx: ColumnReferenceContext): Expression = withOrigin(ctx) {
+    UnresolvedAttribute.quoted(ctx.getText)
+  }
+
+  /**
+   * Create an [[UnresolvedExtractValue]] expression; this is used for subscript access to an array.
+   */
+  override def visitSubscript(ctx: SubscriptContext): Expression = withOrigin(ctx) {
+    UnresolvedExtractValue(expression(ctx.value), expression(ctx.index))
+  }
+
+  /**
+   * Create an expression for an expression between parentheses. This is needed because the ANTLR
+   * visitor cannot automatically convert the nested context into an expression.
+   */
+  override def visitParenthesizedExpression(
+     ctx: ParenthesizedExpressionContext): Expression = withOrigin(ctx) {
+    expression(ctx.expression)
+  }
+
+  /**
+   * Create a [[SortOrder]] expression.
+   */
+  override def visitSortItem(ctx: SortItemContext): SortOrder = withOrigin(ctx) {
+    if (ctx.DESC != null) {
+      SortOrder(expression(ctx.expression), Descending)
+    } else {
+      SortOrder(expression(ctx.expression), Ascending)
+    }
+  }
+
+  /**
+   * Create a typed Literal expression. A typed literal has the following SQL syntax:
+   * {{{
+   *   [TYPE] '[VALUE]'
+   * }}}
+   * Currently Date and Timestamp typed literals are supported.
+   *
+   * TODO what is the added value of this over casting?
+   */
+  override def visitTypeConstructor(ctx: TypeConstructorContext): Literal = withOrigin(ctx) {
+    val value = string(ctx.STRING)
+    ctx.identifier.getText.toUpperCase match {
+      case "DATE" =>
+        Literal(Date.valueOf(value))
+      case "TIMESTAMP" =>
+        Literal(Timestamp.valueOf(value))
+      case other =>
+        throw new ParseException(s"Literals of type '$other' are currently not supported.", ctx)
+    }
+  }
+
+  /**
+   * Create a NULL literal expression.
+   */
+  override def visitNullLiteral(ctx: NullLiteralContext): Literal = withOrigin(ctx) {
+    Literal(null)
+  }
+
+  /**
+   * Create a Boolean literal expression.
+   */
+  override def visitBooleanLiteral(ctx: BooleanLiteralContext): Literal = withOrigin(ctx) {
+    if (ctx.getText.toBoolean) {
+      Literal.TrueLiteral
+    } else {
+      Literal.FalseLiteral
+    }
+  }
+
+  /**
+   * Create an integral literal expression. The code selects the narrowest integral type
+   * possible; either a BigDecimal, a Long or an Integer is returned.
+   */
+  override def visitIntegerLiteral(ctx: IntegerLiteralContext): Literal = withOrigin(ctx) {
+    BigDecimal(ctx.getText) match {
+      case v if v.isValidInt =>
+        Literal(v.intValue())
+      case v if v.isValidLong =>
+        Literal(v.longValue())
+      case v => Literal(v.underlying())
+    }
+  }
+
+  /**
+   * Create a double literal for a number denoted in scientific notation.
+   */
+  override def visitScientificDecimalLiteral(
+      ctx: ScientificDecimalLiteralContext): Literal = withOrigin(ctx) {
+    Literal(ctx.getText.toDouble)
+  }
+
+  /**
+   * Create a decimal literal for a regular decimal number.
+   */
+  override def visitDecimalLiteral(ctx: DecimalLiteralContext): Literal = withOrigin(ctx) {
+    Literal(BigDecimal(ctx.getText).underlying())
+  }
+
+  /** Create a numeric literal expression. */
+  private def numericLiteral(ctx: NumberContext)(f: String => Any): Literal = withOrigin(ctx) {
+    val raw = ctx.getText
+    try {
+      Literal(f(raw.substring(0, raw.length - 1)))
+    } catch {
+      case e: NumberFormatException =>
+        throw new ParseException(e.getMessage, ctx)
+    }
+  }
+
+  /**
+   * Create a Byte Literal expression.
+   */
+  override def visitTinyIntLiteral(ctx: TinyIntLiteralContext): Literal = numericLiteral(ctx) {
+    _.toByte
+  }
+
+  /**
+   * Create a Short Literal expression.
+   */
+  override def visitSmallIntLiteral(ctx: SmallIntLiteralContext): Literal = numericLiteral(ctx) {
+    _.toShort
+  }
+
+  /**
+   * Create a Long Literal expression.
+   */
+  override def visitBigIntLiteral(ctx: BigIntLiteralContext): Literal = numericLiteral(ctx) {
+    _.toLong
+  }
+
+  /**
+   * Create a Double Literal expression.
+   */
+  override def visitDoubleLiteral(ctx: DoubleLiteralContext): Literal = numericLiteral(ctx) {
+    _.toDouble
+  }
+
+  /**
+   * Create a String literal expression.
+   */
+  override def visitStringLiteral(ctx: StringLiteralContext): Literal = withOrigin(ctx) {
+    Literal(createString(ctx))
+  }
+
+  /**
+   * Create a String from a string literal context. This supports multiple consecutive string
+   * literals, which are concatenated; for example, the expression "'hello' 'world'" will be
+   * converted into "helloworld".
+   *
+   * Special characters can be escaped by using Hive/C-style escaping.
+   */
+  private def createString(ctx: StringLiteralContext): String = {
+    ctx.STRING().asScala.map(string).mkString
+  }
+
+  /**
+   * Create a [[CalendarInterval]] literal expression. An interval expression can contain multiple
+   * unit value pairs, for instance: interval 2 months 2 days.
+   */
+  override def visitInterval(ctx: IntervalContext): Literal = withOrigin(ctx) {
+    val intervals = ctx.intervalField.asScala.map(visitIntervalField)
+    assert(intervals.nonEmpty, "at least one time unit should be given for interval literal", ctx)
+    Literal(intervals.reduce(_.add(_)))
+  }
+
+  /**
+   * Create a [[CalendarInterval]] for a unit value pair. Two unit configuration types are
+   * supported:
+   * - Single unit.
+   * - From-To unit (only 'YEAR TO MONTH' and 'DAY TO SECOND' are supported).
+   */
+  override def visitIntervalField(ctx: IntervalFieldContext): CalendarInterval = withOrigin(ctx) {
+    import ctx._
+    val s = value.getText
+    val interval = (unit.getText.toLowerCase, Option(to).map(_.getText.toLowerCase)) match {
+      case (u, None) if u.endsWith("s") =>
+        // Handle plural forms, e.g. yearS/monthS/weekS/dayS/hourS/minuteS/secondS/...
+        CalendarInterval.fromSingleUnitString(u.substring(0, u.length - 1), s)
+      case (u, None) =>
+        CalendarInterval.fromSingleUnitString(u, s)
+      case ("year", Some("month")) =>
+        CalendarInterval.fromYearMonthString(s)
+      case ("day", Some("second")) =>
+        CalendarInterval.fromDayTimeString(s)
+      case (from, Some(t)) =>
+        throw new ParseException(s"Intervals FROM $from TO $t are not supported.", ctx)
+    }
+    assert(interval != null, "No interval can be constructed", ctx)
+    interval
+  }
+
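A hedged sketch of how the unit/value pairs combine; the helpers are the same CalendarInterval methods used by visitIntervalField and visitInterval above.

    import org.apache.spark.unsafe.types.CalendarInterval

    // "interval 2 years 3 months" -> two single-unit intervals added together.
    val years    = CalendarInterval.fromSingleUnitString("year", "2")
    val months   = CalendarInterval.fromSingleUnitString("month", "3")
    val combined = years.add(months)
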
+  /* ********************************************************************************************
+   * DataType parsing
+   * ******************************************************************************************** */
+  /**
+   * Resolve/create a primitive type.
+   */
+  override def visitPrimitiveDataType(ctx: PrimitiveDataTypeContext): DataType = withOrigin(ctx) {
+    (ctx.identifier.getText.toLowerCase, ctx.INTEGER_VALUE().asScala.toList) match {
+      case ("boolean", Nil) => BooleanType
+      case ("tinyint" | "byte", Nil) => ByteType
+      case ("smallint" | "short", Nil) => ShortType
+      case ("int" | "integer", Nil) => IntegerType
+      case ("bigint" | "long", Nil) => LongType
+      case ("float", Nil) => FloatType
+      case ("double", Nil) => DoubleType
+      case ("date", Nil) => DateType
+      case ("timestamp", Nil) => TimestampType
+      case ("char" | "varchar" | "string", Nil) => StringType
+      case ("char" | "varchar", _ :: Nil) => StringType
+      case ("binary", Nil) => BinaryType
+      case ("decimal", Nil) => DecimalType.USER_DEFAULT
+      case ("decimal", precision :: Nil) => DecimalType(precision.getText.toInt, 0)
+      case ("decimal", precision :: scale :: Nil) =>
+        DecimalType(precision.getText.toInt, scale.getText.toInt)
+      case (dt, params) =>
+        throw new ParseException(
+          s"DataType $dt${params.mkString("(", ",", ")")} is not supported.", ctx)
+    }
+  }
+
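A hedged example of what the type mapping above yields when driven through parseDataType (defined on AbstractSqlParser in ParseDriver.scala below); checkDataTypes and its parser argument are hypothetical helpers for illustration only.

    import org.apache.spark.sql.catalyst.parser.ng.AbstractSqlParser
    import org.apache.spark.sql.types.{ArrayType, DecimalType, IntegerType, StringType}

    def checkDataTypes(parser: AbstractSqlParser): Unit = {
      assert(parser.parseDataType("int") == IntegerType)
      assert(parser.parseDataType("decimal(10, 2)") == DecimalType(10, 2))
      assert(parser.parseDataType("array<string>") == ArrayType(StringType))
    }
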
+  /**
+   * Create a complex DataType. Arrays, Maps and Structures are supported.
+   */
+  override def visitComplexDataType(ctx: ComplexDataTypeContext): DataType = withOrigin(ctx) {
+    ctx.complex.getType match {
+      case SqlBaseParser.ARRAY =>
+        ArrayType(typedVisit(ctx.dataType(0)))
+      case SqlBaseParser.MAP =>
+        MapType(typedVisit(ctx.dataType(0)), typedVisit(ctx.dataType(1)))
+      case SqlBaseParser.STRUCT =>
+        createStructType(ctx.colTypeList())
+    }
+  }
+
+  /**
+   * Create a [[StructType]] from a sequence of [[StructField]]s.
+   */
+  protected def createStructType(ctx: ColTypeListContext): StructType = {
+    StructType(Option(ctx).toSeq.flatMap(visitColTypeList))
+  }
+
+  /**
+   * Create a [[StructType]] from a number of column definitions.
+   */
+  override def visitColTypeList(ctx: ColTypeListContext): Seq[StructField] = withOrigin(ctx) {
+    ctx.colType().asScala.map(visitColType)
+  }
+
+  /**
+   * Create a [[StructField]] from a column definition.
+   */
+  override def visitColType(ctx: ColTypeContext): StructField = withOrigin(ctx) {
+    import ctx._
+
+    // Add the comment to the metadata.
+    val builder = new MetadataBuilder
+    if (STRING != null) {
+      builder.putString("comment", string(STRING))
+    }
+
+    StructField(identifier.getText, typedVisit(dataType), nullable = true, builder.build())
+  }
+}
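
Note on the two visitor groups above: a quick sketch of what they produce, assuming the CatalystSqlParser entry point added in ParseDriver.scala below and the existing CalendarInterval helpers; the literal values are illustrative only.

    import org.apache.spark.sql.catalyst.parser.ng.CatalystSqlParser
    import org.apache.spark.sql.types._
    import org.apache.spark.unsafe.types.CalendarInterval

    // Primitive and parameterized types follow the case arms in visitPrimitiveDataType.
    assert(CatalystSqlParser.parseDataType("bigint") == LongType)
    assert(CatalystSqlParser.parseDataType("decimal(10, 5)") == DecimalType(10, 5))
    assert(CatalystSqlParser.parseDataType("array<int>") == ArrayType(IntegerType))

    // Interval fields delegate to the CalendarInterval factory methods used in visitIntervalField.
    val yearMonth = CalendarInterval.fromYearMonthString("1-2")          // YEAR TO MONTH form
    val dayTime = CalendarInterval.fromDayTimeString("1 10:20:30.123")   // DAY TO SECOND form
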
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ng/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ng/ParseDriver.scala
new file mode 100644
index 0000000000000..c9a286374c865
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ng/ParseDriver.scala
@@ -0,0 +1,240 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.parser.ng
+
+import org.antlr.v4.runtime._
+import org.antlr.v4.runtime.atn.PredictionMode
+import org.antlr.v4.runtime.misc.ParseCancellationException
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.expressions.Expression
+import org.apache.spark.sql.catalyst.parser.ParserInterface
+import org.apache.spark.sql.catalyst.parser.ng.SqlBaseParser._
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.trees.Origin
+import org.apache.spark.sql.types.DataType
+
+/**
+ * Base SQL parsing infrastructure.
+ */
+abstract class AbstractSqlParser extends ParserInterface with Logging {
+
+  /** Creates/Resolves DataType for a given SQL string. */
+  def parseDataType(sqlText: String): DataType = parse(sqlText) { parser =>
+    // TODO add this to the parser interface.
+    astBuilder.visitSingleDataType(parser.singleDataType())
+  }
+
+  /** Creates Expression for a given SQL string. */
+  override def parseExpression(sqlText: String): Expression = parse(sqlText) { parser =>
+    astBuilder.visitSingleExpression(parser.singleExpression())
+  }
+
+  /** Creates TableIdentifier for a given SQL string. */
+  override def parseTableIdentifier(sqlText: String): TableIdentifier = parse(sqlText) { parser =>
+    astBuilder.visitSingleTableIdentifier(parser.singleTableIdentifier())
+  }
+
+  /** Creates LogicalPlan for a given SQL string. */
+  override def parsePlan(sqlText: String): LogicalPlan = parse(sqlText) { parser =>
+    astBuilder.visitSingleStatement(parser.singleStatement()) match {
+      case plan: LogicalPlan => plan
+      case _ => nativeCommand(sqlText)
+    }
+  }
+
+  /** Get the builder (visitor) which converts a ParseTree into an AST. */
+  protected def astBuilder: AstBuilder
+
+  /** Create a native command, or fail when this is not supported. */
+  protected def nativeCommand(sqlText: String): LogicalPlan = {
+    val position = Origin(None, None)
+    throw new ParseException(Option(sqlText), "Unsupported SQL statement", position, position)
+  }
+
+  protected def parse[T](command: String)(toResult: SqlBaseParser => T): T = {
+    logInfo(s"Parsing command: $command")
+
+    val lexer = new SqlBaseLexer(new ANTLRNoCaseStringStream(command))
+    lexer.removeErrorListeners()
+    lexer.addErrorListener(ParseErrorListener)
+
+    val tokenStream = new CommonTokenStream(lexer)
+    val parser = new SqlBaseParser(tokenStream)
+    parser.addParseListener(PostProcessor)
+    parser.removeErrorListeners()
+    parser.addErrorListener(ParseErrorListener)
+
+    try {
+      try {
+        // first, try parsing with potentially faster SLL mode
+        parser.getInterpreter.setPredictionMode(PredictionMode.SLL)
+        toResult(parser)
+      }
+      catch {
+        case e: ParseCancellationException =>
+          // if we fail, parse with LL mode
+          tokenStream.reset() // rewind input stream
+          parser.reset()
+
+          // Try Again.
+          parser.getInterpreter.setPredictionMode(PredictionMode.LL)
+          toResult(parser)
+      }
+    }
+    catch {
+      case e: ParseException if e.command.isDefined =>
+        throw e
+      case e: ParseException =>
+        throw e.withCommand(command)
+      case e: AnalysisException =>
+        val position = Origin(e.line, e.startPosition)
+        throw new ParseException(Option(command), e.message, position, position)
+    }
+  }
+}
+
+/**
+ * Concrete SQL parser for Catalyst-only SQL statements.
+ */
+object CatalystSqlParser extends AbstractSqlParser {
+  val astBuilder = new AstBuilder
+}
+
+/**
+ * This string stream provides the lexer with upper case characters only. This greatly simplifies
+ * lexing the stream, while we can maintain the original command.
+ *
+ * This is based on Hive's org.apache.hadoop.hive.ql.parse.ParseDriver.ANTLRNoCaseStringStream
+ *
+ * The comment below (taken from the original class) describes the rationale for doing this:
+ *
+ * This class provides an implementation for a case insensitive token checker for the lexical
+ * analysis part of antlr. By converting the token stream into upper case at the time when lexical
+ * rules are checked, this class ensures that the lexical rules need to just match the token with
+ * upper case letters as opposed to combination of upper case and lower case characters. This is
+ * purely used for matching lexical rules. The actual token text is stored in the same way as the
+ * user input without actually converting it into an upper case. The token values are generated by
+ * the consume() function of the super class ANTLRStringStream. The LA() function is the lookahead
+ * function and is purely used for matching lexical rules. This also means that the grammar will
+ * only accept capitalized tokens in case it is run from other tools like antlrworks which do not
+ * have the ANTLRNoCaseStringStream implementation.
+ */
+
+private[parser] class ANTLRNoCaseStringStream(input: String) extends ANTLRInputStream(input) {
+  override def LA(i: Int): Int = {
+    val la = super.LA(i)
+    if (la == 0 || la == IntStream.EOF) la
+    else Character.toUpperCase(la)
+  }
+}
+
+/**
+ * The ParseErrorListener converts parse errors into AnalysisExceptions.
+ */
+case object ParseErrorListener extends BaseErrorListener {
+  override def syntaxError(
+      recognizer: Recognizer[_, _],
+      offendingSymbol: scala.Any,
+      line: Int,
+      charPositionInLine: Int,
+      msg: String,
+      e: RecognitionException): Unit = {
+    val position = Origin(Some(line), Some(charPositionInLine))
+    throw new ParseException(None, msg, position, position)
+  }
+}
+
+/**
+ * A [[ParseException]] is an [[AnalysisException]] that is thrown during the parse process. It
+ * contains fields and an extended error message that make reporting and diagnosing errors easier.
+ */
+class ParseException(
+    val command: Option[String],
+    message: String,
+    val start: Origin,
+    val stop: Origin) extends AnalysisException(message, start.line, start.startPosition) {
+
+  def this(message: String, ctx: ParserRuleContext) = {
+    this(Option(ParserUtils.command(ctx)),
+      message,
+      ParserUtils.position(ctx.getStart),
+      ParserUtils.position(ctx.getStop))
+  }
+
+  override def getMessage: String = {
+    val builder = new StringBuilder
+    builder ++= "\n" ++= message
+    start match {
+      case Origin(Some(l), Some(p)) =>
+        builder ++= s"(line $l, pos $p)\n"
+        command.foreach { cmd =>
+          val (above, below) = cmd.split("\n").splitAt(l)
+          builder ++= "\n== SQL ==\n"
+          above.foreach(builder ++= _ += '\n')
+          builder ++= (0 until p).map(_ => "-").mkString("") ++= "^^^\n"
+          below.foreach(builder ++= _ += '\n')
+        }
+      case _ =>
+        command.foreach { cmd =>
+          builder ++= "\n== SQL ==\n" ++= cmd
+        }
+    }
+    builder.toString
+  }
+
+  def withCommand(cmd: String): ParseException = {
+    new ParseException(Option(cmd), message, start, stop)
+  }
+}
+
+/**
+ * The post-processor validates and cleans up the parse tree during the parse process.
+ */
+case object PostProcessor extends SqlBaseBaseListener {
+
+  /** Remove the back ticks from an Identifier. */
+  override def exitQuotedIdentifier(ctx: QuotedIdentifierContext): Unit = {
+    replaceTokenByIdentifier(ctx, 1) { token =>
+      // Remove the double back ticks in the string.
+      token.setText(token.getText.replace("``", "`"))
+      token
+    }
+  }
+
+  /** Treat non-reserved keywords as Identifiers. */
+  override def exitNonReserved(ctx: NonReservedContext): Unit = {
+    replaceTokenByIdentifier(ctx, 0)(identity)
+  }
+
+  private def replaceTokenByIdentifier(
+      ctx: ParserRuleContext,
+      stripMargins: Int)(
+      f: CommonToken => CommonToken = identity): Unit = {
+    val parent = ctx.getParent
+    parent.removeLastChild()
+    val token = ctx.getChild(0).getPayload.asInstanceOf[Token]
+    parent.addChild(f(new CommonToken(
+      new org.antlr.v4.runtime.misc.Pair(token.getTokenSource, token.getInputStream),
+      SqlBaseParser.IDENTIFIER,
+      token.getChannel,
+      token.getStartIndex + stripMargins,
+      token.getStopIndex - stripMargins)))
+  }
+}
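
Usage sketch for the driver above: parse() tries ANTLR's faster SLL prediction mode first and only rewinds and re-parses in full LL mode when SLL gives up, and every failure surfaces as a ParseException carrying the offending line/position. The expected positions below are the ones asserted in ErrorParserSuite further down; the query strings are illustrative.

    import org.apache.spark.sql.catalyst.parser.ng.{CatalystSqlParser, ParseException}

    // Well-formed input: SLL mode normally succeeds on the first attempt.
    val plan = CatalystSqlParser.parsePlan("SELECT 1")

    // Malformed input: ParseErrorListener turns the ANTLR error into a ParseException.
    try {
      CatalystSqlParser.parsePlan("select from tbl")
    } catch {
      case e: ParseException =>
        assert(e.line == Some(1) && e.startPosition == Some(7))
        println(e.getMessage) // includes the "== SQL ==" block with a ^^^ marker
    }
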
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ng/ParserUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ng/ParserUtils.scala
new file mode 100644
index 0000000000000..1fbfa763b4820
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ng/ParserUtils.scala
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.parser.ng
+
+import org.antlr.v4.runtime.{CharStream, ParserRuleContext, Token}
+import org.antlr.v4.runtime.misc.Interval
+import org.antlr.v4.runtime.tree.TerminalNode
+
+import org.apache.spark.sql.catalyst.parser.ParseUtils.unescapeSQLString
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, Origin}
+
+/**
+ * A collection of utility methods for use during the parsing process.
+ */
+object ParserUtils {
+  /** Get the command which created the token. */
+  def command(ctx: ParserRuleContext): String = {
+    command(ctx.getStart.getInputStream)
+  }
+
+  /** Get the command which created the token. */
+  def command(stream: CharStream): String = {
+    stream.getText(Interval.of(0, stream.size()))
+  }
+
+  /** Get the code that creates the given node. */
+  def source(ctx: ParserRuleContext): String = {
+    val stream = ctx.getStart.getInputStream
+    stream.getText(Interval.of(ctx.getStart.getStartIndex, ctx.getStop.getStopIndex))
+  }
+
+  /** Get all the text which comes after the given rule. */
+  def remainder(ctx: ParserRuleContext): String = remainder(ctx.getStop)
+
+  /** Get all the text which comes after the given token. */
+  def remainder(token: Token): String = {
+    val stream = token.getInputStream
+    val interval = Interval.of(token.getStopIndex + 1, stream.size())
+    stream.getText(interval)
+  }
+
+  /** Convert a string token into a string. */
+  def string(token: Token): String = unescapeSQLString(token.getText)
+
+  /** Convert a string node into a string. */
+  def string(node: TerminalNode): String = unescapeSQLString(node.getText)
+
+  /** Get the origin (line and position) of the token. */
+  def position(token: Token): Origin = {
+    Origin(Option(token.getLine), Option(token.getCharPositionInLine))
+  }
+
+  /** Assert that a condition holds. If it does not, throw a parse exception. */
+  def assert(f: => Boolean, message: String, ctx: ParserRuleContext): Unit = {
+    if (!f) {
+      throw new ParseException(message, ctx)
+    }
+  }
+
+  /**
+   * Register the origin of the context. Any TreeNode created in the closure will be assigned the
+   * registered origin. This method restores the previously set origin after completion of the
+   * closure.
+   */
+  def withOrigin[T](ctx: ParserRuleContext)(f: => T): T = {
+    val current = CurrentOrigin.get
+    CurrentOrigin.set(position(ctx.getStart))
+    try {
+      f
+    } finally {
+      CurrentOrigin.set(current)
+    }
+  }
+
+  /** Some syntactic sugar which makes it easier to work with optional clauses for LogicalPlans. */
+  implicit class EnhancedLogicalPlan(val plan: LogicalPlan) extends AnyVal {
+    /**
+     * Create a plan using the block of code when the given context exists. Otherwise return the
+     * original plan.
+     */
+    def optional(ctx: AnyRef)(f: => LogicalPlan): LogicalPlan = {
+      if (ctx != null) {
+        f
+      } else {
+        plan
+      }
+    }
+
+    /**
+     * Map a [[LogicalPlan]] to another [[LogicalPlan]] using the passed function when the passed
+     * context exists. The original plan is returned when the context does not exist.
+     */
+    def optionalMap[C <: ParserRuleContext](
+        ctx: C)(
+        f: (C, LogicalPlan) => LogicalPlan): LogicalPlan = {
+      if (ctx != null) {
+        f(ctx, plan)
+      } else {
+        plan
+      }
+    }
+  }
+}
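
To show how builder code is expected to combine these helpers, here is a hypothetical visitor fragment; `maybeFilter`, `whereCtx` and the Filter(Literal(true), ...) condition are placeholders, not code from this patch. optionalMap leaves the plan untouched when the optional clause is absent, and withOrigin stamps every node built in the closure with the context's line/position.

    import org.antlr.v4.runtime.ParserRuleContext

    import org.apache.spark.sql.catalyst.expressions.Literal
    import org.apache.spark.sql.catalyst.parser.ng.ParserUtils._
    import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan}

    // Hypothetical: wrap `plan` in a Filter only when an optional WHERE context is present.
    def maybeFilter(plan: LogicalPlan, whereCtx: ParserRuleContext): LogicalPlan =
      plan.optionalMap(whereCtx) { (ctx, p) =>
        // Nodes created inside withOrigin inherit ctx's position for later error reporting.
        withOrigin(ctx) { Filter(Literal(true), p) }
      }
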
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala
index dabf9a2fc063a..fb7251d71b9b4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala
@@ -23,7 +23,6 @@ import org.json4s.JsonDSL._
 import org.apache.spark.annotation.DeveloperApi
 
 /**
- * ::DeveloperApi::
  * The data type for User Defined Types (UDTs).
  *
  * This interface allows a user to make their own classes more interoperable with SparkSQL;
@@ -35,8 +34,11 @@ import org.apache.spark.annotation.DeveloperApi
  *
  * The conversion via `serialize` occurs when instantiating a `DataFrame` from another RDD.
  * The conversion via `deserialize` occurs when reading from a `DataFrame`.
+ *
+ * Note: This was previously a developer API in Spark 1.x. We are making this private in Spark 2.0
+ * because we will very likely create a new version of this that works better with Datasets.
  */
-@DeveloperApi
+private[spark]
 abstract class UserDefinedType[UserType >: Null] extends DataType with Serializable {
 
   /** Underlying storage type for this UDT */
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala
index 6fa4beed99267..3ec95ef5b5c6c 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala
@@ -30,8 +30,8 @@ trait AnalysisTest extends PlanTest {
 
   private def makeAnalyzer(caseSensitive: Boolean): Analyzer = {
     val conf = new SimpleCatalystConf(caseSensitive)
-    val catalog = new SessionCatalog(new InMemoryCatalog, conf)
-    catalog.createTempTable("TaBlE", TestRelations.testRelation, ignoreIfExists = true)
+    val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
+    catalog.createTempTable("TaBlE", TestRelations.testRelation, overrideIfExists = true)
     new Analyzer(catalog, EmptyFunctionRegistry, conf) {
       override val extendedResolutionRules = EliminateSubqueryAliases :: Nil
     }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala
index 31501864a8e13..1a350bf847994 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala
@@ -32,7 +32,7 @@ import org.apache.spark.sql.types._
 
 class DecimalPrecisionSuite extends PlanTest with BeforeAndAfter {
   private val conf = new SimpleCatalystConf(caseSensitiveAnalysis = true)
-  private val catalog = new SessionCatalog(new InMemoryCatalog, conf)
+  private val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
   private val analyzer = new Analyzer(catalog, EmptyFunctionRegistry, conf)
 
   private val relation = LocalRelation(
@@ -52,7 +52,7 @@ class DecimalPrecisionSuite extends PlanTest with BeforeAndAfter {
   private val b: Expression = UnresolvedAttribute("b")
 
   before {
-    catalog.createTempTable("table", relation, ignoreIfExists = true)
+    catalog.createTempTable("table", relation, overrideIfExists = true)
   }
 
   private def checkType(expression: Expression, expectedType: DataType): Unit = {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/CatalogTestCases.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/CatalogTestCases.scala
index 277c2d717e3dd..959bd564d9ff6 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/CatalogTestCases.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/CatalogTestCases.scala
@@ -210,7 +210,7 @@ abstract class CatalogTestCases extends SparkFunSuite with BeforeAndAfterEach {
   }
 
   test("get table") {
-    assert(newBasicCatalog().getTable("db2", "tbl1").name.table == "tbl1")
+    assert(newBasicCatalog().getTable("db2", "tbl1").identifier.table == "tbl1")
   }
 
   test("get table when database/table does not exist") {
@@ -452,7 +452,7 @@ abstract class CatalogTestCases extends SparkFunSuite with BeforeAndAfterEach {
     assert(catalog.getFunction("db2", "func1").className == funcClass)
     catalog.renameFunction("db2", "func1", newName)
     intercept[AnalysisException] { catalog.getFunction("db2", "func1") }
-    assert(catalog.getFunction("db2", newName).name.funcName == newName)
+    assert(catalog.getFunction("db2", newName).identifier.funcName == newName)
     assert(catalog.getFunction("db2", newName).className == funcClass)
     intercept[AnalysisException] { catalog.renameFunction("db2", "does_not_exist", "me") }
   }
@@ -549,7 +549,7 @@ abstract class CatalogTestUtils {
 
   def newTable(name: String, database: Option[String] = None): CatalogTable = {
     CatalogTable(
-      name = TableIdentifier(name, database),
+      identifier = TableIdentifier(name, database),
       tableType = CatalogTableType.EXTERNAL_TABLE,
       storage = storageFormat,
       schema = Seq(CatalogColumn("col1", "int"), CatalogColumn("col2", "string")),
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
index 74e995cc5b4b9..acd97592b6a6b 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.catalog
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
+import org.apache.spark.sql.catalyst.expressions.{Expression, Literal}
 import org.apache.spark.sql.catalyst.plans.logical.{Range, SubqueryAlias}
 
 
@@ -197,17 +198,17 @@ class SessionCatalogSuite extends SparkFunSuite {
     val catalog = new SessionCatalog(newBasicCatalog())
     val tempTable1 = Range(1, 10, 1, 10, Seq())
     val tempTable2 = Range(1, 20, 2, 10, Seq())
-    catalog.createTempTable("tbl1", tempTable1, ignoreIfExists = false)
-    catalog.createTempTable("tbl2", tempTable2, ignoreIfExists = false)
+    catalog.createTempTable("tbl1", tempTable1, overrideIfExists = false)
+    catalog.createTempTable("tbl2", tempTable2, overrideIfExists = false)
     assert(catalog.getTempTable("tbl1") == Some(tempTable1))
     assert(catalog.getTempTable("tbl2") == Some(tempTable2))
     assert(catalog.getTempTable("tbl3") == None)
     // Temporary table already exists
     intercept[AnalysisException] {
-      catalog.createTempTable("tbl1", tempTable1, ignoreIfExists = false)
+      catalog.createTempTable("tbl1", tempTable1, overrideIfExists = false)
     }
     // Temporary table already exists but we override it
-    catalog.createTempTable("tbl1", tempTable2, ignoreIfExists = true)
+    catalog.createTempTable("tbl1", tempTable2, overrideIfExists = true)
     assert(catalog.getTempTable("tbl1") == Some(tempTable2))
   }
 
@@ -243,7 +244,7 @@ class SessionCatalogSuite extends SparkFunSuite {
     val externalCatalog = newBasicCatalog()
     val sessionCatalog = new SessionCatalog(externalCatalog)
     val tempTable = Range(1, 10, 2, 10, Seq())
-    sessionCatalog.createTempTable("tbl1", tempTable, ignoreIfExists = false)
+    sessionCatalog.createTempTable("tbl1", tempTable, overrideIfExists = false)
     sessionCatalog.setCurrentDatabase("db2")
     assert(sessionCatalog.getTempTable("tbl1") == Some(tempTable))
     assert(externalCatalog.listTables("db2").toSet == Set("tbl1", "tbl2"))
@@ -255,7 +256,7 @@ class SessionCatalogSuite extends SparkFunSuite {
     sessionCatalog.dropTable(TableIdentifier("tbl1"), ignoreIfNotExists = false)
     assert(externalCatalog.listTables("db2").toSet == Set("tbl2"))
     // If database is specified, temp tables are never dropped
-    sessionCatalog.createTempTable("tbl1", tempTable, ignoreIfExists = false)
+    sessionCatalog.createTempTable("tbl1", tempTable, overrideIfExists = false)
     sessionCatalog.createTable(newTable("tbl1", "db2"), ignoreIfExists = false)
     sessionCatalog.dropTable(TableIdentifier("tbl1", Some("db2")), ignoreIfNotExists = false)
     assert(sessionCatalog.getTempTable("tbl1") == Some(tempTable))
@@ -299,7 +300,7 @@ class SessionCatalogSuite extends SparkFunSuite {
     val externalCatalog = newBasicCatalog()
     val sessionCatalog = new SessionCatalog(externalCatalog)
     val tempTable = Range(1, 10, 2, 10, Seq())
-    sessionCatalog.createTempTable("tbl1", tempTable, ignoreIfExists = false)
+    sessionCatalog.createTempTable("tbl1", tempTable, overrideIfExists = false)
     sessionCatalog.setCurrentDatabase("db2")
     assert(sessionCatalog.getTempTable("tbl1") == Some(tempTable))
     assert(externalCatalog.listTables("db2").toSet == Set("tbl1", "tbl2"))
@@ -327,7 +328,7 @@ class SessionCatalogSuite extends SparkFunSuite {
     assert(newTbl1.properties.get("toh") == Some("frem"))
     // Alter table without explicitly specifying database
     sessionCatalog.setCurrentDatabase("db2")
-    sessionCatalog.alterTable(tbl1.copy(name = TableIdentifier("tbl1")))
+    sessionCatalog.alterTable(tbl1.copy(identifier = TableIdentifier("tbl1")))
     val newestTbl1 = externalCatalog.getTable("db2", "tbl1")
     assert(newestTbl1 == tbl1)
   }
@@ -368,7 +369,7 @@ class SessionCatalogSuite extends SparkFunSuite {
     val sessionCatalog = new SessionCatalog(externalCatalog)
     val tempTable1 = Range(1, 10, 1, 10, Seq())
     val metastoreTable1 = externalCatalog.getTable("db2", "tbl1")
-    sessionCatalog.createTempTable("tbl1", tempTable1, ignoreIfExists = false)
+    sessionCatalog.createTempTable("tbl1", tempTable1, overrideIfExists = false)
     sessionCatalog.setCurrentDatabase("db2")
     // If we explicitly specify the database, we'll look up the relation in that database
     assert(sessionCatalog.lookupRelation(TableIdentifier("tbl1", Some("db2")))
@@ -406,7 +407,7 @@ class SessionCatalogSuite extends SparkFunSuite {
     assert(!catalog.tableExists(TableIdentifier("tbl2", Some("db1"))))
     // If database is explicitly specified, do not check temporary tables
     val tempTable = Range(1, 10, 1, 10, Seq())
-    catalog.createTempTable("tbl3", tempTable, ignoreIfExists = false)
+    catalog.createTempTable("tbl3", tempTable, overrideIfExists = false)
     assert(!catalog.tableExists(TableIdentifier("tbl3", Some("db2"))))
     // If database is not explicitly specified, check the current database
     catalog.setCurrentDatabase("db2")
@@ -418,8 +419,8 @@ class SessionCatalogSuite extends SparkFunSuite {
   test("list tables without pattern") {
     val catalog = new SessionCatalog(newBasicCatalog())
     val tempTable = Range(1, 10, 2, 10, Seq())
-    catalog.createTempTable("tbl1", tempTable, ignoreIfExists = false)
-    catalog.createTempTable("tbl4", tempTable, ignoreIfExists = false)
+    catalog.createTempTable("tbl1", tempTable, overrideIfExists = false)
+    catalog.createTempTable("tbl4", tempTable, overrideIfExists = false)
     assert(catalog.listTables("db1").toSet ==
       Set(TableIdentifier("tbl1"), TableIdentifier("tbl4")))
     assert(catalog.listTables("db2").toSet ==
@@ -435,8 +436,8 @@ class SessionCatalogSuite extends SparkFunSuite {
   test("list tables with pattern") {
     val catalog = new SessionCatalog(newBasicCatalog())
     val tempTable = Range(1, 10, 2, 10, Seq())
-    catalog.createTempTable("tbl1", tempTable, ignoreIfExists = false)
-    catalog.createTempTable("tbl4", tempTable, ignoreIfExists = false)
+    catalog.createTempTable("tbl1", tempTable, overrideIfExists = false)
+    catalog.createTempTable("tbl4", tempTable, overrideIfExists = false)
     assert(catalog.listTables("db1", "*").toSet == catalog.listTables("db1").toSet)
     assert(catalog.listTables("db2", "*").toSet == catalog.listTables("db2").toSet)
     assert(catalog.listTables("db2", "tbl*").toSet ==
@@ -682,20 +683,20 @@ class SessionCatalogSuite extends SparkFunSuite {
 
   test("create temp function") {
     val catalog = new SessionCatalog(newBasicCatalog())
-    val tempFunc1 = newFunc("temp1")
-    val tempFunc2 = newFunc("temp2")
-    catalog.createTempFunction(tempFunc1, ignoreIfExists = false)
-    catalog.createTempFunction(tempFunc2, ignoreIfExists = false)
+    val tempFunc1 = (e: Seq[Expression]) => e.head
+    val tempFunc2 = (e: Seq[Expression]) => e.last
+    catalog.createTempFunction("temp1", tempFunc1, ignoreIfExists = false)
+    catalog.createTempFunction("temp2", tempFunc2, ignoreIfExists = false)
     assert(catalog.getTempFunction("temp1") == Some(tempFunc1))
     assert(catalog.getTempFunction("temp2") == Some(tempFunc2))
     assert(catalog.getTempFunction("temp3") == None)
+    val tempFunc3 = (e: Seq[Expression]) => Literal(e.size)
     // Temporary function already exists
     intercept[AnalysisException] {
-      catalog.createTempFunction(tempFunc1, ignoreIfExists = false)
+      catalog.createTempFunction("temp1", tempFunc3, ignoreIfExists = false)
     }
     // Temporary function is overridden
-    val tempFunc3 = tempFunc1.copy(className = "something else")
-    catalog.createTempFunction(tempFunc3, ignoreIfExists = true)
+    catalog.createTempFunction("temp1", tempFunc3, ignoreIfExists = true)
     assert(catalog.getTempFunction("temp1") == Some(tempFunc3))
   }
 
@@ -725,8 +726,8 @@ class SessionCatalogSuite extends SparkFunSuite {
 
   test("drop temp function") {
     val catalog = new SessionCatalog(newBasicCatalog())
-    val tempFunc = newFunc("func1")
-    catalog.createTempFunction(tempFunc, ignoreIfExists = false)
+    val tempFunc = (e: Seq[Expression]) => e.head
+    catalog.createTempFunction("func1", tempFunc, ignoreIfExists = false)
     assert(catalog.getTempFunction("func1") == Some(tempFunc))
     catalog.dropTempFunction("func1", ignoreIfNotExists = false)
     assert(catalog.getTempFunction("func1") == None)
@@ -755,20 +756,15 @@ class SessionCatalogSuite extends SparkFunSuite {
     }
   }
 
-  test("get temp function") {
-    val externalCatalog = newBasicCatalog()
-    val sessionCatalog = new SessionCatalog(externalCatalog)
-    val metastoreFunc = externalCatalog.getFunction("db2", "func1")
-    val tempFunc = newFunc("func1").copy(className = "something weird")
-    sessionCatalog.createTempFunction(tempFunc, ignoreIfExists = false)
-    sessionCatalog.setCurrentDatabase("db2")
-    // If a database is specified, we'll always return the function in that database
-    assert(sessionCatalog.getFunction(FunctionIdentifier("func1", Some("db2"))) == metastoreFunc)
-    // If no database is specified, we'll first return temporary functions
-    assert(sessionCatalog.getFunction(FunctionIdentifier("func1")) == tempFunc)
-    // Then, if no such temporary function exist, check the current database
-    sessionCatalog.dropTempFunction("func1", ignoreIfNotExists = false)
-    assert(sessionCatalog.getFunction(FunctionIdentifier("func1")) == metastoreFunc)
+  test("lookup temp function") {
+    val catalog = new SessionCatalog(newBasicCatalog())
+    val tempFunc1 = (e: Seq[Expression]) => e.head
+    catalog.createTempFunction("func1", tempFunc1, ignoreIfExists = false)
+    assert(catalog.lookupFunction("func1", Seq(Literal(1), Literal(2), Literal(3))) == Literal(1))
+    catalog.dropTempFunction("func1", ignoreIfNotExists = false)
+    intercept[AnalysisException] {
+      catalog.lookupFunction("func1", Seq(Literal(1), Literal(2), Literal(3)))
+    }
   }
 
   test("rename function") {
@@ -813,8 +809,8 @@ class SessionCatalogSuite extends SparkFunSuite {
   test("rename temp function") {
     val externalCatalog = newBasicCatalog()
     val sessionCatalog = new SessionCatalog(externalCatalog)
-    val tempFunc = newFunc("func1").copy(className = "something weird")
-    sessionCatalog.createTempFunction(tempFunc, ignoreIfExists = false)
+    val tempFunc = (e: Seq[Expression]) => e.head
+    sessionCatalog.createTempFunction("func1", tempFunc, ignoreIfExists = false)
     sessionCatalog.setCurrentDatabase("db2")
     // If a database is specified, we'll always rename the function in that database
     sessionCatalog.renameFunction(
@@ -825,8 +821,7 @@ class SessionCatalogSuite extends SparkFunSuite {
     // If no database is specified, we'll first rename temporary functions
     sessionCatalog.createFunction(newFunc("func1", Some("db2")))
     sessionCatalog.renameFunction(FunctionIdentifier("func1"), FunctionIdentifier("func4"))
-    assert(sessionCatalog.getTempFunction("func4") ==
-      Some(tempFunc.copy(name = FunctionIdentifier("func4"))))
+    assert(sessionCatalog.getTempFunction("func4") == Some(tempFunc))
     assert(sessionCatalog.getTempFunction("func1") == None)
     assert(externalCatalog.listFunctions("db2", "*").toSet == Set("func1", "func3"))
     // Then, if no such temporary function exist, rename the function in the current database
@@ -858,12 +853,12 @@ class SessionCatalogSuite extends SparkFunSuite {
 
   test("list functions") {
     val catalog = new SessionCatalog(newBasicCatalog())
-    val tempFunc1 = newFunc("func1").copy(className = "march")
-    val tempFunc2 = newFunc("yes_me").copy(className = "april")
+    val tempFunc1 = (e: Seq[Expression]) => e.head
+    val tempFunc2 = (e: Seq[Expression]) => e.last
     catalog.createFunction(newFunc("func2", Some("db2")))
     catalog.createFunction(newFunc("not_me", Some("db2")))
-    catalog.createTempFunction(tempFunc1, ignoreIfExists = false)
-    catalog.createTempFunction(tempFunc2, ignoreIfExists = false)
+    catalog.createTempFunction("func1", tempFunc1, ignoreIfExists = false)
+    catalog.createTempFunction("yes_me", tempFunc2, ignoreIfExists = false)
     assert(catalog.listFunctions("db1", "*").toSet ==
       Set(FunctionIdentifier("func1"),
         FunctionIdentifier("yes_me")))
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala
index e2c76b700f51c..dd6b5cac28fa2 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala
@@ -140,7 +140,7 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper {
 
   private val caseInsensitiveConf = new SimpleCatalystConf(false)
   private val caseInsensitiveAnalyzer = new Analyzer(
-    new SessionCatalog(new InMemoryCatalog, caseInsensitiveConf),
+    new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, caseInsensitiveConf),
     EmptyFunctionRegistry,
     caseInsensitiveConf)
 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala
index 3824c675630c4..009889d5a1f9d 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala
@@ -29,7 +29,7 @@ import org.apache.spark.sql.catalyst.rules._
 
 class EliminateSortsSuite extends PlanTest {
   val conf = new SimpleCatalystConf(caseSensitiveAnalysis = true, orderByOrdinal = false)
-  val catalog = new SessionCatalog(new InMemoryCatalog, conf)
+  val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
   val analyzer = new Analyzer(catalog, EmptyFunctionRegistry, conf)
 
   object Optimize extends RuleExecutor[LogicalPlan] {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/CatalystQlSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/CatalystQlSuite.scala
index c068e895b6643..223485e29234d 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/CatalystQlSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/CatalystQlSuite.scala
@@ -21,15 +21,20 @@ import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.parser.ng.CatalystSqlParser
 import org.apache.spark.sql.catalyst.plans.PlanTest
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.unsafe.types.CalendarInterval
 
 class CatalystQlSuite extends PlanTest {
   val parser = new CatalystQl()
+  import org.apache.spark.sql.catalyst.dsl.expressions._
+  import org.apache.spark.sql.catalyst.dsl.plans._
+
+  val star = UnresolvedAlias(UnresolvedStar(None))
 
   test("test case insensitive") {
-    val result = Project(UnresolvedAlias(Literal(1)):: Nil, OneRowRelation)
+    val result = OneRowRelation.select(1)
     assert(result === parser.parsePlan("seLect 1"))
     assert(result === parser.parsePlan("select 1"))
     assert(result === parser.parsePlan("SELECT 1"))
@@ -37,52 +42,31 @@ class CatalystQlSuite extends PlanTest {
 
   test("test NOT operator with comparison operations") {
     val parsed = parser.parsePlan("SELECT NOT TRUE > TRUE")
-    val expected = Project(
-      UnresolvedAlias(
-        Not(
-          GreaterThan(Literal(true), Literal(true)))
-      ) :: Nil,
-      OneRowRelation)
+    val expected = OneRowRelation.select(Not(GreaterThan(true, true)))
     comparePlans(parsed, expected)
   }
 
   test("test Union Distinct operator") {
-    val parsed1 = parser.parsePlan("SELECT * FROM t0 UNION SELECT * FROM t1")
-    val parsed2 = parser.parsePlan("SELECT * FROM t0 UNION DISTINCT SELECT * FROM t1")
-    val expected =
-      Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
-        SubqueryAlias("u_1",
-          Distinct(
-            Union(
-              Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
-                UnresolvedRelation(TableIdentifier("t0"), None)),
-              Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
-                UnresolvedRelation(TableIdentifier("t1"), None))))))
+    val parsed1 = parser.parsePlan(
+      "SELECT * FROM t0 UNION SELECT * FROM t1")
+    val parsed2 = parser.parsePlan(
+      "SELECT * FROM t0 UNION DISTINCT SELECT * FROM t1")
+    val expected = Distinct(Union(table("t0").select(star), table("t1").select(star)))
+      .as("u_1").select(star)
     comparePlans(parsed1, expected)
     comparePlans(parsed2, expected)
   }
 
   test("test Union All operator") {
     val parsed = parser.parsePlan("SELECT * FROM t0 UNION ALL SELECT * FROM t1")
-    val expected =
-      Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
-        SubqueryAlias("u_1",
-          Union(
-            Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
-              UnresolvedRelation(TableIdentifier("t0"), None)),
-            Project(UnresolvedAlias(UnresolvedStar(None)) :: Nil,
-              UnresolvedRelation(TableIdentifier("t1"), None)))))
+    val expected = Union(table("t0").select(star), table("t1").select(star)).as("u_1").select(star)
     comparePlans(parsed, expected)
   }
 
   test("support hive interval literal") {
     def checkInterval(sql: String, result: CalendarInterval): Unit = {
       val parsed = parser.parsePlan(sql)
-      val expected = Project(
-        UnresolvedAlias(
-          Literal(result)
-        ) :: Nil,
-        OneRowRelation)
+      val expected = OneRowRelation.select(Literal(result))
       comparePlans(parsed, expected)
     }
 
@@ -129,11 +113,7 @@ class CatalystQlSuite extends PlanTest {
   test("support scientific notation") {
     def assertRight(input: String, output: Double): Unit = {
       val parsed = parser.parsePlan("SELECT " + input)
-      val expected = Project(
-        UnresolvedAlias(
-          Literal(output)
-        ) :: Nil,
-        OneRowRelation)
+      val expected = OneRowRelation.select(Literal(output))
       comparePlans(parsed, expected)
     }
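
For readers unfamiliar with the Catalyst test DSL adopted above: table(...).select(...) builds roughly the same unresolved tree as the old hand-written Project/UnresolvedRelation expectations, which is why the assertions stay equivalent. A rough sketch, with star as defined at the top of the suite:

    import org.apache.spark.sql.catalyst.TableIdentifier
    import org.apache.spark.sql.catalyst.analysis.{UnresolvedAlias, UnresolvedRelation, UnresolvedStar}
    import org.apache.spark.sql.catalyst.dsl.expressions._
    import org.apache.spark.sql.catalyst.dsl.plans._
    import org.apache.spark.sql.catalyst.plans.logical.Project

    val star = UnresolvedAlias(UnresolvedStar(None))

    val viaDsl = table("t0").select(star)
    // ... expands to roughly the same tree as the old hand-built form:
    val byHand = Project(star :: Nil, UnresolvedRelation(TableIdentifier("t0"), None))
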
 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
index 7d3608033ba59..d9bd33c50ab1e 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
@@ -18,19 +18,24 @@
 package org.apache.spark.sql.catalyst.parser
 
 import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.catalyst.parser.ng.{CatalystSqlParser, ParseException}
 import org.apache.spark.sql.types._
 
-class DataTypeParserSuite extends SparkFunSuite {
+abstract class AbstractDataTypeParserSuite extends SparkFunSuite {
+
+  def parse(sql: String): DataType
 
   def checkDataType(dataTypeString: String, expectedDataType: DataType): Unit = {
     test(s"parse ${dataTypeString.replace("\n", "")}") {
-      assert(DataTypeParser.parse(dataTypeString) === expectedDataType)
+      assert(parse(dataTypeString) === expectedDataType)
     }
   }
 
+  def intercept(sql: String)
+
   def unsupported(dataTypeString: String): Unit = {
     test(s"$dataTypeString is not supported") {
-      intercept[DataTypeException](DataTypeParser.parse(dataTypeString))
+      intercept(dataTypeString)
     }
   }
 
@@ -97,13 +102,6 @@ class DataTypeParserSuite extends SparkFunSuite {
       StructField("arrAy", ArrayType(DoubleType, true), true) ::
       StructField("anotherArray", ArrayType(StringType, true), true) :: Nil)
   )
-  // A column name can be a reserved word in our DDL parser and SqlParser.
-  checkDataType(
-    "Struct",
-    StructType(
-      StructField("TABLE", StringType, true) ::
-      StructField("CASE", BooleanType, true) :: Nil)
-  )
   // Use backticks to quote column names having special characters.
   checkDataType(
     "struct<`x+y`:int, `!@#$%^&*()`:string, `1_2.345<>:\"`:varchar(20)>",
@@ -118,6 +116,43 @@ class DataTypeParserSuite extends SparkFunSuite {
   unsupported("it is not a data type")
   unsupported("struct")
   unsupported("struct",
+    StructType(
+      StructField("TABLE", StringType, true) ::
+        StructField("CASE", BooleanType, true) :: Nil)
+  )
+
   unsupported("struct")
+
   unsupported("struct<`x``y` int>")
 }
+
+class CatalystQlDataTypeParserSuite extends AbstractDataTypeParserSuite {
+  override def intercept(sql: String): Unit =
+    intercept[ParseException](CatalystSqlParser.parseDataType(sql))
+
+  override def parse(sql: String): DataType =
+    CatalystSqlParser.parseDataType(sql)
+
+  // A column name can be a reserved word in our DDL parser and SqlParser.
+  unsupported("Struct")
+
+  checkDataType(
+    "struct",
+    (new StructType).add("x", IntegerType).add("y", StringType))
+
+  checkDataType(
+    "struct<`x``y` int>",
+    (new StructType).add("x`y", IntegerType))
+}
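
The split into two concrete suites captures the behavioural difference between the parsers: the ANTLR4-based CatalystSqlParser accepts space-separated field definitions and doubled backticks as an escape for a literal backtick, both of which the old DataTypeParser rejects. A quick sketch mirroring the checkDataType cases above:

    import org.apache.spark.sql.catalyst.parser.ng.CatalystSqlParser
    import org.apache.spark.sql.types._

    // Space-separated field definitions are only accepted by the new parser.
    assert(CatalystSqlParser.parseDataType("struct<x int, y string>") ==
      new StructType().add("x", IntegerType).add("y", StringType))

    // A doubled backtick inside a quoted identifier escapes a literal backtick.
    assert(CatalystSqlParser.parseDataType("struct<`x``y` int>") ==
      new StructType().add("x`y", IntegerType))
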
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ng/ErrorParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ng/ErrorParserSuite.scala
new file mode 100644
index 0000000000000..1963fc368fec6
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ng/ErrorParserSuite.scala
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.parser.ng
+
+import org.apache.spark.SparkFunSuite
+
+/**
+ * Test various parser errors.
+ */
+class ErrorParserSuite extends SparkFunSuite {
+  def intercept(sql: String, line: Int, startPosition: Int, messages: String*): Unit = {
+    val e = intercept[ParseException](CatalystSqlParser.parsePlan(sql))
+
+    // Check position.
+    assert(e.line.isDefined)
+    assert(e.line.get === line)
+    assert(e.startPosition.isDefined)
+    assert(e.startPosition.get === startPosition)
+
+    // Check messages.
+    val error = e.getMessage
+    messages.foreach { message =>
+      assert(error.contains(message))
+    }
+  }
+
+  test("no viable input") {
+    intercept("select from tbl", 1, 7, "no viable alternative at input", "-------^^^")
+    intercept("select\nfrom tbl", 2, 0, "no viable alternative at input", "^^^")
+    intercept("select ((r + 1) ", 1, 16, "no viable alternative at input", "----------------^^^")
+  }
+
+  test("extraneous input") {
+    intercept("select 1 1", 1, 9, "extraneous input '1' expecting", "---------^^^")
+    intercept("select *\nfrom r as q t", 2, 12, "extraneous input", "------------^^^")
+  }
+
+  test("mismatched input") {
+    intercept("select * from r order by q from t", 1, 27,
+      "mismatched input",
+      "---------------------------^^^")
+    intercept("select *\nfrom r\norder by q\nfrom t", 4, 0, "mismatched input", "^^^")
+  }
+
+  test("semantic errors") {
+    intercept("select *\nfrom r\norder by q\ncluster by q", 3, 0,
+      "Combination of ORDER BY/SORT BY/DISTRIBUTE BY/CLUSTER BY is not supported",
+      "^^^")
+    intercept("select * from r where a in (select * from t)", 1, 24,
+      "IN with a Sub-query is currently not supported",
+      "------------------------^^^")
+  }
+}
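
For reference, the positions and ^^^ markers asserted here come from ParseException.getMessage in ParseDriver.scala. For the first "no viable input" case the rendered message looks roughly like the following; the exact ANTLR message text (shown as '...') may differ, but the "(line, pos)" suffix, the == SQL == block and the caret line are what getMessage produces.

    no viable alternative at input '...'(line 1, pos 7)

    == SQL ==
    select from tbl
    -------^^^
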
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ng/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ng/ExpressionParserSuite.scala
new file mode 100644
index 0000000000000..32311a5a66167
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ng/ExpressionParserSuite.scala
@@ -0,0 +1,497 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.parser.ng
+
+import java.sql.{Date, Timestamp}
+
+import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, _}
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.plans.PlanTest
+import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.CalendarInterval
+
+/**
+ * Test basic expression parsing. If a type of expression is supported it should be tested here.
+ *
+ * Please note that some of the expressions tested here don't have to be sound expressions; only
+ * their structure needs to be valid. Unsound expressions should be caught by the Analyzer or
+ * CheckAnalysis classes.
+ */
+class ExpressionParserSuite extends PlanTest {
+  import CatalystSqlParser._
+  import org.apache.spark.sql.catalyst.dsl.expressions._
+  import org.apache.spark.sql.catalyst.dsl.plans._
+
+  def assertEqual(sqlCommand: String, e: Expression): Unit = {
+    compareExpressions(parseExpression(sqlCommand), e)
+  }
+
+  def intercept(sqlCommand: String, messages: String*): Unit = {
+    val e = intercept[ParseException](parseExpression(sqlCommand))
+    messages.foreach { message =>
+      assert(e.message.contains(message))
+    }
+  }
+
+  test("star expressions") {
+    // Global Star
+    assertEqual("*", UnresolvedStar(None))
+
+    // Targeted Star
+    assertEqual("a.b.*", UnresolvedStar(Option(Seq("a", "b"))))
+  }
+
+  // NamedExpression (Alias/Multialias)
+  test("named expressions") {
+    // No Alias
+    val r0 = 'a
+    assertEqual("a", r0)
+
+    // Single Alias.
+    val r1 = 'a as "b"
+    assertEqual("a as b", r1)
+    assertEqual("a b", r1)
+
+    // Multi-Alias
+    assertEqual("a as (b, c)", MultiAlias('a, Seq("b", "c")))
+    assertEqual("a() (b, c)", MultiAlias('a.function(), Seq("b", "c")))
+
+    // Numeric literals without a space between the literal qualifier and the alias should not be
+    // interpreted as such. An unresolved reference should be returned instead.
+    // TODO add the JIRA-ticket number.
+    assertEqual("1SL", Symbol("1SL"))
+
+    // Aliased star is allowed.
+    assertEqual("a.* b", UnresolvedStar(Option(Seq("a"))) as 'b)
+  }
+
+  test("binary logical expressions") {
+    // And
+    assertEqual("a and b", 'a && 'b)
+
+    // Or
+    assertEqual("a or b", 'a || 'b)
+
+    // Combination And/Or check precedence
+    assertEqual("a and b or c and d", ('a && 'b) || ('c && 'd))
+    assertEqual("a or b or c and d", 'a || 'b || ('c && 'd))
+
+    // Multiple AND/OR get converted into a balanced tree
+    assertEqual("a or b or c or d or e or f", (('a || 'b) || 'c) || (('d || 'e) || 'f))
+    assertEqual("a and b and c and d and e and f", (('a && 'b) && 'c) && (('d && 'e) && 'f))
+  }
+
+  test("long binary logical expressions") {
+    def testVeryBinaryExpression(op: String, clazz: Class[_]): Unit = {
+      val sql = (1 to 1000).map(x => s"$x == $x").mkString(op)
+      val e = parseExpression(sql)
+      assert(e.collect { case _: EqualTo => true }.size === 1000)
+      assert(e.collect { case x if clazz.isInstance(x) => true }.size === 999)
+    }
+    testVeryBinaryExpression(" AND ", classOf[And])
+    testVeryBinaryExpression(" OR ", classOf[Or])
+  }
+
+  test("not expressions") {
+    assertEqual("not a", !'a)
+    assertEqual("!a", !'a)
+    assertEqual("not true > true", Not(GreaterThan(true, true)))
+  }
+
+  test("exists expression") {
+    intercept("exists (select 1 from b where b.x = a.x)", "EXISTS clauses are not supported")
+  }
+
+  test("comparison expressions") {
+    assertEqual("a = b", 'a === 'b)
+    assertEqual("a == b", 'a === 'b)
+    assertEqual("a <=> b", 'a <=> 'b)
+    assertEqual("a <> b", 'a =!= 'b)
+    assertEqual("a != b", 'a =!= 'b)
+    assertEqual("a < b", 'a < 'b)
+    assertEqual("a <= b", 'a <= 'b)
+    assertEqual("a > b", 'a > 'b)
+    assertEqual("a >= b", 'a >= 'b)
+  }
+
+  test("between expressions") {
+    assertEqual("a between b and c", 'a >= 'b && 'a <= 'c)
+    assertEqual("a not between b and c", !('a >= 'b && 'a <= 'c))
+  }
+
+  test("in expressions") {
+    assertEqual("a in (b, c, d)", 'a in ('b, 'c, 'd))
+    assertEqual("a not in (b, c, d)", !('a in ('b, 'c, 'd)))
+  }
+
+  test("in sub-query") {
+    intercept("a in (select b from c)", "IN with a Sub-query is currently not supported")
+  }
+
+  test("like expressions") {
+    assertEqual("a like 'pattern%'", 'a like "pattern%")
+    assertEqual("a not like 'pattern%'", !('a like "pattern%"))
+    assertEqual("a rlike 'pattern%'", 'a rlike "pattern%")
+    assertEqual("a not rlike 'pattern%'", !('a rlike "pattern%"))
+    assertEqual("a regexp 'pattern%'", 'a rlike "pattern%")
+    assertEqual("a not regexp 'pattern%'", !('a rlike "pattern%"))
+  }
+
+  test("is null expressions") {
+    assertEqual("a is null", 'a.isNull)
+    assertEqual("a is not null", 'a.isNotNull)
+    assertEqual("a = b is null", ('a === 'b).isNull)
+    assertEqual("a = b is not null", ('a === 'b).isNotNull)
+  }
+
+  test("binary arithmetic expressions") {
+    // Simple operations
+    assertEqual("a * b", 'a * 'b)
+    assertEqual("a / b", 'a / 'b)
+    assertEqual("a DIV b", ('a / 'b).cast(LongType))
+    assertEqual("a % b", 'a % 'b)
+    assertEqual("a + b", 'a + 'b)
+    assertEqual("a - b", 'a - 'b)
+    assertEqual("a & b", 'a & 'b)
+    assertEqual("a ^ b", 'a ^ 'b)
+    assertEqual("a | b", 'a | 'b)
+
+    // Check precedences
+    assertEqual(
+      "a * t | b ^ c & d - e + f % g DIV h / i * k",
+      'a * 't | ('b ^ ('c & ('d - 'e + (('f % 'g / 'h).cast(LongType) / 'i * 'k)))))
+  }
+
+  test("unary arithmetic expressions") {
+    assertEqual("+a", 'a)
+    assertEqual("-a", -'a)
+    assertEqual("~a", ~'a)
+    assertEqual("-+~~a", -(~(~'a)))
+  }
+
+  test("cast expressions") {
+    // Note that DataType parsing is tested elsewhere.
+    assertEqual("cast(a as int)", 'a.cast(IntegerType))
+    assertEqual("cast(a as timestamp)", 'a.cast(TimestampType))
+    assertEqual("cast(a as array)", 'a.cast(ArrayType(IntegerType)))
+    assertEqual("cast(cast(a as int) as long)", 'a.cast(IntegerType).cast(LongType))
+  }
+
+  test("function expressions") {
+    assertEqual("foo()", 'foo.function())
+    assertEqual("foo.bar()", Symbol("foo.bar").function())
+    assertEqual("foo(*)", 'foo.function(star()))
+    assertEqual("count(*)", 'count.function(1))
+    assertEqual("foo(a, b)", 'foo.function('a, 'b))
+    assertEqual("foo(all a, b)", 'foo.function('a, 'b))
+    assertEqual("foo(distinct a, b)", 'foo.distinctFunction('a, 'b))
+    assertEqual("grouping(distinct a, b)", 'grouping.distinctFunction('a, 'b))
+    assertEqual("`select`(all a, b)", 'select.function('a, 'b))
+  }
+
+  test("window function expressions") {
+    val func = 'foo.function(star())
+    def windowed(
+        partitioning: Seq[Expression] = Seq.empty,
+        ordering: Seq[SortOrder] = Seq.empty,
+        frame: WindowFrame = UnspecifiedFrame): Expression = {
+      WindowExpression(func, WindowSpecDefinition(partitioning, ordering, frame))
+    }
+
+    // Basic window testing.
+    assertEqual("foo(*) over w1", UnresolvedWindowExpression(func, WindowSpecReference("w1")))
+    assertEqual("foo(*) over ()", windowed())
+    assertEqual("foo(*) over (partition by a, b)", windowed(Seq('a, 'b)))
+    assertEqual("foo(*) over (distribute by a, b)", windowed(Seq('a, 'b)))
+    assertEqual("foo(*) over (cluster by a, b)", windowed(Seq('a, 'b)))
+    assertEqual("foo(*) over (order by a desc, b asc)", windowed(Seq.empty, Seq('a.desc, 'b.asc )))
+    assertEqual("foo(*) over (sort by a desc, b asc)", windowed(Seq.empty, Seq('a.desc, 'b.asc )))
+    assertEqual("foo(*) over (partition by a, b order by c)", windowed(Seq('a, 'b), Seq('c.asc)))
+    assertEqual("foo(*) over (distribute by a, b sort by c)", windowed(Seq('a, 'b), Seq('c.asc)))
+
+    // Test use of expressions in window functions.
+    assertEqual(
+      "sum(product + 1) over (partition by ((product) + (1)) order by 2)",
+      WindowExpression('sum.function('product + 1),
+        WindowSpecDefinition(Seq('product + 1), Seq(Literal(2).asc), UnspecifiedFrame)))
+    assertEqual(
+      "sum(product + 1) over (partition by ((product / 2) + 1) order by 2)",
+      WindowExpression('sum.function('product + 1),
+        WindowSpecDefinition(Seq('product / 2 + 1), Seq(Literal(2).asc), UnspecifiedFrame)))
+
+    // Range/Row
+    val frameTypes = Seq(("rows", RowFrame), ("range", RangeFrame))
+    val boundaries = Seq(
+      ("10 preceding", ValuePreceding(10), CurrentRow),
+      ("3 + 1 following", ValueFollowing(4), CurrentRow), // Will fail during analysis
+      ("unbounded preceding", UnboundedPreceding, CurrentRow),
+      ("unbounded following", UnboundedFollowing, CurrentRow), // Will fail during analysis
+      ("between unbounded preceding and current row", UnboundedPreceding, CurrentRow),
+      ("between unbounded preceding and unbounded following",
+        UnboundedPreceding, UnboundedFollowing),
+      ("between 10 preceding and current row", ValuePreceding(10), CurrentRow),
+      ("between current row and 5 following", CurrentRow, ValueFollowing(5)),
+      ("between 10 preceding and 5 following", ValuePreceding(10), ValueFollowing(5))
+    )
+    frameTypes.foreach {
+      case (frameTypeSql, frameType) =>
+        boundaries.foreach {
+          case (boundarySql, begin, end) =>
+            val query = s"foo(*) over (partition by a order by b $frameTypeSql $boundarySql)"
+            val expr = windowed(Seq('a), Seq('b.asc), SpecifiedWindowFrame(frameType, begin, end))
+            assertEqual(query, expr)
+        }
+    }
+
+    // We cannot use non-integer constants.
+    intercept("foo(*) over (partition by a order by b rows 10.0 preceding)",
+      "Frame bound value must be a constant integer.")
+
+    // We cannot use an arbitrary expression.
+    intercept("foo(*) over (partition by a order by b rows exp(b) preceding)",
+      "Frame bound value must be a constant integer.")
+  }
+
+  test("row constructor") {
+    // Note that '(a)' will be interpreted as a nested expression.
+    assertEqual("(a, b)", CreateStruct(Seq('a, 'b)))
+    assertEqual("(a, b, c)", CreateStruct(Seq('a, 'b, 'c)))
+  }
+
+  test("scalar sub-query") {
+    assertEqual(
+      "(select max(val) from tbl) > current",
+      ScalarSubquery(table("tbl").select('max.function('val))) > 'current)
+    assertEqual(
+      "a = (select b from s)",
+      'a === ScalarSubquery(table("s").select('b)))
+  }
+
+  test("case when") {
+    assertEqual("case a when 1 then b when 2 then c else d end",
+      CaseKeyWhen('a, Seq(1, 'b, 2, 'c, 'd)))
+    assertEqual("case when a = 1 then b when a = 2 then c else d end",
+      CaseWhen(Seq(('a === 1, 'b.expr), ('a === 2, 'c.expr)), 'd))
+  }
+
+  test("dereference") {
+    assertEqual("a.b", UnresolvedAttribute("a.b"))
+    assertEqual("`select`.b", UnresolvedAttribute("select.b"))
+    assertEqual("(a + b).b", ('a + 'b).getField("b")) // This will fail analysis.
+    assertEqual("struct(a, b).b", 'struct.function('a, 'b).getField("b"))
+  }
+
+  test("reference") {
+    // Regular
+    assertEqual("a", 'a)
+
+    // Starting with a digit.
+    assertEqual("1a", Symbol("1a"))
+
+    // Quoted using a keyword.
+    assertEqual("`select`", 'select)
+
+    // Unquoted using an unreserved keyword.
+    assertEqual("columns", 'columns)
+  }
+
+  test("subscript") {
+    assertEqual("a[b]", 'a.getItem('b))
+    assertEqual("a[1 + 1]", 'a.getItem(Literal(1) + 1))
+    assertEqual("`c`.a[b]", UnresolvedAttribute("c.a").getItem('b))
+  }
+
+  test("parenthesis") {
+    assertEqual("(a)", 'a)
+    assertEqual("r * (a + b)", 'r * ('a + 'b))
+  }
+
+  test("type constructors") {
+    // Dates.
+    assertEqual("dAte '2016-03-11'", Literal(Date.valueOf("2016-03-11")))
+    intercept[IllegalArgumentException] {
+      parseExpression("DAtE 'mar 11 2016'")
+    }
+
+    // Timestamps.
+    assertEqual("tImEstAmp '2016-03-11 20:54:00.000'",
+      Literal(Timestamp.valueOf("2016-03-11 20:54:00.000")))
+    intercept[IllegalArgumentException] {
+      parseExpression("timestamP '2016-33-11 20:54:00.000'")
+    }
+
+    // Unsupported datatype.
+    intercept("GEO '(10,-6)'", "Literals of type 'GEO' are currently not supported.")
+  }
+
+  test("literals") {
+    // NULL
+    assertEqual("null", Literal(null))
+
+    // Boolean
+    assertEqual("trUe", Literal(true))
+    assertEqual("False", Literal(false))
+
+    // Integral should have the narrowest possible type
+    assertEqual("787324", Literal(787324))
+    assertEqual("7873247234798249234", Literal(7873247234798249234L))
+    assertEqual("78732472347982492793712334",
+      Literal(BigDecimal("78732472347982492793712334").underlying()))
+
+    // Decimal
+    assertEqual("7873247234798249279371.2334",
+      Literal(BigDecimal("7873247234798249279371.2334").underlying()))
+
+    // Scientific Decimal
+    assertEqual("9.0e1", 90d)
+    assertEqual(".9e+2", 90d)
+    assertEqual("0.9e+2", 90d)
+    assertEqual("900e-1", 90d)
+    assertEqual("900.0E-1", 90d)
+    assertEqual("9.e+1", 90d)
+    intercept(".e3")
+
+    // Tiny Int Literal
+    assertEqual("10Y", Literal(10.toByte))
+    intercept("-1000Y")
+
+    // Small Int Literal
+    assertEqual("10S", Literal(10.toShort))
+    intercept("40000S")
+
+    // Long Int Literal
+    assertEqual("10L", Literal(10L))
+    intercept("78732472347982492793712334L")
+
+    // Double Literal
+    assertEqual("10.0D", Literal(10.0D))
+    // TODO we need to figure out if we should throw an exception here!
+    assertEqual("1E309", Literal(Double.PositiveInfinity))
+  }
+
+  test("strings") {
+    // Single Strings.
+    assertEqual("\"hello\"", "hello")
+    assertEqual("'hello'", "hello")
+
+    // Multi-Strings.
+    assertEqual("\"hello\" 'world'", "helloworld")
+    assertEqual("'hello' \" \" 'world'", "hello world")
+
+    // 'LIKE' string literals. Notice that an escaped '%' is the same as an escaped '\' and a
+    // regular '%'; to get the correct result you need to add another escaped '\'.
+    // TODO figure out whether we should change the ParseUtils.unescapeSQLString method.
+    assertEqual("'pattern%'", "pattern%")
+    assertEqual("'no-pattern\\%'", "no-pattern\\%")
+    assertEqual("'pattern\\\\%'", "pattern\\%")
+    assertEqual("'pattern\\\\\\%'", "pattern\\\\%")
+
+    // Escaped characters.
+    // See: http://dev.mysql.com/doc/refman/5.7/en/string-literals.html
+    assertEqual("'\\0'", "\u0000") // ASCII NUL (X'00')
+    assertEqual("'\\''", "\'")     // Single quote
+    assertEqual("'\\\"'", "\"")    // Double quote
+    assertEqual("'\\b'", "\b")     // Backspace
+    assertEqual("'\\n'", "\n")     // Newline
+    assertEqual("'\\r'", "\r")     // Carriage return
+    assertEqual("'\\t'", "\t")     // Tab character
+    assertEqual("'\\Z'", "\u001A") // ASCII 26 - CTRL + Z (EOF on windows)
+
+    // Octals
+    assertEqual("'\\110\\145\\154\\154\\157\\041'", "Hello!")
+
+    // Unicode
+    assertEqual("'\\u0087\\u0111\\u0114\\u0108\\u0100\\u0032\\u0058\\u0041'", "World :)")
+  }
+
+  test("intervals") {
+    def intervalLiteral(u: String, s: String): Literal = {
+      Literal(CalendarInterval.fromSingleUnitString(u, s))
+    }
+
+    // Empty interval statement
+    intercept("interval", "at least one time unit should be given for interval literal")
+
+    // Single Intervals.
+    val units = Seq(
+      "year",
+      "month",
+      "week",
+      "day",
+      "hour",
+      "minute",
+      "second",
+      "millisecond",
+      "microsecond")
+    val forms = Seq("", "s")
+    val values = Seq("0", "10", "-7", "21")
+    units.foreach { unit =>
+      forms.foreach { form =>
+         values.foreach { value =>
+           val expected = intervalLiteral(unit, value)
+           assertEqual(s"interval $value $unit$form", expected)
+           assertEqual(s"interval '$value' $unit$form", expected)
+         }
+      }
+    }
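+    // The loop above checks, for example, both "interval -7 week" and "interval '-7' weeks"
+    // against Literal(CalendarInterval.fromSingleUnitString("week", "-7")).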
+
+    // Hive nanosecond notation.
+    assertEqual("interval 13.123456789 seconds", intervalLiteral("second", "13.123456789"))
+    assertEqual("interval -13.123456789 second", intervalLiteral("second", "-13.123456789"))
+
+    // Non-existent unit
+    intercept("interval 10 nanoseconds", "No interval can be constructed")
+
+    // Year-Month intervals.
+    val yearMonthValues = Seq("123-10", "496-0", "-2-3", "-123-0")
+    yearMonthValues.foreach { value =>
+      val result = Literal(CalendarInterval.fromYearMonthString(value))
+      assertEqual(s"interval '$value' year to month", result)
+    }
+
+    // Day-Time intervals.
+    val dayTimeValues = Seq(
+      "99 11:22:33.123456789",
+      "-99 11:22:33.123456789",
+      "10 9:8:7.123456789",
+      "1 0:0:0",
+      "-1 0:0:0",
+      "1 0:0:1")
+    dayTimeValues.foreach { value =>
+      val result = Literal(CalendarInterval.fromDayTimeString(value))
+      assertEqual(s"interval '$value' day to second", result)
+    }
+
+    // Unknown FROM TO intervals
+    intercept("interval 10 month to second", "Intervals FROM month TO second are not supported.")
+
+    // Composed intervals.
+    assertEqual(
+      "interval 3 months 22 seconds 1 millisecond",
+      Literal(new CalendarInterval(3, 22001000L)))
+    assertEqual(
+      "interval 3 years '-1-10' year to month 3 weeks '1 0:0:2' day to second",
+      Literal(new CalendarInterval(14,
+        22 * CalendarInterval.MICROS_PER_DAY + 2 * CalendarInterval.MICROS_PER_SECOND)))
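+    // Sanity check of the expected values above: 22 seconds + 1 millisecond = 22,001,000
+    // microseconds; 3 years plus '-1-10' year-to-month (i.e. minus 22 months) = 14 months; and
+    // 3 weeks plus '1 0:0:2' day-to-second = 22 days and 2 seconds.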
+  }
+
+  test("composed expressions") {
+    assertEqual("1 + r.r As q", (Literal(1) + UnresolvedAttribute("r.r")).as("q"))
+    assertEqual("1 - f('o', o(bar))", Literal(1) - 'f.function("o", 'o.function('bar)))
+    intercept("1 - f('o', o(bar)) hello * world", "mismatched input '*'")
+  }
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ng/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ng/PlanParserSuite.scala
new file mode 100644
index 0000000000000..4206d22ca7ed6
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ng/PlanParserSuite.scala
@@ -0,0 +1,429 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.parser.ng
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.plans._
+import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.types.{BooleanType, IntegerType}
+
+class PlanParserSuite extends PlanTest {
+  import CatalystSqlParser._
+  import org.apache.spark.sql.catalyst.dsl.expressions._
+  import org.apache.spark.sql.catalyst.dsl.plans._
+
+  def assertEqual(sqlCommand: String, plan: LogicalPlan): Unit = {
+    comparePlans(parsePlan(sqlCommand), plan)
+  }
+
+  def intercept(sqlCommand: String, messages: String*): Unit = {
+    val e = intercept[ParseException](parsePlan(sqlCommand))
+    messages.foreach { message =>
+      assert(e.message.contains(message))
+    }
+  }
+
+  test("case insensitive") {
+    val plan = table("a").select(star())
+    assertEqual("sELEct * FroM a", plan)
+    assertEqual("select * fRoM a", plan)
+    assertEqual("SELECT * FROM a", plan)
+  }
+
+  test("show functions") {
+    assertEqual("show functions", ShowFunctions(None, None))
+    assertEqual("show functions foo", ShowFunctions(None, Some("foo")))
+    assertEqual("show functions foo.bar", ShowFunctions(Some("foo"), Some("bar")))
+    assertEqual("show functions 'foo\\\\.*'", ShowFunctions(None, Some("foo\\.*")))
+    intercept("show functions foo.bar.baz", "SHOW FUNCTIONS unsupported name")
+  }
+
+  test("describe function") {
+    assertEqual("describe function bar", DescribeFunction("bar", isExtended = false))
+    assertEqual("describe function extended bar", DescribeFunction("bar", isExtended = true))
+    assertEqual("describe function foo.bar", DescribeFunction("foo.bar", isExtended = false))
+    assertEqual("describe function extended f.bar", DescribeFunction("f.bar", isExtended = true))
+  }
+
+  test("set operations") {
+    val a = table("a").select(star())
+    val b = table("b").select(star())
+
+    assertEqual("select * from a union select * from b", Distinct(a.union(b)))
+    assertEqual("select * from a union distinct select * from b", Distinct(a.union(b)))
+    assertEqual("select * from a union all select * from b", a.union(b))
+    assertEqual("select * from a except select * from b", a.except(b))
+    intercept("select * from a except all select * from b", "EXCEPT ALL is not supported.")
+    assertEqual("select * from a except distinct select * from b", a.except(b))
+    assertEqual("select * from a intersect select * from b", a.intersect(b))
+    intercept("select * from a intersect all select * from b", "INTERSECT ALL is not supported.")
+    assertEqual("select * from a intersect distinct select * from b", a.intersect(b))
+  }
+
+  test("common table expressions") {
+    def cte(plan: LogicalPlan, namedPlans: (String, LogicalPlan)*): With = {
+      val ctes = namedPlans.map {
+        case (name, cte) =>
+          name -> SubqueryAlias(name, cte)
+      }.toMap
+      With(plan, ctes)
+    }
+    assertEqual(
+      "with cte1 as (select * from a) select * from cte1",
+      cte(table("cte1").select(star()), "cte1" -> table("a").select(star())))
+    assertEqual(
+      "with cte1 (select 1) select * from cte1",
+      cte(table("cte1").select(star()), "cte1" -> OneRowRelation.select(1)))
+    assertEqual(
+      "with cte1 (select 1), cte2 as (select * from cte1) select * from cte2",
+      cte(table("cte2").select(star()),
+        "cte1" -> OneRowRelation.select(1),
+        "cte2" -> table("cte1").select(star())))
+    intercept(
+      "with cte1 (select 1), cte1 as (select 1 from cte1) select * from cte1",
+      "Name 'cte1' is used for multiple common table expressions")
+  }
+
+  test("simple select query") {
+    assertEqual("select 1", OneRowRelation.select(1))
+    assertEqual("select a, b", OneRowRelation.select('a, 'b))
+    assertEqual("select a, b from db.c", table("db", "c").select('a, 'b))
+    assertEqual("select a, b from db.c where x < 1", table("db", "c").where('x < 1).select('a, 'b))
+    assertEqual(
+      "select a, b from db.c having x < 1",
+      table("db", "c").select('a, 'b).where(('x < 1).cast(BooleanType)))
+    assertEqual("select distinct a, b from db.c", Distinct(table("db", "c").select('a, 'b)))
+    assertEqual("select all a, b from db.c", table("db", "c").select('a, 'b))
+  }
+
+  test("reverse select query") {
+    assertEqual("from a", table("a"))
+    assertEqual("from a select b, c", table("a").select('b, 'c))
+    assertEqual(
+      "from db.a select b, c where d < 1", table("db", "a").where('d < 1).select('b, 'c))
+    assertEqual("from a select distinct b, c", Distinct(table("a").select('b, 'c)))
+    assertEqual(
+      "from (from a union all from b) c select *",
+      table("a").union(table("b")).as("c").select(star()))
+  }
+
+  test("transform query spec") {
+    val p = ScriptTransformation(Seq('a, 'b), "func", Seq.empty, table("e"), null)
+    assertEqual("select transform(a, b) using 'func' from e where f < 10",
+      p.copy(child = p.child.where('f < 10), output = Seq('key.string, 'value.string)))
+    assertEqual("map a, b using 'func' as c, d from e",
+      p.copy(output = Seq('c.string, 'd.string)))
+    assertEqual("reduce a, b using 'func' as (c: int, d decimal(10, 0)) from e",
+      p.copy(output = Seq('c.int, 'd.decimal(10, 0))))
+  }
+
+  test("multi select query") {
+    assertEqual(
+      "from a select * select * where s < 10",
+      table("a").select(star()).union(table("a").where('s < 10).select(star())))
+    intercept(
+      "from a select * select * from x where a.s < 10",
+      "Multi-Insert queries cannot have a FROM clause in their individual SELECT statements")
+    assertEqual(
+      "from a insert into tbl1 select * insert into tbl2 select * where s < 10",
+      table("a").select(star()).insertInto("tbl1").union(
+        table("a").where('s < 10).select(star()).insertInto("tbl2")))
+  }
+
+  test("query organization") {
+    // Test all valid combinations of order by/sort by/distribute by/cluster by/limit/windows
+    val baseSql = "select * from t"
+    val basePlan = table("t").select(star())
+
+    val ws = Map("w1" -> WindowSpecDefinition(Seq.empty, Seq.empty, UnspecifiedFrame))
+    val limitWindowClauses = Seq(
+      ("", (p: LogicalPlan) => p),
+      (" limit 10", (p: LogicalPlan) => p.limit(10)),
+      (" window w1 as ()", (p: LogicalPlan) => WithWindowDefinition(ws, p)),
+      (" window w1 as () limit 10", (p: LogicalPlan) => WithWindowDefinition(ws, p).limit(10))
+    )
+
+    val orderSortDistrClusterClauses = Seq(
+      ("", basePlan),
+      (" order by a, b desc", basePlan.orderBy('a.asc, 'b.desc)),
+      (" sort by a, b desc", basePlan.sortBy('a.asc, 'b.desc)),
+      (" distribute by a, b", basePlan.distribute('a, 'b)),
+      (" distribute by a sort by b", basePlan.distribute('a).sortBy('b.asc)),
+      (" cluster by a, b", basePlan.distribute('a, 'b).sortBy('a.asc, 'b.asc))
+    )
+
+    orderSortDistrClusterClauses.foreach {
+      case (s1, p1) =>
+        limitWindowClauses.foreach {
+          case (s2, pf2) =>
+            assertEqual(baseSql + s1 + s2, pf2(p1))
+        }
+    }
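+    // One generated combination, for example, is
+    // "select * from t order by a, b desc window w1 as () limit 10", which should parse into
+    // WithWindowDefinition(ws, basePlan.orderBy('a.asc, 'b.desc)).limit(10).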
+
+    val msg = "Combination of ORDER BY/SORT BY/DISTRIBUTE BY/CLUSTER BY is not supported"
+    intercept(s"$baseSql order by a sort by a", msg)
+    intercept(s"$baseSql cluster by a distribute by a", msg)
+    intercept(s"$baseSql order by a cluster by a", msg)
+    intercept(s"$baseSql order by a distribute by a", msg)
+  }
+
+  test("insert into") {
+    val sql = "select * from t"
+    val plan = table("t").select(star())
+    def insert(
+        partition: Map[String, Option[String]],
+        overwrite: Boolean = false,
+        ifNotExists: Boolean = false): LogicalPlan =
+      InsertIntoTable(table("s"), partition, plan, overwrite, ifNotExists)
+
+    // Single inserts
+    assertEqual(s"insert overwrite table s $sql",
+      insert(Map.empty, overwrite = true))
+    assertEqual(s"insert overwrite table s if not exists $sql",
+      insert(Map.empty, overwrite = true, ifNotExists = true))
+    assertEqual(s"insert into s $sql",
+      insert(Map.empty))
+    assertEqual(s"insert into table s partition (c = 'd', e = 1) $sql",
+      insert(Map("c" -> Option("d"), "e" -> Option("1"))))
+    assertEqual(s"insert overwrite table s partition (c = 'd', x) if not exists $sql",
+      insert(Map("c" -> Option("d"), "x" -> None), overwrite = true, ifNotExists = true))
+
+    // Multi insert
+    val plan2 = table("t").where('x > 5).select(star())
+    assertEqual("from t insert into s select * limit 1 insert into u select * where x > 5",
+      InsertIntoTable(
+        table("s"), Map.empty, plan.limit(1), overwrite = false, ifNotExists = false).union(
+        InsertIntoTable(
+          table("u"), Map.empty, plan2, overwrite = false, ifNotExists = false)))
+  }
+
+  test("aggregation") {
+    val sql = "select a, b, sum(c) as c from d group by a, b"
+
+    // Normal
+    assertEqual(sql, table("d").groupBy('a, 'b)('a, 'b, 'sum.function('c).as("c")))
+
+    // Cube
+    assertEqual(s"$sql with cube",
+      table("d").groupBy(Cube(Seq('a, 'b)))('a, 'b, 'sum.function('c).as("c")))
+
+    // Rollup
+    assertEqual(s"$sql with rollup",
+      table("d").groupBy(Rollup(Seq('a, 'b)))('a, 'b, 'sum.function('c).as("c")))
+
+    // Grouping Sets
+    assertEqual(s"$sql grouping sets((a, b), (a), ())",
+      GroupingSets(Seq(0, 1, 3), Seq('a, 'b), table("d"), Seq('a, 'b, 'sum.function('c).as("c"))))
+    intercept(s"$sql grouping sets((a, b), (c), ())",
+      "c doesn't show up in the GROUP BY list")
+  }
+
+  test("limit") {
+    val sql = "select * from t"
+    val plan = table("t").select(star())
+    assertEqual(s"$sql limit 10", plan.limit(10))
+    assertEqual(s"$sql limit cast(9 / 4 as int)", plan.limit(Cast(Literal(9) / 4, IntegerType)))
+  }
+
+  test("window spec") {
+    // Note that WindowSpecs are tested in the ExpressionParserSuite
+    val sql = "select * from t"
+    val plan = table("t").select(star())
+    val spec = WindowSpecDefinition(Seq('a, 'b), Seq('c.asc),
+      SpecifiedWindowFrame(RowFrame, ValuePreceding(1), ValueFollowing(1)))
+
+    // Test window resolution.
+    val ws1 = Map("w1" -> spec, "w2" -> spec, "w3" -> spec)
+    assertEqual(
+      s"""$sql
+         |window w1 as (partition by a, b order by c rows between 1 preceding and 1 following),
+         |       w2 as w1,
+         |       w3 as w1""".stripMargin,
+      WithWindowDefinition(ws1, plan))
+
+    // Fail with no reference.
+    intercept(s"$sql window w2 as w1", "Cannot resolve window reference 'w1'")
+
+    // Fail when resolved reference is not a window spec.
+    intercept(
+      s"""$sql
+         |window w1 as (partition by a, b order by c rows between 1 preceding and 1 following),
+         |       w2 as w1,
+         |       w3 as w2""".stripMargin,
+      "Window reference 'w2' is not a window specification"
+    )
+  }
+
+  test("lateral view") {
+    // Single lateral view
+    assertEqual(
+      "select * from t lateral view explode(x) expl as x",
+      table("t")
+        .generate(Explode('x), join = true, outer = false, Some("expl"), Seq("x"))
+        .select(star()))
+
+    // Multiple lateral views
+    assertEqual(
+      """select *
+        |from t
+        |lateral view explode(x) expl
+        |lateral view outer json_tuple(x, y) jtup q, z""".stripMargin,
+      table("t")
+        .generate(Explode('x), join = true, outer = false, Some("expl"), Seq.empty)
+        .generate(JsonTuple(Seq('x, 'y)), join = true, outer = true, Some("jtup"), Seq("q", "z"))
+        .select(star()))
+
+    // Multi-Insert lateral views.
+    val from = table("t1").generate(Explode('x), join = true, outer = false, Some("expl"), Seq("x"))
+    assertEqual(
+      """from t1
+        |lateral view explode(x) expl as x
+        |insert into t2
+        |select *
+        |lateral view json_tuple(x, y) jtup q, z
+        |insert into t3
+        |select *
+        |where s < 10
+      """.stripMargin,
+      Union(from
+        .generate(JsonTuple(Seq('x, 'y)), join = true, outer = false, Some("jtup"), Seq("q", "z"))
+        .select(star())
+        .insertInto("t2"),
+        from.where('s < 10).select(star()).insertInto("t3")))
+
+    // Unsupported generator.
+    intercept(
+      "select * from t lateral view posexplode(x) posexpl as x, y",
+      "Generator function 'posexplode' is not supported")
+  }
+
+  test("joins") {
+    // Test single joins.
+    val testUnconditionalJoin = (sql: String, jt: JoinType) => {
+      assertEqual(
+        s"select * from t as tt $sql u",
+        table("t").as("tt").join(table("u"), jt, None).select(star()))
+    }
+    val testConditionalJoin = (sql: String, jt: JoinType) => {
+      assertEqual(
+        s"select * from t $sql u as uu on a = b",
+        table("t").join(table("u").as("uu"), jt, Option('a === 'b)).select(star()))
+    }
+    val testNaturalJoin = (sql: String, jt: JoinType) => {
+      assertEqual(
+        s"select * from t tt natural $sql u as uu",
+        table("t").as("tt").join(table("u").as("uu"), NaturalJoin(jt), None).select(star()))
+    }
+    val testUsingJoin = (sql: String, jt: JoinType) => {
+      assertEqual(
+        s"select * from t $sql u using(a, b)",
+        table("t").join(table("u"), UsingJoin(jt, Seq('a.attr, 'b.attr)), None).select(star()))
+    }
+    val testAll = Seq(testUnconditionalJoin, testConditionalJoin, testNaturalJoin, testUsingJoin)
+
+    def test(sql: String, jt: JoinType, tests: Seq[(String, JoinType) => Unit]): Unit = {
+      tests.foreach(_(sql, jt))
+    }
+    test("cross join", Inner, Seq(testUnconditionalJoin))
+    test(",", Inner, Seq(testUnconditionalJoin))
+    test("join", Inner, testAll)
+    test("inner join", Inner, testAll)
+    test("left join", LeftOuter, testAll)
+    test("left outer join", LeftOuter, testAll)
+    test("right join", RightOuter, testAll)
+    test("right outer join", RightOuter, testAll)
+    test("full join", FullOuter, testAll)
+    test("full outer join", FullOuter, testAll)
+
+    // Test multiple consecutive joins
+    assertEqual(
+      "select * from a join b join c right join d",
+      table("a").join(table("b")).join(table("c")).join(table("d"), RightOuter).select(star()))
+  }
+
+  test("sampled relations") {
+    val sql = "select * from t"
+    assertEqual(s"$sql tablesample(100 rows)",
+      table("t").limit(100).select(star()))
+    assertEqual(s"$sql tablesample(43 percent) as x",
+      Sample(0, .43d, withReplacement = false, 10L, table("t").as("x"))(true).select(star()))
+    assertEqual(s"$sql tablesample(bucket 4 out of 10) as x",
+      Sample(0, .4d, withReplacement = false, 10L, table("t").as("x"))(true).select(star()))
+    intercept(s"$sql tablesample(bucket 4 out of 10 on x) as x",
+      "TABLESAMPLE(BUCKET x OUT OF y ON id) is not supported")
+    intercept(s"$sql tablesample(bucket 11 out of 10) as x",
+      s"Sampling fraction (${11.0/10.0}) must be on interval [0, 1]")
+  }
+
+  test("sub-query") {
+    val plan = table("t0").select('id)
+    assertEqual("select id from (t0)", plan)
+    assertEqual("select id from ((((((t0))))))", plan)
+    assertEqual(
+      "(select * from t1) union distinct (select * from t2)",
+      Distinct(table("t1").select(star()).union(table("t2").select(star()))))
+    assertEqual(
+      "select * from ((select * from t1) union (select * from t2)) t",
+      Distinct(
+        table("t1").select(star()).union(table("t2").select(star()))).as("t").select(star()))
+    assertEqual(
+      """select  id
+        |from (((select id from t0)
+        |       union all
+        |       (select  id from t0))
+        |      union all
+        |      (select id from t0)) as u_1
+      """.stripMargin,
+      plan.union(plan).union(plan).as("u_1").select('id))
+  }
+
+  test("scalar sub-query") {
+    assertEqual(
+      "select (select max(b) from s) ss from t",
+      table("t").select(ScalarSubquery(table("s").select('max.function('b))).as("ss")))
+    assertEqual(
+      "select * from t where a = (select b from s)",
+      table("t").where('a === ScalarSubquery(table("s").select('b))).select(star()))
+    assertEqual(
+      "select g from t group by g having a > (select b from s)",
+      table("t")
+        .groupBy('g)('g)
+        .where(('a > ScalarSubquery(table("s").select('b))).cast(BooleanType)))
+  }
+
+  test("table reference") {
+    assertEqual("table t", table("t"))
+    assertEqual("table d.t", table("d", "t"))
+  }
+
+  test("inline table") {
+    assertEqual("values 1, 2, 3, 4", LocalRelation.fromExternalRows(
+      Seq('col1.int),
+      Seq(1, 2, 3, 4).map(x => Row(x))))
+    assertEqual(
+      "values (1, 'a'), (2, 'b'), (3, 'c') as tbl(a, b)",
+      LocalRelation.fromExternalRows(
+        Seq('a.int, 'b.string),
+        Seq((1, "a"), (2, "b"), (3, "c")).map(x => Row(x._1, x._2))).as("tbl"))
+    intercept("values (a, 'a'), (b, 'b')",
+      "All expressions in an inline table must be constants.")
+    intercept("values (1, 'a'), (2, 'b') as tbl(a, b, c)",
+      "Number of aliases must match the number of fields in an inline table.")
+    intercept[ArrayIndexOutOfBoundsException](parsePlan("values (1, 'a'), (2, 'b', 5Y)"))
+  }
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ng/TableIdentifierParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ng/TableIdentifierParserSuite.scala
new file mode 100644
index 0000000000000..0874322187489
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ng/TableIdentifierParserSuite.scala
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.parser.ng
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.catalyst.TableIdentifier
+
+class TableIdentifierParserSuite extends SparkFunSuite {
+  import CatalystSqlParser._
+
+  test("table identifier") {
+    // Regular names.
+    assert(TableIdentifier("q") === parseTableIdentifier("q"))
+    assert(TableIdentifier("q", Option("d")) === parseTableIdentifier("d.q"))
+
+    // Illegal names.
+    intercept[ParseException](parseTableIdentifier(""))
+    intercept[ParseException](parseTableIdentifier("d.q.g"))
+
+    // SQL Keywords.
+    val keywords = Seq("select", "from", "where", "left", "right")
+    keywords.foreach { keyword =>
+      intercept[ParseException](parseTableIdentifier(keyword))
+      assert(TableIdentifier(keyword) === parseTableIdentifier(s"`$keyword`"))
+      assert(TableIdentifier(keyword, Option("db")) === parseTableIdentifier(s"db.`$keyword`"))
+    }
+  }
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala
index 0541844e0bfcd..aa5d4330d386e 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.plans
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, OneRowRelation}
+import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, OneRowRelation, Sample}
 import org.apache.spark.sql.catalyst.util._
 
 /**
@@ -32,6 +32,8 @@ abstract class PlanTest extends SparkFunSuite with PredicateHelper {
    */
   protected def normalizeExprIds(plan: LogicalPlan) = {
     plan transformAllExpressions {
+      case s: ScalarSubquery =>
+        ScalarSubquery(s.query, ExprId(0))
       case a: AttributeReference =>
         AttributeReference(a.name, a.dataType, a.nullable)(exprId = ExprId(0))
       case a: Alias =>
@@ -40,21 +42,25 @@ abstract class PlanTest extends SparkFunSuite with PredicateHelper {
   }
 
   /**
-   * Normalizes the filter conditions that appear in the plan. For instance,
-   * ((expr 1 && expr 2) && expr 3), (expr 1 && expr 2 && expr 3), (expr 3 && (expr 1 && expr 2)
-   * etc., will all now be equivalent.
+   * Normalizes plans:
+   * - Filter: the filter conditions that appear in a plan are normalized. For instance,
+   *   ((expr 1 && expr 2) && expr 3), (expr 1 && expr 2 && expr 3), (expr 3 && (expr 1 && expr 2))
+   *   etc., will all now be equivalent.
+   * - Sample: the seed will be replaced by 0L.
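+   *
+   * Concretely (a sketch in terms of the test DSL used by these suites): t.where('a && 'b) and
+   * t.where('b && 'a) normalize to the same plan, because the conjuncts are split, sorted by
+   * their hashCode, and re-combined with And.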
    */
-  private def normalizeFilters(plan: LogicalPlan) = {
+  private def normalizePlan(plan: LogicalPlan): LogicalPlan = {
     plan transform {
       case filter @ Filter(condition: Expression, child: LogicalPlan) =>
         Filter(splitConjunctivePredicates(condition).sortBy(_.hashCode()).reduce(And), child)
+      case sample: Sample =>
+        sample.copy(seed = 0L)(true)
     }
   }
 
   /** Fails the test if the two plans do not match */
   protected def comparePlans(plan1: LogicalPlan, plan2: LogicalPlan) {
-    val normalized1 = normalizeFilters(normalizeExprIds(plan1))
-    val normalized2 = normalizeFilters(normalizeExprIds(plan2))
+    val normalized1 = normalizePlan(normalizeExprIds(plan1))
+    val normalized2 = normalizePlan(normalizeExprIds(plan2))
     if (normalized1 != normalized2) {
       fail(
         s"""
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMap.java b/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMap.java
index 8882903bbf8ad..1f1b5389aa7d4 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMap.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMap.java
@@ -134,7 +134,7 @@ public UnsafeRow getAggregationBufferFromUnsafeRow(UnsafeRow key, int hash) {
     if (!loc.isDefined()) {
       // This is the first time that we've seen this grouping key, so we'll insert a copy of the
       // empty aggregation buffer into the map:
-      boolean putSucceeded = loc.putNewKey(
+      boolean putSucceeded = loc.append(
         key.getBaseObject(),
         key.getBaseOffset(),
         key.getSizeInBytes(),
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 6c08865fe25e1..2d47760ea7629 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -23,8 +23,10 @@ import java.util.concurrent.atomic.AtomicLong
 import scala.collection.JavaConverters._
 import scala.language.implicitConversions
 import scala.reflect.runtime.universe.TypeTag
+import scala.util.control.NonFatal
 
 import com.fasterxml.jackson.core.JsonFactory
+import org.apache.commons.lang3.StringUtils
 
 import org.apache.spark.annotation.{DeveloperApi, Experimental}
 import org.apache.spark.api.java.JavaRDD
@@ -40,7 +42,7 @@ import org.apache.spark.sql.catalyst.optimizer.CombineUnions
 import org.apache.spark.sql.catalyst.plans._
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.util.usePrettyExpression
-import org.apache.spark.sql.execution.{FileRelation, LogicalRDD, Queryable, QueryExecution, SQLExecution}
+import org.apache.spark.sql.execution.{FileRelation, LogicalRDD, QueryExecution, SQLExecution}
 import org.apache.spark.sql.execution.command.ExplainCommand
 import org.apache.spark.sql.execution.datasources.{CreateTableUsingAsSelect, LogicalRelation}
 import org.apache.spark.sql.execution.datasources.json.JacksonGenerator
@@ -169,10 +171,10 @@ private[sql] object Dataset {
  * @since 1.6.0
  */
 class Dataset[T] private[sql](
-    @transient override val sqlContext: SQLContext,
-    @DeveloperApi @transient override val queryExecution: QueryExecution,
+    @transient val sqlContext: SQLContext,
+    @DeveloperApi @transient val queryExecution: QueryExecution,
     encoder: Encoder[T])
-  extends Queryable with Serializable {
+  extends Serializable {
 
   queryExecution.assertAnalyzed()
 
@@ -245,7 +247,7 @@ class Dataset[T] private[sql](
    * @param _numRows Number of rows to show
    * @param truncate Whether truncate long strings and align cells right
    */
-  override private[sql] def showString(_numRows: Int, truncate: Boolean = true): String = {
+  private[sql] def showString(_numRows: Int, truncate: Boolean = true): String = {
     val numRows = _numRows.max(0)
     val takeResult = take(numRows + 1)
     val hasMoreData = takeResult.length > numRows
@@ -270,7 +272,75 @@ class Dataset[T] private[sql](
       }: Seq[String]
     }
 
-    formatString ( rows, numRows, hasMoreData, truncate )
+    val sb = new StringBuilder
+    val numCols = schema.fieldNames.length
+
+    // Initialise the width of each column to a minimum value of '3'
+    val colWidths = Array.fill(numCols)(3)
+
+    // Compute the width of each column
+    for (row <- rows) {
+      for ((cell, i) <- row.zipWithIndex) {
+        colWidths(i) = math.max(colWidths(i), cell.length)
+      }
+    }
+
+    // Create the separator line
+    val sep: String = colWidths.map("-" * _).addString(sb, "+", "+", "+\n").toString()
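+    // For example, with column widths of 4 and 3 this produces the separator "+----+---+".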
+
+    // column names
+    rows.head.zipWithIndex.map { case (cell, i) =>
+      if (truncate) {
+        StringUtils.leftPad(cell, colWidths(i))
+      } else {
+        StringUtils.rightPad(cell, colWidths(i))
+      }
+    }.addString(sb, "|", "|", "|\n")
+
+    sb.append(sep)
+
+    // data
+    rows.tail.map {
+      _.zipWithIndex.map { case (cell, i) =>
+        if (truncate) {
+          StringUtils.leftPad(cell.toString, colWidths(i))
+        } else {
+          StringUtils.rightPad(cell.toString, colWidths(i))
+        }
+      }.addString(sb, "|", "|", "|\n")
+    }
+
+    sb.append(sep)
+
+    // For Data that has more than "numRows" records
+    if (hasMoreData) {
+      val rowsString = if (numRows == 1) "row" else "rows"
+      sb.append(s"only showing top $numRows $rowsString\n")
+    }
+
+    sb.toString()
+  }
+
+  override def toString: String = {
+    try {
+      val builder = new StringBuilder
+      val fields = schema.take(2).map {
+        case f => s"${f.name}: ${f.dataType.simpleString(2)}"
+      }
+      builder.append("[")
+      builder.append(fields.mkString(", "))
+      if (schema.length > 2) {
+        if (schema.length - fields.size == 1) {
+          builder.append(" ... 1 more field")
+        } else {
+          builder.append(" ... " + (schema.length - 2) + " more fields")
+        }
+      }
+      builder.append("]").toString()
+    } catch {
+      case NonFatal(e) =>
+        s"Invalid tree; ${e.getMessage}:\n$queryExecution"
+    }
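+    // For example, a Dataset whose schema is (id: int, name: string, age: int), with the column
+    // names purely illustrative, renders as "[id: int, name: string ... 1 more field]".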
   }
 
   /**
@@ -346,7 +416,7 @@ class Dataset[T] private[sql](
    * @since 1.6.0
    */
   // scalastyle:off println
-  override def printSchema(): Unit = println(schema.treeString)
+  def printSchema(): Unit = println(schema.treeString)
   // scalastyle:on println
 
   /**
@@ -355,7 +425,7 @@ class Dataset[T] private[sql](
    * @group basic
    * @since 1.6.0
    */
-  override def explain(extended: Boolean): Unit = {
+  def explain(extended: Boolean): Unit = {
     val explain = ExplainCommand(queryExecution.logical, extended = extended)
     sqlContext.executePlan(explain).executedPlan.executeCollect().foreach {
       // scalastyle:off println
@@ -370,7 +440,7 @@ class Dataset[T] private[sql](
    * @group basic
    * @since 1.6.0
    */
-  override def explain(): Unit = explain(extended = false)
+  def explain(): Unit = explain(extended = false)
 
   /**
    * Returns all column names and their data types as an array.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala
index eef7b5e5baa79..1cf29ec6cb9fb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala
@@ -57,13 +57,6 @@ class KeyValueGroupedDataset[K, V] private[sql](
   private def logicalPlan = ds.logicalPlan
   private def sqlContext = ds.sqlContext
 
-  private def groupedData = {
-    new RelationalGroupedDataset(
-      Dataset.ofRows(sqlContext, logicalPlan),
-      groupingAttributes,
-      RelationalGroupedDataset.GroupByType)
-  }
-
   /**
    * Returns a new [[KeyValueGroupedDataset]] where the type of the key has been mapped to the
    * specified type. The mapping of key columns to the type follows the same rules as `as` on
@@ -203,12 +196,6 @@ class KeyValueGroupedDataset[K, V] private[sql](
     reduceGroups(f.call _)
   }
 
-  private def withEncoder(c: Column): Column = c match {
-    case tc: TypedColumn[_, _] =>
-      tc.withInputType(resolvedVEncoder.bind(dataAttributes), dataAttributes)
-    case _ => c
-  }
-
   /**
    * Internal helper function for building typed aggregations that return tuples.  For simplicity
    * and code reuse, we do this without the help of the type system and then use helper functions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 6968b7bdddd41..7b7c98d0b7992 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -671,7 +671,7 @@ class SQLContext private[sql](
     sessionState.catalog.createTempTable(
       sessionState.sqlParser.parseTableIdentifier(tableName).table,
       df.logicalPlan,
-      ignoreIfExists = true)
+      overrideIfExists = true)
   }
 
   /**
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Queryable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Queryable.scala
deleted file mode 100644
index 38263af0f7e30..0000000000000
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Queryable.scala
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.execution
-
-import scala.util.control.NonFatal
-
-import org.apache.commons.lang3.StringUtils
-
-import org.apache.spark.sql.SQLContext
-import org.apache.spark.sql.types.StructType
-
-/** A trait that holds shared code between DataFrames and Datasets. */
-private[sql] trait Queryable {
-  def schema: StructType
-  def queryExecution: QueryExecution
-  def sqlContext: SQLContext
-
-  override def toString: String = {
-    try {
-      val builder = new StringBuilder
-      val fields = schema.take(2).map {
-        case f => s"${f.name}: ${f.dataType.simpleString(2)}"
-      }
-      builder.append("[")
-      builder.append(fields.mkString(", "))
-      if (schema.length > 2) {
-        if (schema.length - fields.size == 1) {
-          builder.append(" ... 1 more field")
-        } else {
-          builder.append(" ... " + (schema.length - 2) + " more fields")
-        }
-      }
-      builder.append("]").toString()
-    } catch {
-      case NonFatal(e) =>
-        s"Invalid tree; ${e.getMessage}:\n$queryExecution"
-    }
-  }
-
-  def printSchema(): Unit
-
-  def explain(extended: Boolean): Unit
-
-  def explain(): Unit
-
-  private[sql] def showString(_numRows: Int, truncate: Boolean = true): String
-
-  /**
-   * Format the string representing rows for output
-   * @param rows The rows to show
-   * @param numRows Number of rows to show
-   * @param hasMoreData Whether some rows are not shown due to the limit
-   * @param truncate Whether truncate long strings and align cells right
-   *
-   */
-  private[sql] def formatString (
-      rows: Seq[Seq[String]],
-      numRows: Int,
-      hasMoreData : Boolean,
-      truncate: Boolean = true): String = {
-    val sb = new StringBuilder
-    val numCols = schema.fieldNames.length
-
-    // Initialise the width of each column to a minimum value of '3'
-    val colWidths = Array.fill(numCols)(3)
-
-    // Compute the width of each column
-    for (row <- rows) {
-      for ((cell, i) <- row.zipWithIndex) {
-        colWidths(i) = math.max(colWidths(i), cell.length)
-      }
-    }
-
-    // Create SeparateLine
-    val sep: String = colWidths.map("-" * _).addString(sb, "+", "+", "+\n").toString()
-
-    // column names
-    rows.head.zipWithIndex.map { case (cell, i) =>
-      if (truncate) {
-        StringUtils.leftPad(cell, colWidths(i))
-      } else {
-        StringUtils.rightPad(cell, colWidths(i))
-      }
-    }.addString(sb, "|", "|", "|\n")
-
-    sb.append(sep)
-
-    // data
-    rows.tail.map {
-      _.zipWithIndex.map { case (cell, i) =>
-        if (truncate) {
-          StringUtils.leftPad(cell.toString, colWidths(i))
-        } else {
-          StringUtils.rightPad(cell.toString, colWidths(i))
-        }
-      }.addString(sb, "|", "|", "|\n")
-    }
-
-    sb.append(sep)
-
-    // For Data that has more than "numRows" records
-    if (hasMoreData) {
-      val rowsString = if (numRows == 1) "row" else "rows"
-      sb.append(s"only showing top $numRows $rowsString\n")
-    }
-
-    sb.toString()
-  }
-}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
new file mode 100644
index 0000000000000..a8313deeefc2f
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -0,0 +1,833 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.sql.SaveMode
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.parser.ng.{AbstractSqlParser, AstBuilder, ParseException}
+import org.apache.spark.sql.catalyst.parser.ng.SqlBaseParser._
+import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation}
+import org.apache.spark.sql.execution.command.{DescribeCommand => _, _}
+import org.apache.spark.sql.execution.datasources._
+
+/**
+ * Concrete parser for Spark SQL statements.
+ */
+object SparkSqlParser extends AbstractSqlParser {
+  val astBuilder = new SparkSqlAstBuilder
+}
+
+/**
+ * Builder that converts an ANTLR ParseTree into a LogicalPlan/Expression/TableIdentifier.
+ */
+class SparkSqlAstBuilder extends AstBuilder {
+  import org.apache.spark.sql.catalyst.parser.ng.ParserUtils._
+
+  /**
+   * Create a [[SetCommand]] logical plan.
+   *
+   * Note that everything after the SET keyword is assumed to be part of the key-value pair.
+   * The split between key and value is made by searching for the first `=`
+   * character in the raw string.
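+   *
+   * For instance (the keys below are just illustrative):
+   * {{{
+   *   SET spark.foo=bar   =>   SetCommand(Some("spark.foo" -> Some("bar")))
+   *   SET spark.foo       =>   SetCommand(Some("spark.foo" -> None))
+   *   SET                 =>   SetCommand(None)
+   * }}}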
+   */
+  override def visitSetConfiguration(ctx: SetConfigurationContext): LogicalPlan = withOrigin(ctx) {
+    // Construct the command.
+    val raw = remainder(ctx.SET.getSymbol)
+    val keyValueSeparatorIndex = raw.indexOf('=')
+    if (keyValueSeparatorIndex >= 0) {
+      val key = raw.substring(0, keyValueSeparatorIndex).trim
+      val value = raw.substring(keyValueSeparatorIndex + 1).trim
+      SetCommand(Some(key -> Option(value)))
+    } else if (raw.nonEmpty) {
+      SetCommand(Some(raw.trim -> None))
+    } else {
+      SetCommand(None)
+    }
+  }
+
+  /**
+   * Create a [[SetDatabaseCommand]] logical plan.
+   */
+  override def visitUse(ctx: UseContext): LogicalPlan = withOrigin(ctx) {
+    SetDatabaseCommand(ctx.db.getText)
+  }
+
+  /**
+   * Create a [[ShowTablesCommand]] logical plan.
+   */
+  override def visitShowTables(ctx: ShowTablesContext): LogicalPlan = withOrigin(ctx) {
+    if (ctx.LIKE != null) {
+      logWarning("SHOW TABLES LIKE option is ignored.")
+    }
+    ShowTablesCommand(Option(ctx.db).map(_.getText))
+  }
+
+  /**
+   * Create a [[RefreshTable]] logical plan.
+   */
+  override def visitRefreshTable(ctx: RefreshTableContext): LogicalPlan = withOrigin(ctx) {
+    RefreshTable(visitTableIdentifier(ctx.tableIdentifier))
+  }
+
+  /**
+   * Create a [[CacheTableCommand]] logical plan.
+   */
+  override def visitCacheTable(ctx: CacheTableContext): LogicalPlan = withOrigin(ctx) {
+    val query = Option(ctx.query).map(plan)
+    CacheTableCommand(ctx.identifier.getText, query, ctx.LAZY != null)
+  }
+
+  /**
+   * Create an [[UncacheTableCommand]] logical plan.
+   */
+  override def visitUncacheTable(ctx: UncacheTableContext): LogicalPlan = withOrigin(ctx) {
+    UncacheTableCommand(ctx.identifier.getText)
+  }
+
+  /**
+   * Create a [[ClearCacheCommand]] logical plan.
+   */
+  override def visitClearCache(ctx: ClearCacheContext): LogicalPlan = withOrigin(ctx) {
+    ClearCacheCommand
+  }
+
+  /**
+   * Create an [[ExplainCommand]] logical plan.
+   */
+  override def visitExplain(ctx: ExplainContext): LogicalPlan = withOrigin(ctx) {
+    val options = ctx.explainOption.asScala
+    if (options.exists(_.FORMATTED != null)) {
+      logWarning("EXPLAIN FORMATTED option is ignored.")
+    }
+    if (options.exists(_.LOGICAL != null)) {
+      logWarning("EXPLAIN LOGICAL option is ignored.")
+    }
+
+    // Create the explain command.
+    val statement = plan(ctx.statement)
+    if (isExplainableStatement(statement)) {
+      ExplainCommand(statement, extended = options.exists(_.EXTENDED != null))
+    } else {
+      ExplainCommand(OneRowRelation)
+    }
+  }
+
+  /**
+   * Determine if a plan should be explained at all.
+   */
+  protected def isExplainableStatement(plan: LogicalPlan): Boolean = plan match {
+    case _: datasources.DescribeCommand => false
+    case _ => true
+  }
+
+  /**
+   * Create a [[DescribeCommand]] logical plan.
+   */
+  override def visitDescribeTable(ctx: DescribeTableContext): LogicalPlan = withOrigin(ctx) {
+    // FORMATTED and columns are not supported. Return null and let the parser decide what to do
+    // with this (throw an exception or pass it on to a different system).
+    if (ctx.describeColName != null || ctx.FORMATTED != null || ctx.partitionSpec != null) {
+      null
+    } else {
+      datasources.DescribeCommand(
+        visitTableIdentifier(ctx.tableIdentifier),
+        ctx.EXTENDED != null)
+    }
+  }
+
+  /** Type to keep track of a table header. */
+  type TableHeader = (TableIdentifier, Boolean, Boolean, Boolean)
+
+  /**
+   * Validate a create table statement and return the [[TableIdentifier]].
+   */
+  override def visitCreateTableHeader(
+      ctx: CreateTableHeaderContext): TableHeader = withOrigin(ctx) {
+    val temporary = ctx.TEMPORARY != null
+    val ifNotExists = ctx.EXISTS != null
+    assert(!temporary || !ifNotExists,
+      "a CREATE TEMPORARY TABLE statement does not allow IF NOT EXISTS clause.",
+      ctx)
+    (visitTableIdentifier(ctx.tableIdentifier), temporary, ifNotExists, ctx.EXTERNAL != null)
+  }
+
+  /**
+   * Create a [[CreateTableUsing]] or a [[CreateTableUsingAsSelect]] logical plan.
+   *
+   * TODO add bucketing and partitioning.
+   */
+  override def visitCreateTableUsing(ctx: CreateTableUsingContext): LogicalPlan = withOrigin(ctx) {
+    val (table, temp, ifNotExists, external) = visitCreateTableHeader(ctx.createTableHeader)
+    if (external) {
+      logWarning("EXTERNAL option is not supported.")
+    }
+    val options = Option(ctx.tablePropertyList).map(visitTablePropertyList).getOrElse(Map.empty)
+    val provider = ctx.tableProvider.qualifiedName.getText
+
+    if (ctx.query != null) {
+      // Get the backing query.
+      val query = plan(ctx.query)
+
+      // Determine the storage mode.
+      val mode = if (ifNotExists) {
+        SaveMode.Ignore
+      } else if (temp) {
+        SaveMode.Overwrite
+      } else {
+        SaveMode.ErrorIfExists
+      }
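+      // i.e. IF NOT EXISTS maps to Ignore, a TEMPORARY table to Overwrite, otherwise ErrorIfExists.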
+      CreateTableUsingAsSelect(table, provider, temp, Array.empty, None, mode, options, query)
+    } else {
+      val struct = Option(ctx.colTypeList).map(createStructType)
+      CreateTableUsing(table, struct, provider, temp, options, ifNotExists, managedIfNoPath = false)
+    }
+  }
+
+  /**
+   * Convert a table property list into a key-value map.
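+   *
+   * For instance (hypothetical property names):
+   * {{{
+   *   TBLPROPERTIES ('comment' = 'hi', created.by = 'me')
+   * }}}
+   * becomes Map("comment" -> "hi", "created.by" -> "me").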
+   */
+  override def visitTablePropertyList(
+      ctx: TablePropertyListContext): Map[String, String] = withOrigin(ctx) {
+    ctx.tableProperty.asScala.map { property =>
+      // A key can either be a String or a collection of dot-separated elements. We need to treat
+      // these differently.
+      val key = if (property.key.STRING != null) {
+        string(property.key.STRING)
+      } else {
+        property.key.getText
+      }
+      val value = Option(property.value).map(string).orNull
+      key -> value
+    }.toMap
+  }
+
+  /**
+   * Create a [[CreateDatabase]] command.
+   *
+   * For example:
+   * {{{
+   *   CREATE DATABASE [IF NOT EXISTS] database_name [COMMENT database_comment]
+   *    [LOCATION path] [WITH DBPROPERTIES (key1=val1, key2=val2, ...)]
+   * }}}
+   */
+  override def visitCreateDatabase(ctx: CreateDatabaseContext): LogicalPlan = withOrigin(ctx) {
+    CreateDatabase(
+      ctx.identifier.getText,
+      ctx.EXISTS != null,
+      Option(ctx.locationSpec).map(visitLocationSpec),
+      Option(ctx.comment).map(string),
+      Option(ctx.tablePropertyList).map(visitTablePropertyList).getOrElse(Map.empty))(
+      command(ctx))
+  }
+
+  /**
+   * Create an [[AlterDatabaseProperties]] command.
+   *
+   * For example:
+   * {{{
+   *   ALTER (DATABASE|SCHEMA) database SET DBPROPERTIES (property_name=property_value, ...);
+   * }}}
+   */
+  override def visitSetDatabaseProperties(
+      ctx: SetDatabasePropertiesContext): LogicalPlan = withOrigin(ctx) {
+    AlterDatabaseProperties(
+      ctx.identifier.getText,
+      visitTablePropertyList(ctx.tablePropertyList))(
+      command(ctx))
+  }
+
+  /**
+   * Create a [[DropDatabase]] command.
+   *
+   * For example:
+   * {{{
+   *   DROP (DATABASE|SCHEMA) [IF EXISTS] database [RESTRICT|CASCADE];
+   * }}}
+   */
+  override def visitDropDatabase(ctx: DropDatabaseContext): LogicalPlan = withOrigin(ctx) {
+    DropDatabase(ctx.identifier.getText, ctx.EXISTS != null, ctx.CASCADE == null)(command(ctx))
+  }
+
+  /**
+   * Create a [[DescribeDatabase]] command.
+   *
+   * For example:
+   * {{{
+   *   DESCRIBE DATABASE [EXTENDED] database;
+   * }}}
+   */
+  override def visitDescribeDatabase(ctx: DescribeDatabaseContext): LogicalPlan = withOrigin(ctx) {
+    DescribeDatabase(ctx.identifier.getText, ctx.EXTENDED != null)(command(ctx))
+  }
+
+  /**
+   * Create a [[CreateFunction]] command.
+   *
+   * For example:
+   * {{{
+   *   CREATE [TEMPORARY] FUNCTION [db_name.]function_name AS class_name
+   *    [USING JAR|FILE|ARCHIVE 'file_uri' [, JAR|FILE|ARCHIVE 'file_uri']];
+   * }}}
+   */
+  override def visitCreateFunction(ctx: CreateFunctionContext): LogicalPlan = withOrigin(ctx) {
+    val resources = ctx.resource.asScala.map { resource =>
+      val resourceType = resource.identifier.getText.toLowerCase
+      resourceType match {
+        case "jar" | "file" | "archive" =>
+          resourceType -> string(resource.STRING)
+        case other =>
+          throw new ParseException(s"Resource Type '$resourceType' is not supported.", ctx)
+      }
+    }
+
+    // Extract database, name & alias.
+    val (database, function) = visitFunctionName(ctx.qualifiedName)
+    CreateFunction(
+      database,
+      function,
+      string(ctx.className), // TODO this is not an alias.
+      resources,
+      ctx.TEMPORARY != null)(
+      command(ctx))
+  }
+
+  /**
+   * Create a [[DropFunction]] command.
+   *
+   * For example:
+   * {{{
+   *   DROP [TEMPORARY] FUNCTION [IF EXISTS] function;
+   * }}}
+   */
+  override def visitDropFunction(ctx: DropFunctionContext): LogicalPlan = withOrigin(ctx) {
+    val (database, function) = visitFunctionName(ctx.qualifiedName)
+    DropFunction(database, function, ctx.EXISTS != null, ctx.TEMPORARY != null)(command(ctx))
+  }
+
+  /**
+   * Create a function database (optional) and name pair.
+   */
+  private def visitFunctionName(ctx: QualifiedNameContext): (Option[String], String) = {
+    ctx.identifier().asScala.map(_.getText) match {
+      case Seq(db, fn) => (Option(db), fn)
+      case Seq(fn) => (None, fn)
+      case other => throw new ParseException(s"Unsupported function name '${ctx.getText}'", ctx)
+    }
+  }
+
+  /**
+   * Create a [[AlterTableRename]] command.
+   *
+   * For example:
+   * {{{
+   *   ALTER TABLE table1 RENAME TO table2;
+   * }}}
+   */
+  override def visitRenameTable(ctx: RenameTableContext): LogicalPlan = withOrigin(ctx) {
+    AlterTableRename(
+      visitTableIdentifier(ctx.from),
+      visitTableIdentifier(ctx.to))(
+      command(ctx))
+  }
+
+  /**
+   * Create an [[AlterTableSetProperties]] command.
+   *
+   * For example:
+   * {{{
+   *   ALTER TABLE table SET TBLPROPERTIES ('comment' = new_comment);
+   * }}}
+   */
+  override def visitSetTableProperties(
+      ctx: SetTablePropertiesContext): LogicalPlan = withOrigin(ctx) {
+    AlterTableSetProperties(
+      visitTableIdentifier(ctx.tableIdentifier),
+      visitTablePropertyList(ctx.tablePropertyList))(
+      command(ctx))
+  }
+
+  /**
+   * Create an [[AlterTableUnsetProperties]] command.
+   *
+   * For example:
+   * {{{
+   *   ALTER TABLE table UNSET TBLPROPERTIES IF EXISTS ('comment', 'key');
+   * }}}
+   */
+  override def visitUnsetTableProperties(
+      ctx: UnsetTablePropertiesContext): LogicalPlan = withOrigin(ctx) {
+    AlterTableUnsetProperties(
+      visitTableIdentifier(ctx.tableIdentifier),
+      visitTablePropertyList(ctx.tablePropertyList),
+      ctx.EXISTS != null)(
+      command(ctx))
+  }
+
+  /**
+   * Create an [[AlterTableSerDeProperties]] command.
+   *
+   * For example:
+   * {{{
+   *   ALTER TABLE table [PARTITION spec] SET SERDE serde_name [WITH SERDEPROPERTIES props];
+   *   ALTER TABLE table [PARTITION spec] SET SERDEPROPERTIES serde_properties;
+   * }}}
+   */
+  override def visitSetTableSerDe(ctx: SetTableSerDeContext): LogicalPlan = withOrigin(ctx) {
+    AlterTableSerDeProperties(
+      visitTableIdentifier(ctx.tableIdentifier),
+      Option(ctx.STRING).map(string),
+      Option(ctx.tablePropertyList).map(visitTablePropertyList),
+      // TODO a partition spec is allowed to have optional values; this is not supported here yet.
+      Option(ctx.partitionSpec).map(visitNonOptionalPartitionSpec))(
+      command(ctx))
+  }
+
+  /**
+   * Create an [[AlterTableStorageProperties]] command.
+   *
+   * For example:
+   * {{{
+   *   ALTER TABLE table CLUSTERED BY (col, ...) [SORTED BY (col, ...)] INTO n BUCKETS;
+   * }}}
+   */
+  override def visitBucketTable(ctx: BucketTableContext): LogicalPlan = withOrigin(ctx) {
+    AlterTableStorageProperties(
+      visitTableIdentifier(ctx.tableIdentifier),
+      visitBucketSpec(ctx.bucketSpec))(
+      command(ctx))
+  }
+
+  /**
+   * Create an [[AlterTableNotClustered]] command.
+   *
+   * For example:
+   * {{{
+   *   ALTER TABLE table NOT CLUSTERED;
+   * }}}
+   */
+  override def visitUnclusterTable(ctx: UnclusterTableContext): LogicalPlan = withOrigin(ctx) {
+    AlterTableNotClustered(visitTableIdentifier(ctx.tableIdentifier))(command(ctx))
+  }
+
+  /**
+   * Create an [[AlterTableNotSorted]] command.
+   *
+   * For example:
+   * {{{
+   *   ALTER TABLE table NOT SORTED;
+   * }}}
+   */
+  override def visitUnsortTable(ctx: UnsortTableContext): LogicalPlan = withOrigin(ctx) {
+    AlterTableNotSorted(visitTableIdentifier(ctx.tableIdentifier))(command(ctx))
+  }
+
+  /**
+   * Create an [[AlterTableSkewed]] command.
+   *
+   * For example:
+   * {{{
+   *   ALTER TABLE table SKEWED BY (col1, col2)
+   *   ON ((col1_value, col2_value) [, (col1_value, col2_value), ...])
+   *   [STORED AS DIRECTORIES];
+   * }}}
+   */
+  override def visitSkewTable(ctx: SkewTableContext): LogicalPlan = withOrigin(ctx) {
+    val table = visitTableIdentifier(ctx.tableIdentifier)
+    val (cols, values, storedAsDirs) = visitSkewSpec(ctx.skewSpec)
+    AlterTableSkewed(table, cols, values, storedAsDirs)(command(ctx))
+  }
+
+  /**
+   * Create an [[AlterTableNotSkewed]] command.
+   *
+   * For example:
+   * {{{
+   *   ALTER TABLE table NOT SKEWED;
+   * }}}
+   */
+  override def visitUnskewTable(ctx: UnskewTableContext): LogicalPlan = withOrigin(ctx) {
+    AlterTableNotSkewed(visitTableIdentifier(ctx.tableIdentifier))(command(ctx))
+  }
+
+  /**
+   * Create an [[AlterTableNotStoredAsDirs]] command.
+   *
+   * For example:
+   * {{{
+   *   ALTER TABLE table NOT STORED AS DIRECTORIES
+   * }}}
+   */
+  override def visitUnstoreTable(ctx: UnstoreTableContext): LogicalPlan = withOrigin(ctx) {
+    AlterTableNotStoredAsDirs(visitTableIdentifier(ctx.tableIdentifier))(command(ctx))
+  }
+
+  /**
+   * Create an [[AlterTableSkewedLocation]] command.
+   *
+   * For example:
+   * {{{
+   *   ALTER TABLE table SET SKEWED LOCATION (col1="loc1" [, (col2, col3)="loc2", ...] );
+   * }}}
+   */
+  override def visitSetTableSkewLocations(
+      ctx: SetTableSkewLocationsContext): LogicalPlan = withOrigin(ctx) {
+    val skewedMap = ctx.skewedLocationList.skewedLocation.asScala.flatMap {
+      slCtx =>
+        val location = string(slCtx.STRING)
+        if (slCtx.constant != null) {
+          Seq(visitStringConstant(slCtx.constant) -> location)
+        } else {
+          // TODO this mirrors the original implementation, but it does not make much sense:
+          // we should be storing a tuple of values (not column names) for which we want a
+          // dedicated storage location.
+          visitConstantList(slCtx.constantList).map(_ -> location)
+        }
+    }.toMap
+
+    AlterTableSkewedLocation(
+      visitTableIdentifier(ctx.tableIdentifier),
+      skewedMap)(
+      command(ctx))
+  }
+
+  /**
+   * Create an [[AlterTableAddPartition]] command.
+   *
+   * For example:
+   * {{{
+   *   ALTER TABLE table ADD [IF NOT EXISTS] PARTITION spec [LOCATION 'loc1']
+   * }}}
+   */
+  override def visitAddTablePartition(
+      ctx: AddTablePartitionContext): LogicalPlan = withOrigin(ctx) {
+    // Create partition spec to location mapping.
+    val specsAndLocs = ctx.partitionSpecLocation.asScala.map {
+      splCtx =>
+        val spec = visitNonOptionalPartitionSpec(splCtx.partitionSpec)
+        val location = Option(splCtx.locationSpec).map(visitLocationSpec)
+        spec -> location
+    }
+    AlterTableAddPartition(
+      visitTableIdentifier(ctx.tableIdentifier),
+      specsAndLocs,
+      ctx.EXISTS != null)(
+      command(ctx))
+  }
+
+  /**
+   * Create an [[AlterTableExchangePartition]] command.
+   *
+   * For example:
+   * {{{
+   *   ALTER TABLE table1 EXCHANGE PARTITION spec WITH TABLE table2;
+   * }}}
+   */
+  override def visitExchangeTablePartition(
+      ctx: ExchangeTablePartitionContext): LogicalPlan = withOrigin(ctx) {
+    AlterTableExchangePartition(
+      visitTableIdentifier(ctx.from),
+      visitTableIdentifier(ctx.to),
+      visitNonOptionalPartitionSpec(ctx.partitionSpec))(
+      command(ctx))
+  }
+
+  /**
+   * Create an [[AlterTableRenamePartition]] command
+   *
+   * For example:
+   * {{{
+   *   ALTER TABLE table PARTITION spec1 RENAME TO PARTITION spec2;
+   * }}}
+   */
+  override def visitRenameTablePartition(
+      ctx: RenameTablePartitionContext): LogicalPlan = withOrigin(ctx) {
+    AlterTableRenamePartition(
+      visitTableIdentifier(ctx.tableIdentifier),
+      visitNonOptionalPartitionSpec(ctx.from),
+      visitNonOptionalPartitionSpec(ctx.to))(
+      command(ctx))
+  }
+
+  /**
+   * Create an [[AlterTableDropPartition]] command
+   *
+   * For example:
+   * {{{
+   *   ALTER TABLE table DROP [IF EXISTS] PARTITION spec1[, PARTITION spec2, ...] [PURGE];
+   * }}}
+   */
+  override def visitDropTablePartitions(
+      ctx: DropTablePartitionsContext): LogicalPlan = withOrigin(ctx) {
+    AlterTableDropPartition(
+      visitTableIdentifier(ctx.tableIdentifier),
+      ctx.partitionSpec.asScala.map(visitNonOptionalPartitionSpec),
+      ctx.EXISTS != null,
+      ctx.PURGE != null)(
+      command(ctx))
+  }
+
+  /**
+   * Create an [[AlterTableArchivePartition]] command
+   *
+   * For example:
+   * {{{
+   *   ALTER TABLE table ARCHIVE PARTITION spec;
+   * }}}
+   */
+  override def visitArchiveTablePartition(
+      ctx: ArchiveTablePartitionContext): LogicalPlan = withOrigin(ctx) {
+    AlterTableArchivePartition(
+      visitTableIdentifier(ctx.tableIdentifier),
+      visitNonOptionalPartitionSpec(ctx.partitionSpec))(
+      command(ctx))
+  }
+
+  /**
+   * Create an [[AlterTableUnarchivePartition]] command
+   *
+   * For example:
+   * {{{
+   *   ALTER TABLE table UNARCHIVE PARTITION spec;
+   * }}}
+   */
+  override def visitUnarchiveTablePartition(
+      ctx: UnarchiveTablePartitionContext): LogicalPlan = withOrigin(ctx) {
+    AlterTableUnarchivePartition(
+      visitTableIdentifier(ctx.tableIdentifier),
+      visitNonOptionalPartitionSpec(ctx.partitionSpec))(
+      command(ctx))
+  }
+
+  /**
+   * Create an [[AlterTableSetFileFormat]] command
+   *
+   * For example:
+   * {{{
+   *   ALTER TABLE table [PARTITION spec] SET FILEFORMAT file_format;
+   * }}}
+   */
+  override def visitSetTableFileFormat(
+      ctx: SetTableFileFormatContext): LogicalPlan = withOrigin(ctx) {
+    // AlterTableSetFileFormat currently takes both a GenericFileFormat and a
+    // TableFileFormatContext. This is a bit weird because it should only take one. It also should
+    // use a CatalogFileFormat instead of either a String or a Sequence of Strings. We will address
+    // this in a follow-up PR.
+    val (fileFormat, genericFormat) = ctx.fileFormat match {
+      case s: GenericFileFormatContext =>
+        (Seq.empty[String], Option(s.identifier.getText))
+      case s: TableFileFormatContext =>
+        val elements = Seq(s.inFmt, s.outFmt) ++
+          Option(s.serdeCls).toSeq ++
+          Option(s.inDriver).toSeq ++
+          Option(s.outDriver).toSeq
+        (elements.map(string), None)
+    }
+    AlterTableSetFileFormat(
+      visitTableIdentifier(ctx.tableIdentifier),
+      Option(ctx.partitionSpec).map(visitNonOptionalPartitionSpec),
+      fileFormat,
+      genericFormat)(
+      command(ctx))
+  }
+
+  /**
+   * Create an [[AlterTableSetLocation]] command
+   *
+   * For example:
+   * {{{
+   *   ALTER TABLE table [PARTITION spec] SET LOCATION "loc";
+   * }}}
+   */
+  override def visitSetTableLocation(ctx: SetTableLocationContext): LogicalPlan = withOrigin(ctx) {
+    AlterTableSetLocation(
+      visitTableIdentifier(ctx.tableIdentifier),
+      Option(ctx.partitionSpec).map(visitNonOptionalPartitionSpec),
+      visitLocationSpec(ctx.locationSpec))(
+      command(ctx))
+  }
+
+  /**
+   * Create an [[AlterTableTouch]] command
+   *
+   * For example:
+   * {{{
+   *   ALTER TABLE table TOUCH [PARTITION spec];
+   * }}}
+   */
+  override def visitTouchTable(ctx: TouchTableContext): LogicalPlan = withOrigin(ctx) {
+    AlterTableTouch(
+      visitTableIdentifier(ctx.tableIdentifier),
+      Option(ctx.partitionSpec).map(visitNonOptionalPartitionSpec))(
+      command(ctx))
+  }
+
+  /**
+   * Create an [[AlterTableCompact]] command
+   *
+   * For example:
+   * {{{
+   *   ALTER TABLE table [PARTITION spec] COMPACT 'compaction_type';
+   * }}}
+   */
+  override def visitCompactTable(ctx: CompactTableContext): LogicalPlan = withOrigin(ctx) {
+    AlterTableCompact(
+      visitTableIdentifier(ctx.tableIdentifier),
+      Option(ctx.partitionSpec).map(visitNonOptionalPartitionSpec),
+      string(ctx.STRING))(
+      command(ctx))
+  }
+
+  /**
+   * Create an [[AlterTableMerge]] command
+   *
+   * For example:
+   * {{{
+   *   ALTER TABLE table [PARTITION spec] CONCATENATE;
+   * }}}
+   */
+  override def visitConcatenateTable(ctx: ConcatenateTableContext): LogicalPlan = withOrigin(ctx) {
+    AlterTableMerge(
+      visitTableIdentifier(ctx.tableIdentifier),
+      Option(ctx.partitionSpec).map(visitNonOptionalPartitionSpec))(
+      command(ctx))
+  }
+
+  /**
+   * Create an [[AlterTableChangeCol]] command
+   *
+   * For example:
+   * {{{
+   *   ALTER TABLE tableIdentifier [PARTITION spec]
+   *    CHANGE [COLUMN] col_old_name col_new_name column_type [COMMENT col_comment]
+   *    [FIRST|AFTER column_name] [CASCADE|RESTRICT];
+   * }}}
+   */
+  override def visitChangeColumn(ctx: ChangeColumnContext): LogicalPlan = withOrigin(ctx) {
+    val col = visitColType(ctx.colType())
+    val comment = if (col.metadata.contains("comment")) {
+      Option(col.metadata.getString("comment"))
+    } else {
+      None
+    }
+
+    AlterTableChangeCol(
+      visitTableIdentifier(ctx.tableIdentifier),
+      Option(ctx.partitionSpec).map(visitNonOptionalPartitionSpec),
+      ctx.oldName.getText,
+      // We could also pass the whole StructField here, which might be simpler.
+      col.name,
+      col.dataType,
+      comment,
+      Option(ctx.after).map(_.getText),
+      // Note that Restrict and Cascade are mutually exclusive.
+      ctx.RESTRICT != null,
+      ctx.CASCADE != null)(
+      command(ctx))
+  }
+
+  /**
+   * Create an [[AlterTableAddCol]] command
+   *
+   * For example:
+   * {{{
+   *   ALTER TABLE tableIdentifier [PARTITION spec]
+   *    ADD COLUMNS (name type [COMMENT comment], ...) [CASCADE|RESTRICT]
+   * }}}
+   */
+  override def visitAddColumns(ctx: AddColumnsContext): LogicalPlan = withOrigin(ctx) {
+    AlterTableAddCol(
+      visitTableIdentifier(ctx.tableIdentifier),
+      Option(ctx.partitionSpec).map(visitNonOptionalPartitionSpec),
+      createStructType(ctx.colTypeList),
+      // Note that Restrict and Cascade are mutually exclusive.
+      ctx.RESTRICT != null,
+      ctx.CASCADE != null)(
+      command(ctx))
+  }
+
+  /**
+   * Create an [[AlterTableReplaceCol]] command
+   *
+   * For example:
+   * {{{
+   *   ALTER TABLE tableIdentifier [PARTITION spec]
+   *    REPLACE COLUMNS (name type [COMMENT comment], ...) [CASCADE|RESTRICT]
+   * }}}
+   */
+  override def visitReplaceColumns(ctx: ReplaceColumnsContext): LogicalPlan = withOrigin(ctx) {
+    AlterTableReplaceCol(
+      visitTableIdentifier(ctx.tableIdentifier),
+      Option(ctx.partitionSpec).map(visitNonOptionalPartitionSpec),
+      createStructType(ctx.colTypeList),
+      // Note that Restrict and Cascade are mutually exclusive.
+      ctx.RESTRICT != null,
+      ctx.CASCADE != null)(
+      command(ctx))
+  }
+
+  /**
+   * Create location string.
+   */
+  override def visitLocationSpec(ctx: LocationSpecContext): String = withOrigin(ctx) {
+    string(ctx.STRING)
+  }
+
+  /**
+   * Create a [[BucketSpec]].
+   */
+  override def visitBucketSpec(ctx: BucketSpecContext): BucketSpec = withOrigin(ctx) {
+    BucketSpec(
+      ctx.INTEGER_VALUE.getText.toInt,
+      visitIdentifierList(ctx.identifierList),
+      Option(ctx.orderedIdentifierList).toSeq
+        .flatMap(_.orderedIdentifier.asScala)
+        .map(_.identifier.getText))
+  }
+
+  /**
+   * Create a skew specification. This contains three components:
+   * - The skewed columns.
+   * - The skewed values; the size of each value tuple must match the number of skewed columns.
+   * - A 'stored as directories' flag.
+   */
+  override def visitSkewSpec(
+      ctx: SkewSpecContext): (Seq[String], Seq[Seq[String]], Boolean) = withOrigin(ctx) {
+    val skewedValues = if (ctx.constantList != null) {
+      Seq(visitConstantList(ctx.constantList))
+    } else {
+      visitNestedConstantList(ctx.nestedConstantList)
+    }
+    (visitIdentifierList(ctx.identifierList), skewedValues, ctx.DIRECTORIES != null)
+  }
+
+  /**
+   * Convert a nested constants list into a sequence of string sequences.
+   */
+  override def visitNestedConstantList(
+      ctx: NestedConstantListContext): Seq[Seq[String]] = withOrigin(ctx) {
+    ctx.constantList.asScala.map(visitConstantList)
+  }
+
+  /**
+   * Convert a constants list into a String sequence.
+   */
+  override def visitConstantList(ctx: ConstantListContext): Seq[String] = withOrigin(ctx) {
+    ctx.constant.asScala.map(visitStringConstant)
+  }
+}
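
The visitor methods above each translate one ANTLR context into a logical command node. A minimal
sketch of how such a plan could be checked from a test (assuming the parsePlan entry point that
DDLCommandSuite below reaches through SparkSqlParser; the curried second argument list only carries
the original SQL text) might look like:

{{{
  // Hypothetical test snippet, not part of this patch.
  val plan = SparkSqlParser.parsePlan("ALTER TABLE table1 RENAME TO table2")
  plan match {
    case AlterTableRename(from, to) =>
      assert(from == TableIdentifier("table1"))
      assert(to == TableIdentifier("table2"))
    case other =>
      fail(s"Expected AlterTableRename, got $other")
  }
}}}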
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala
index 24923bbb10c74..877e159fbd0fa 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala
@@ -107,7 +107,7 @@ case class CreateTempTableUsing(
     sqlContext.sessionState.catalog.createTempTable(
       tableIdent.table,
       Dataset.ofRows(sqlContext, LogicalRelation(dataSource.resolveRelation())).logicalPlan,
-      ignoreIfExists = true)
+      overrideIfExists = true)
 
     Seq.empty[Row]
   }
@@ -138,7 +138,7 @@ case class CreateTempTableUsingAsSelect(
     sqlContext.sessionState.catalog.createTempTable(
       tableIdent.table,
       Dataset.ofRows(sqlContext, LogicalRelation(result)).logicalPlan,
-      ignoreIfExists = true)
+      overrideIfExists = true)
 
     Seq.empty[Row]
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala
index 8cc352863902c..dc4793e85acd2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala
@@ -18,18 +18,18 @@
 package org.apache.spark.sql.execution.joins
 
 import java.io.{Externalizable, IOException, ObjectInput, ObjectOutput}
-import java.nio.ByteOrder
 import java.util.{HashMap => JavaHashMap}
 
-import org.apache.spark.{SparkConf, SparkEnv}
+import org.apache.spark.{SparkConf, SparkEnv, SparkException, TaskContext}
 import org.apache.spark.memory.{StaticMemoryManager, TaskMemoryManager}
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.physical.BroadcastMode
 import org.apache.spark.sql.execution.SparkSqlSerializer
 import org.apache.spark.unsafe.Platform
+import org.apache.spark.unsafe.hash.Murmur3_x86_32
 import org.apache.spark.unsafe.map.BytesToBytesMap
-import org.apache.spark.util.{KnownSizeEstimation, SizeEstimator, Utils}
+import org.apache.spark.util.{KnownSizeEstimation, Utils}
 import org.apache.spark.util.collection.CompactBuffer
 
 /**
@@ -54,6 +54,11 @@ private[execution] sealed trait HashedRelation {
     */
   def getMemorySize: Long = 1L  // to make the test happy
 
+  /**
+   * Release any used resources.
+   */
+  def close(): Unit = {}
+
   // This is a helper method to implement Externalizable, and is used by
   // GeneralHashedRelation and UniqueKeyHashedRelation
   protected def writeBytes(out: ObjectOutput, serialized: Array[Byte]): Unit = {
@@ -132,163 +137,83 @@ private[execution] object HashedRelation {
 }
 
 /**
- * A HashedRelation for UnsafeRow, which is backed by HashMap or BytesToBytesMap that maps the key
- * into a sequence of values.
- *
- * When it's created, it uses HashMap. After it's serialized and deserialized, it switch to use
- * BytesToBytesMap for better memory performance (multiple values for the same are stored as a
- * continuous byte array.
+ * A HashedRelation for UnsafeRow, which is backed by a BytesToBytesMap.
  *
  * It's serialized in the following format:
  *  [number of keys]
- *  [size of key] [size of all values in bytes] [key bytes] [bytes for all values]
- *  ...
- *
- * All the values are serialized as following:
- *   [number of fields] [number of bytes] [underlying bytes of UnsafeRow]
- *   ...
+ *  [size of key] [size of value] [key bytes] [bytes for value]
  */
-private[joins] final class UnsafeHashedRelation(
-    private var hashTable: JavaHashMap[UnsafeRow, CompactBuffer[UnsafeRow]])
-  extends HashedRelation
-  with KnownSizeEstimation
-  with Externalizable {
-
-  private[joins] def this() = this(null)  // Needed for serialization
+private[joins] class UnsafeHashedRelation(
+    private var numFields: Int,
+    private var binaryMap: BytesToBytesMap)
+  extends HashedRelation with KnownSizeEstimation with Externalizable {
 
-  // Use BytesToBytesMap in executor for better performance (it's created when deserialization)
-  // This is used in broadcast joins and distributed mode only
-  @transient private[this] var binaryMap: BytesToBytesMap = _
+  private[joins] def this() = this(0, null)  // Needed for serialization
 
-  /**
-   * Return the size of the unsafe map on the executors.
-   *
-   * For broadcast joins, this hashed relation is bigger on the driver because it is
-   * represented as a Java hash map there. While serializing the map to the executors,
-   * however, we rehash the contents in a binary map to reduce the memory footprint on
-   * the executors.
-   *
-   * For non-broadcast joins or in local mode, return 0.
-   */
   override def getMemorySize: Long = {
-    if (binaryMap != null) {
-      binaryMap.getTotalMemoryConsumption
-    } else {
-      0
-    }
+    binaryMap.getTotalMemoryConsumption
   }
 
   override def estimatedSize: Long = {
-    if (binaryMap != null) {
-      binaryMap.getTotalMemoryConsumption
-    } else {
-      SizeEstimator.estimate(hashTable)
-    }
+    binaryMap.getTotalMemoryConsumption
   }
 
   override def get(key: InternalRow): Seq[InternalRow] = {
     val unsafeKey = key.asInstanceOf[UnsafeRow]
-
-    if (binaryMap != null) {
-      // Used in Broadcast join
-      val map = binaryMap  // avoid the compiler error
-      val loc = new map.Location  // this could be allocated in stack
-      binaryMap.safeLookup(unsafeKey.getBaseObject, unsafeKey.getBaseOffset,
-        unsafeKey.getSizeInBytes, loc, unsafeKey.hashCode())
-      if (loc.isDefined) {
-        val buffer = CompactBuffer[UnsafeRow]()
-
-        val base = loc.getValueBase
-        var offset = loc.getValueOffset
-        val last = offset + loc.getValueLength
-        while (offset < last) {
-          val numFields = Platform.getInt(base, offset)
-          val sizeInBytes = Platform.getInt(base, offset + 4)
-          offset += 8
-
-          val row = new UnsafeRow(numFields)
-          row.pointTo(base, offset, sizeInBytes)
-          buffer += row
-          offset += sizeInBytes
-        }
-        buffer
-      } else {
-        null
+    val map = binaryMap  // avoid the compiler error
+    val loc = new map.Location  // this could be allocated in stack
+    binaryMap.safeLookup(unsafeKey.getBaseObject, unsafeKey.getBaseOffset,
+      unsafeKey.getSizeInBytes, loc, unsafeKey.hashCode())
+    if (loc.isDefined) {
+      val buffer = CompactBuffer[UnsafeRow]()
+      val row = new UnsafeRow(numFields)
+      row.pointTo(loc.getValueBase, loc.getValueOffset, loc.getValueLength)
+      buffer += row
+      while (loc.nextValue()) {
+        val row = new UnsafeRow(numFields)
+        row.pointTo(loc.getValueBase, loc.getValueOffset, loc.getValueLength)
+        buffer += row
       }
-
+      buffer
     } else {
-      // Use the Java HashMap in local mode or for non-broadcast joins (e.g. ShuffleHashJoin)
-      hashTable.get(unsafeKey)
+      null
     }
   }
 
-  override def writeExternal(out: ObjectOutput): Unit = Utils.tryOrIOException {
-    if (binaryMap != null) {
-      // This could happen when a cached broadcast object need to be dumped into disk to free memory
-      out.writeInt(binaryMap.numElements())
-
-      var buffer = new Array[Byte](64)
-      def write(base: Object, offset: Long, length: Int): Unit = {
-        if (buffer.length < length) {
-          buffer = new Array[Byte](length)
-        }
-        Platform.copyMemory(base, offset, buffer, Platform.BYTE_ARRAY_OFFSET, length)
-        out.write(buffer, 0, length)
-      }
+  override def close(): Unit = {
+    binaryMap.free()
+  }
 
-      val iter = binaryMap.iterator()
-      while (iter.hasNext) {
-        val loc = iter.next()
-        // [key size] [values size] [key bytes] [values bytes]
-        out.writeInt(loc.getKeyLength)
-        out.writeInt(loc.getValueLength)
-        write(loc.getKeyBase, loc.getKeyOffset, loc.getKeyLength)
-        write(loc.getValueBase, loc.getValueOffset, loc.getValueLength)
+  override def writeExternal(out: ObjectOutput): Unit = Utils.tryOrIOException {
+    out.writeInt(numFields)
+    // TODO: move these into BytesToBytesMap
+    out.writeInt(binaryMap.numKeys())
+    out.writeInt(binaryMap.numValues())
+
+    var buffer = new Array[Byte](64)
+    def write(base: Object, offset: Long, length: Int): Unit = {
+      if (buffer.length < length) {
+        buffer = new Array[Byte](length)
       }
+      Platform.copyMemory(base, offset, buffer, Platform.BYTE_ARRAY_OFFSET, length)
+      out.write(buffer, 0, length)
+    }
 
-    } else {
-      assert(hashTable != null)
-      out.writeInt(hashTable.size())
-
-      val iter = hashTable.entrySet().iterator()
-      while (iter.hasNext) {
-        val entry = iter.next()
-        val key = entry.getKey
-        val values = entry.getValue
-
-        // write all the values as single byte array
-        var totalSize = 0L
-        var i = 0
-        while (i < values.length) {
-          totalSize += values(i).getSizeInBytes + 4 + 4
-          i += 1
-        }
-        assert(totalSize < Integer.MAX_VALUE, "values are too big")
-
-        // [key size] [values size] [key bytes] [values bytes]
-        out.writeInt(key.getSizeInBytes)
-        out.writeInt(totalSize.toInt)
-        out.write(key.getBytes)
-        i = 0
-        while (i < values.length) {
-          // [num of fields] [num of bytes] [row bytes]
-          // write the integer in native order, so they can be read by UNSAFE.getInt()
-          if (ByteOrder.nativeOrder() == ByteOrder.BIG_ENDIAN) {
-            out.writeInt(values(i).numFields())
-            out.writeInt(values(i).getSizeInBytes)
-          } else {
-            out.writeInt(Integer.reverseBytes(values(i).numFields()))
-            out.writeInt(Integer.reverseBytes(values(i).getSizeInBytes))
-          }
-          out.write(values(i).getBytes)
-          i += 1
-        }
-      }
+    val iter = binaryMap.iterator()
+    while (iter.hasNext) {
+      val loc = iter.next()
+      // [key size] [value size] [key bytes] [value bytes]
+      out.writeInt(loc.getKeyLength)
+      out.writeInt(loc.getValueLength)
+      write(loc.getKeyBase, loc.getKeyOffset, loc.getKeyLength)
+      write(loc.getValueBase, loc.getValueOffset, loc.getValueLength)
     }
   }
 
   override def readExternal(in: ObjectInput): Unit = Utils.tryOrIOException {
+    numFields = in.readInt()
     val nKeys = in.readInt()
+    val nValues = in.readInt()
     // This is used in Broadcast, shared by multiple tasks, so we use on-heap memory
     // TODO(josh): This needs to be revisited before we merge this patch; making this change now
     // so that tests compile:
@@ -314,7 +239,7 @@ private[joins] final class UnsafeHashedRelation(
     var i = 0
     var keyBuffer = new Array[Byte](1024)
     var valuesBuffer = new Array[Byte](1024)
-    while (i < nKeys) {
+    while (i < nValues) {
       val keySize = in.readInt()
       val valuesSize = in.readInt()
       if (keySize > keyBuffer.length) {
@@ -326,13 +251,11 @@ private[joins] final class UnsafeHashedRelation(
       }
       in.readFully(valuesBuffer, 0, valuesSize)
 
-      // put it into binary map
       val loc = binaryMap.lookup(keyBuffer, Platform.BYTE_ARRAY_OFFSET, keySize)
-      assert(!loc.isDefined, "Duplicated key found!")
-      val putSuceeded = loc.putNewKey(
-        keyBuffer, Platform.BYTE_ARRAY_OFFSET, keySize,
+      val putSuceeded = loc.append(keyBuffer, Platform.BYTE_ARRAY_OFFSET, keySize,
         valuesBuffer, Platform.BYTE_ARRAY_OFFSET, valuesSize)
       if (!putSuceeded) {
+        binaryMap.free()
         throw new IOException("Could not allocate memory to grow BytesToBytesMap")
       }
       i += 1
@@ -340,6 +263,29 @@ private[joins] final class UnsafeHashedRelation(
   }
 }
 
+/**
+ * A HashedRelation for UnsafeRow with unique keys.
+ */
+private[joins] final class UniqueUnsafeHashedRelation(
+    private var numFields: Int,
+    private var binaryMap: BytesToBytesMap)
+  extends UnsafeHashedRelation(numFields, binaryMap) with UniqueHashedRelation {
+  def getValue(key: InternalRow): InternalRow = {
+    val unsafeKey = key.asInstanceOf[UnsafeRow]
+    val map = binaryMap  // avoid the compiler error
+    val loc = new map.Location  // this could be allocated in stack
+    binaryMap.safeLookup(unsafeKey.getBaseObject, unsafeKey.getBaseOffset,
+      unsafeKey.getSizeInBytes, loc, unsafeKey.hashCode())
+    if (loc.isDefined) {
+      val row = new UnsafeRow(numFields)
+      row.pointTo(loc.getValueBase, loc.getValueOffset, loc.getValueLength)
+      row
+    } else {
+      null
+    }
+  }
+}
+
 private[joins] object UnsafeHashedRelation {
 
   def apply(
@@ -347,29 +293,54 @@ private[joins] object UnsafeHashedRelation {
       keyGenerator: UnsafeProjection,
       sizeEstimate: Int): HashedRelation = {
 
-    // Use a Java hash table here because unsafe maps expect fixed size records
-    // TODO: Use BytesToBytesMap for memory efficiency
-    val hashTable = new JavaHashMap[UnsafeRow, CompactBuffer[UnsafeRow]](sizeEstimate)
+    val taskMemoryManager = if (TaskContext.get() != null) {
+      TaskContext.get().taskMemoryManager()
+    } else {
+      new TaskMemoryManager(
+        new StaticMemoryManager(
+          new SparkConf().set("spark.memory.offHeap.enabled", "false"),
+          Long.MaxValue,
+          Long.MaxValue,
+          1),
+        0)
+    }
+    val pageSizeBytes = Option(SparkEnv.get).map(_.memoryManager.pageSizeBytes)
+      .getOrElse(new SparkConf().getSizeAsBytes("spark.buffer.pageSize", "16m"))
+
+    val binaryMap = new BytesToBytesMap(
+      taskMemoryManager,
+      // Only about 70% of the slots can be used before the map grows;
+      // the extra capacity helps to reduce hash collisions.
+      (sizeEstimate * 1.5 + 1).toInt,
+      pageSizeBytes)
 
     // Create a mapping of buildKeys -> rows
+    var numFields = 0
+    // Whether all the keys are unique or not
+    var allUnique: Boolean = true
     while (input.hasNext) {
-      val unsafeRow = input.next().asInstanceOf[UnsafeRow]
-      val rowKey = keyGenerator(unsafeRow)
-      if (!rowKey.anyNull) {
-        val existingMatchList = hashTable.get(rowKey)
-        val matchList = if (existingMatchList == null) {
-          val newMatchList = new CompactBuffer[UnsafeRow]()
-          hashTable.put(rowKey.copy(), newMatchList)
-          newMatchList
-        } else {
-          existingMatchList
+      val row = input.next().asInstanceOf[UnsafeRow]
+      numFields = row.numFields()
+      val key = keyGenerator(row)
+      if (!key.anyNull) {
+        val loc = binaryMap.lookup(key.getBaseObject, key.getBaseOffset, key.getSizeInBytes)
+        if (loc.isDefined) {
+          allUnique = false
+        }
+        val success = loc.append(
+          key.getBaseObject, key.getBaseOffset, key.getSizeInBytes,
+          row.getBaseObject, row.getBaseOffset, row.getSizeInBytes)
+        if (!success) {
+          binaryMap.free()
+          throw new SparkException("There is no enough memory to build hash map")
         }
-        matchList += unsafeRow
       }
     }
 
-    // TODO: create UniqueUnsafeRelation
-    new UnsafeHashedRelation(hashTable)
+    if (allUnique) {
+      new UniqueUnsafeHashedRelation(numFields, binaryMap)
+    } else {
+      new UnsafeHashedRelation(numFields, binaryMap)
+    }
   }
 }
 
@@ -523,7 +494,7 @@ private[joins] object LongHashedRelation {
     keyGenerator: Projection,
     sizeEstimate: Int): HashedRelation = {
 
-    // Use a Java hash table here because unsafe maps expect fixed size records
+    // TODO: use LongToBytesMap for better memory efficiency
     val hashTable = new JavaHashMap[Long, CompactBuffer[UnsafeRow]](sizeEstimate)
 
     // Create a mapping of key -> rows
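
For reference, a hedged sketch of how the rewritten UnsafeHashedRelation above is consumed on the
probe side (relation and joinKey are assumed to be an already-built HashedRelation and an UnsafeRow
join key; everything else matches names in this patch):

{{{
  // The general case: get() walks all values chained under the key via Location.nextValue().
  val matches: Seq[InternalRow] = relation.get(joinKey)        // null when the key is absent

  // When every build-side key turned out to be unique, apply() returns a
  // UniqueUnsafeHashedRelation, whose getValue() skips the CompactBuffer allocation.
  relation match {
    case unique: UniqueUnsafeHashedRelation =>
      val row: InternalRow = unique.getValue(joinKey)          // a single row, or null
    case _ => // fall back to get()
  }
}}}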
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledHashJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledHashJoin.scala
index 5c4f1ef60fd08..e3a2eaea5dd8c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledHashJoin.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledHashJoin.scala
@@ -57,9 +57,19 @@ case class ShuffledHashJoin(
     ClusteredDistribution(leftKeys) :: ClusteredDistribution(rightKeys) :: Nil
 
   private def buildHashedRelation(iter: Iterator[UnsafeRow]): HashedRelation = {
+    val context = TaskContext.get()
+    if (!canJoinKeyFitWithinLong) {
+      // build BytesToBytesMap
+      val relation = HashedRelation(canJoinKeyFitWithinLong, iter, buildSideKeyGenerator)
+      // This relation is usually used until the end of task.
+      context.addTaskCompletionListener((t: TaskContext) =>
+        relation.close()
+      )
+      return relation
+    }
+
     // try to acquire some memory for the hash table, it could trigger other operator to free some
     // memory. The memory acquired here will mostly be used until the end of task.
-    val context = TaskContext.get()
     val memoryManager = context.taskMemoryManager()
     var acquired = 0L
     var used = 0L
@@ -69,18 +79,18 @@ case class ShuffledHashJoin(
 
     val copiedIter = iter.map { row =>
       // It's hard to guess what's exactly memory will be used, we have a rough guess here.
-      // TODO: use BytesToBytesMap instead of HashMap for memory efficiency
-      // Each pair in HashMap will have two UnsafeRows, one CompactBuffer, maybe 10+ pointers
+      // TODO: use LongToBytesMap instead of HashMap for memory efficiency
+      // Each pair in the HashMap has an UnsafeRow, a CompactBuffer, and maybe 10+ pointers
       val needed = 150 + row.getSizeInBytes
       if (needed > acquired - used) {
         val got = memoryManager.acquireExecutionMemory(
           Math.max(memoryManager.pageSizeBytes(), needed), MemoryMode.ON_HEAP, null)
+        acquired += got
         if (got < needed) {
           throw new SparkException("Can't acquire enough memory to build hash map in shuffled" +
             "hash join, please use sort merge join by setting " +
             "spark.sql.join.preferSortMergeJoin=true")
         }
-        acquired += got
       }
       used += needed
       // HashedRelation requires that the UnsafeRow should be separate objects.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala
index 60e00d203ccde..c4e410d92cea3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala
@@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.streaming
 
 import java.util.concurrent.{CountDownLatch, TimeUnit}
 import java.util.concurrent.atomic.AtomicInteger
-import javax.annotation.concurrent.GuardedBy
 
 import scala.collection.mutable.ArrayBuffer
 import scala.util.control.NonFatal
@@ -34,6 +33,7 @@ import org.apache.spark.sql.catalyst.util._
 import org.apache.spark.sql.execution.QueryExecution
 import org.apache.spark.sql.util.ContinuousQueryListener
 import org.apache.spark.sql.util.ContinuousQueryListener._
+import org.apache.spark.util.UninterruptibleThread
 
 /**
  * Manages the execution of a streaming Spark SQL query that is occurring in a separate thread.
@@ -89,9 +89,10 @@ class StreamExecution(
   private[sql] var streamDeathCause: ContinuousQueryException = null
 
   /** The thread that runs the micro-batches of this stream. */
-  private[sql] val microBatchThread = new Thread(s"stream execution thread for $name") {
-    override def run(): Unit = { runBatches() }
-  }
+  private[sql] val microBatchThread =
+    new UninterruptibleThread(s"stream execution thread for $name") {
+      override def run(): Unit = { runBatches() }
+    }
 
   /**
    * A write-ahead-log that records the offsets that are present in each batch. In order to ensure
@@ -102,65 +103,6 @@ class StreamExecution(
   private val offsetLog =
     new HDFSMetadataLog[CompositeOffset](sqlContext, checkpointFile("offsets"))
 
-  /** A monitor to protect "uninterruptible" and "interrupted" */
-  private val uninterruptibleLock = new Object
-
-  /**
-   * Indicates if "microBatchThread" are in the uninterruptible status. If so, interrupting
-   * "microBatchThread" will be deferred until "microBatchThread" enters into the interruptible
-   * status.
-   */
-  @GuardedBy("uninterruptibleLock")
-  private var uninterruptible = false
-
-  /**
-   * Indicates if we should interrupt "microBatchThread" when we are leaving the uninterruptible
-   * zone.
-   */
-  @GuardedBy("uninterruptibleLock")
-  private var shouldInterruptThread = false
-
-  /**
-   * Interrupt "microBatchThread" if possible. If "microBatchThread" is in the uninterruptible
-   * status, "microBatchThread" won't be interrupted until it enters into the interruptible status.
-   */
-  private def interruptMicroBatchThreadSafely(): Unit = {
-    uninterruptibleLock.synchronized {
-      if (uninterruptible) {
-        shouldInterruptThread = true
-      } else {
-        microBatchThread.interrupt()
-      }
-    }
-  }
-
-  /**
-   * Run `f` uninterruptibly in "microBatchThread". "microBatchThread" won't be interrupted before
-   * returning from `f`.
-   */
-  private def runUninterruptiblyInMicroBatchThread[T](f: => T): T = {
-    assert(Thread.currentThread() == microBatchThread)
-    uninterruptibleLock.synchronized {
-      uninterruptible = true
-      // Clear the interrupted status if it's set.
-      if (Thread.interrupted()) {
-        shouldInterruptThread = true
-      }
-    }
-    try {
-      f
-    } finally {
-      uninterruptibleLock.synchronized {
-        uninterruptible = false
-        if (shouldInterruptThread) {
-          // Recover the interrupted status
-          microBatchThread.interrupt()
-          shouldInterruptThread = false
-        }
-      }
-    }
-  }
-
   /** Whether the query is currently active or not */
   override def isActive: Boolean = state == ACTIVE
 
@@ -294,7 +236,7 @@ class StreamExecution(
     // method. See SPARK-14131.
     //
     // Check to see what new data is available.
-    val newData = runUninterruptiblyInMicroBatchThread {
+    val newData = microBatchThread.runUninterruptibly {
       uniqueSources.flatMap(s => s.getOffset.map(o => s -> o))
     }
     availableOffsets ++= newData
@@ -305,7 +247,7 @@ class StreamExecution(
       // As "offsetLog.add" will create a file using HDFS API and call "Shell.runCommand" to set
       // the file permission, we should not interrupt "microBatchThread" when running this method.
       // See SPARK-14131.
-      runUninterruptiblyInMicroBatchThread {
+      microBatchThread.runUninterruptibly {
         assert(
           offsetLog.add(currentBatchId, availableOffsets.toCompositeOffset(sources)),
           s"Concurrent update to the log.  Multiple streaming jobs detected for $currentBatchId")
@@ -395,7 +337,7 @@ class StreamExecution(
     // intentionally
     state = TERMINATED
     if (microBatchThread.isAlive) {
-      interruptMicroBatchThreadSafely()
+      microBatchThread.interrupt()
       microBatchThread.join()
     }
     logInfo(s"Query $name was stopped")
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 8abb9d7e4a1f0..7ce15e3f355c3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -27,8 +27,8 @@ import org.apache.spark.sql.catalyst.analysis.{Star, UnresolvedFunction}
 import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate._
-import org.apache.spark.sql.catalyst.parser.CatalystQl
 import org.apache.spark.sql.catalyst.plans.logical.BroadcastHint
+import org.apache.spark.sql.execution.SparkSqlParser
 import org.apache.spark.sql.expressions.UserDefinedFunction
 import org.apache.spark.sql.types._
 import org.apache.spark.util.Utils
@@ -1172,8 +1172,7 @@ object functions {
    * @group normal_funcs
    */
   def expr(expr: String): Column = {
-    val parser = SQLContext.getActive().map(_.sessionState.sqlParser).getOrElse(new CatalystQl())
-    Column(parser.parseExpression(expr))
+    Column(SparkSqlParser.parseExpression(expr))
   }
 
   //////////////////////////////////////////////////////////////////////////////////////////////
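
User-facing behaviour of expr is unchanged by the parser swap; typical usage (df is any DataFrame
with columns a and b) looks like:

{{{
  import org.apache.spark.sql.functions._
  df.select(expr("a + 1").as("a_plus_one"), expr("abs(b)"))
}}}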
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index e5f02caabcca4..f7fdfacd310e8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -44,14 +44,14 @@ private[sql] class SessionState(ctx: SQLContext) {
   lazy val experimentalMethods = new ExperimentalMethods
 
   /**
-   * Internal catalog for managing table and database states.
+   * Internal catalog for managing functions registered by the user.
    */
-  lazy val catalog = new SessionCatalog(ctx.externalCatalog, conf)
+  lazy val functionRegistry: FunctionRegistry = FunctionRegistry.builtin.copy()
 
   /**
-   * Internal catalog for managing functions registered by the user.
+   * Internal catalog for managing table and database states.
    */
-  lazy val functionRegistry: FunctionRegistry = FunctionRegistry.builtin.copy()
+  lazy val catalog = new SessionCatalog(ctx.externalCatalog, functionRegistry, conf)
 
   /**
    * Interface exposed to the user for registering user-defined functions.
@@ -81,7 +81,7 @@ private[sql] class SessionState(ctx: SQLContext) {
   /**
    * Parser that extracts expressions, plans, table identifiers etc. from SQL texts.
    */
-  lazy val sqlParser: ParserInterface = new SparkQl(conf)
+  lazy val sqlParser: ParserInterface = SparkSqlParser
 
   /**
    * Planner that converts optimized logical plans to physical plans.
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
index 1f78bcc6782b9..bf5915d763f83 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
@@ -329,8 +329,8 @@ class JoinSuite extends QueryTest with SharedSQLContext {
   }
 
   test("full outer join") {
-    upperCaseData.where('N <= 4).registerTempTable("left")
-    upperCaseData.where('N >= 3).registerTempTable("right")
+    upperCaseData.where('N <= 4).registerTempTable("`left`")
+    upperCaseData.where('N >= 3).registerTempTable("`right`")
 
     val left = UnresolvedRelation(TableIdentifier("left"), None)
     val right = UnresolvedRelation(TableIdentifier("right"), None)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala
index a1b45ca7ebd19..7ff4ffcaecd49 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala
@@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.plans._
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.trees.TreeNode
 import org.apache.spark.sql.catalyst.util._
-import org.apache.spark.sql.execution.{LogicalRDD, Queryable}
+import org.apache.spark.sql.execution.LogicalRDD
 import org.apache.spark.sql.execution.columnar.InMemoryRelation
 import org.apache.spark.sql.execution.datasources.LogicalRelation
 
@@ -180,9 +180,9 @@ abstract class QueryTest extends PlanTest {
   }
 
   /**
-   * Asserts that a given [[Queryable]] will be executed using the given number of cached results.
+   * Asserts that a given [[Dataset]] will be executed using the given number of cached results.
    */
-  def assertCached(query: Queryable, numCachedTables: Int = 1): Unit = {
+  def assertCached(query: Dataset[_], numCachedTables: Int = 1): Unit = {
     val planWithCaching = query.queryExecution.withCachedData
     val cachedData = planWithCaching collect {
       case cached: InMemoryRelation => cached
@@ -286,9 +286,9 @@ abstract class QueryTest extends PlanTest {
   }
 
   /**
-    * Asserts that a given [[Queryable]] does not have missing inputs in all the analyzed plans.
+    * Asserts that a given [[Dataset]] does not have missing inputs in all the analyzed plans.
     */
-  def assertEmptyMissingInput(query: Queryable): Unit = {
+  def assertEmptyMissingInput(query: Dataset[_]): Unit = {
     assert(query.queryExecution.analyzed.missingInput.isEmpty,
       s"The analyzed logical plan has missing inputs: ${query.queryExecution.analyzed}")
     assert(query.queryExecution.optimizedPlan.missingInput.isEmpty,
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 012d25d916e40..d0d0c2ae04cda 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -1656,7 +1656,7 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
     val e2 = intercept[AnalysisException] {
       sql("select interval 23 nanosecond")
     }
-    assert(e2.message.contains("cannot recognize input near"))
+    assert(e2.message.contains("No interval can be constructed"))
   }
 
   test("SPARK-8945: add and subtract expressions for interval type") {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/BenchmarkWholeStageCodegen.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/BenchmarkWholeStageCodegen.scala
index 0b1cb90186929..a16092e7d7b21 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/BenchmarkWholeStageCodegen.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/BenchmarkWholeStageCodegen.scala
@@ -184,11 +184,29 @@ class BenchmarkWholeStageCodegen extends SparkFunSuite {
 
     /**
     Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
-    Join w 2 longs:                      Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+    Join w 2 longs:                     Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
     -------------------------------------------------------------------------------------------
-    Join w 2 longs codegen=false              7877 / 8358         13.3          75.1       1.0X
-    Join w 2 longs codegen=true               3877 / 3937         27.0          37.0       2.0X
+    Join w 2 longs codegen=false           12725 / 13158          8.2         121.4       1.0X
+    Join w 2 longs codegen=true              6044 / 6771         17.3          57.6       2.1X
       */
+
+    val dim4 = broadcast(sqlContext.range(M)
+      .selectExpr("cast(id/10 as long) as k1", "cast(id/10 as long) as k2"))
+
+    runBenchmark("Join w 2 longs duplicated", N) {
+      sqlContext.range(N).join(dim4,
+        (col("id") bitwiseAND M) === col("k1") && (col("id") bitwiseAND M) === col("k2"))
+        .count()
+    }
+
+    /**
+    Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
+    Join w 2 longs:                     Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+    -------------------------------------------------------------------------------------------
+    Join w 2 longs duplicated codegen=false 13066 / 13710          8.0         124.6       1.0X
+    Join w 2 longs duplicated codegen=true    7122 / 7277         14.7          67.9       1.8X
+     */
+
     runBenchmark("outer join w long", N) {
       sqlContext.range(N).join(dim, (col("id") bitwiseAND M) === col("k"), "left").count()
     }
@@ -438,7 +456,7 @@ class BenchmarkWholeStageCodegen extends SparkFunSuite {
             value.setInt(0, value.getInt(0) + 1)
             i += 1
           } else {
-            loc.putNewKey(key.getBaseObject, key.getBaseOffset, key.getSizeInBytes,
+            loc.append(key.getBaseObject, key.getBaseOffset, key.getSizeInBytes,
               value.getBaseObject, value.getBaseOffset, value.getSizeInBytes)
           }
         }
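
The benchmark above now uses Location.append, which, unlike putNewKey, may be called repeatedly for
the same key. A hedged sketch of the append-then-iterate pattern this patch introduces (map, key and
value are assumed to be an existing BytesToBytesMap and two UnsafeRows):

{{{
  val loc = map.lookup(key.getBaseObject, key.getBaseOffset, key.getSizeInBytes)
  // append() can be issued whether or not the key is already defined.
  val ok = loc.append(
    key.getBaseObject, key.getBaseOffset, key.getSizeInBytes,
    value.getBaseObject, value.getBaseOffset, value.getSizeInBytes)
  if (!ok) {
    map.free()   // the map could not grow; release what was already allocated
    throw new java.io.IOException("Could not allocate memory to grow BytesToBytesMap")
  }
  // Reading back every value chained under the same key:
  if (loc.isDefined) {
    do {
      // consume loc.getValueBase / loc.getValueOffset / loc.getValueLength
    } while (loc.nextValue())
  }
}}}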
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
index 7a6343748ba9e..03079c6890a84 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
@@ -18,14 +18,13 @@
 package org.apache.spark.sql.execution.command
 
 import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.catalyst.expressions.{Ascending, Descending}
 import org.apache.spark.sql.catalyst.plans.PlanTest
-import org.apache.spark.sql.execution.SparkQl
+import org.apache.spark.sql.execution.SparkSqlParser
 import org.apache.spark.sql.execution.datasources.BucketSpec
 import org.apache.spark.sql.types._
 
 class DDLCommandSuite extends PlanTest {
-  private val parser = new SparkQl
+  private val parser = SparkSqlParser
 
   test("create database") {
     val sql =
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala
index e19b4ff1e2ff8..ed4cc1c4c4e6b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala
@@ -19,11 +19,13 @@ package org.apache.spark.sql.execution.joins
 
 import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}
 
-import org.apache.spark.SparkFunSuite
+import org.apache.spark.{SparkConf, SparkFunSuite}
+import org.apache.spark.memory.{StaticMemoryManager, TaskMemoryManager}
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.test.SharedSQLContext
 import org.apache.spark.sql.types.{IntegerType, StructField, StructType}
+import org.apache.spark.unsafe.map.BytesToBytesMap
 import org.apache.spark.util.collection.CompactBuffer
 
 class HashedRelationSuite extends SparkFunSuite with SharedSQLContext {
@@ -69,10 +71,17 @@ class HashedRelationSuite extends SparkFunSuite with SharedSQLContext {
   }
 
   test("test serialization empty hash map") {
+    val taskMemoryManager = new TaskMemoryManager(
+      new StaticMemoryManager(
+        new SparkConf().set("spark.memory.offHeap.enabled", "false"),
+        Long.MaxValue,
+        Long.MaxValue,
+        1),
+      0)
+    val binaryMap = new BytesToBytesMap(taskMemoryManager, 1, 1)
     val os = new ByteArrayOutputStream()
     val out = new ObjectOutputStream(os)
-    val hashed = new UnsafeHashedRelation(
-      new java.util.HashMap[UnsafeRow, CompactBuffer[UnsafeRow]])
+    val hashed = new UnsafeHashedRelation(1, binaryMap)
     hashed.writeExternal(out)
     out.flush()
     val in = new ObjectInputStream(new ByteArrayInputStream(os.toByteArray))
diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index 650797f7683e1..bedbf9ae17271 100644
--- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -291,7 +291,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     "compute_stats_empty_table",
     "compute_stats_long",
     "create_view_translate",
-    "show_create_table_serde",
     "show_tblproperties",
 
     // Odd changes to output
@@ -344,6 +343,15 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     // These tests check the VIEW table definition, but Spark handles CREATE VIEW itself and
     // generates different View Expanded Text.
     "alter_view_as_select",
+
+    // We don't support SHOW CREATE TABLE commands in general
+    "show_create_table_alter",
+    "show_create_table_db_table",
+    "show_create_table_delimited",
+    "show_create_table_does_not_exist",
+    "show_create_table_index",
+    "show_create_table_partitioned",
+    "show_create_table_serde",
     "show_create_table_view"
   )
 
@@ -833,13 +841,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     "serde_reported_schema",
     "set_variable_sub",
     "show_columns",
-    "show_create_table_alter",
-    "show_create_table_db_table",
-    "show_create_table_delimited",
-    "show_create_table_does_not_exist",
-    "show_create_table_index",
-    "show_create_table_partitioned",
-    "show_create_table_serde",
     "show_describe_func_quotes",
     "show_functions",
     "show_partitions",
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index ca3ce43591f5f..c0b6d16d3cc29 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -86,7 +86,7 @@ class HiveContext private[hive](
     @transient private[hive] val executionHive: HiveClientImpl,
     @transient private[hive] val metadataHive: HiveClient,
     isRootContext: Boolean,
-    @transient private[sql] val hiveCatalog: HiveCatalog)
+    @transient private[sql] val hiveCatalog: HiveExternalCatalog)
   extends SQLContext(sc, cacheManager, listener, isRootContext, hiveCatalog) with Logging {
   self =>
 
@@ -98,7 +98,7 @@ class HiveContext private[hive](
       execHive,
       metaHive,
       true,
-      new HiveCatalog(metaHive))
+      new HiveExternalCatalog(metaHive))
   }
 
   def this(sc: SparkContext) = {
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
similarity index 96%
rename from sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveCatalog.scala
rename to sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index 0722fb02a8f9d..f75509fe80692 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -34,7 +34,7 @@ import org.apache.spark.sql.hive.client.HiveClient
  * A persistent implementation of the system catalog using Hive.
  * All public methods must be synchronized for thread-safety.
  */
-private[spark] class HiveCatalog(client: HiveClient) extends ExternalCatalog with Logging {
+private[spark] class HiveExternalCatalog(client: HiveClient) extends ExternalCatalog with Logging {
   import ExternalCatalog._
 
   // Exceptions thrown by the hive client that we would like to wrap
@@ -74,10 +74,10 @@ private[spark] class HiveCatalog(client: HiveClient) extends ExternalCatalog wit
   }
 
   private def requireDbMatches(db: String, table: CatalogTable): Unit = {
-    if (table.name.database != Some(db)) {
+    if (table.identifier.database != Some(db)) {
       throw new AnalysisException(
         s"Provided database $db does not much the one specified in the " +
-        s"table definition (${table.name.database.getOrElse("n/a")})")
+        s"table definition (${table.identifier.database.getOrElse("n/a")})")
     }
   }
 
@@ -160,7 +160,8 @@ private[spark] class HiveCatalog(client: HiveClient) extends ExternalCatalog wit
   }
 
   override def renameTable(db: String, oldName: String, newName: String): Unit = withClient {
-    val newTable = client.getTable(db, oldName).copy(name = TableIdentifier(newName, Some(db)))
+    val newTable = client.getTable(db, oldName)
+      .copy(identifier = TableIdentifier(newName, Some(db)))
     client.alterTable(oldName, newTable)
   }
 
@@ -173,7 +174,7 @@ private[spark] class HiveCatalog(client: HiveClient) extends ExternalCatalog wit
    */
   override def alterTable(db: String, tableDefinition: CatalogTable): Unit = withClient {
     requireDbMatches(db, tableDefinition)
-    requireTableExists(db, tableDefinition.name.table)
+    requireTableExists(db, tableDefinition.identifier.table)
     client.alterTable(tableDefinition)
   }
 
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index c7066d73631af..eedd12d76af54 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -102,7 +102,7 @@ private[hive] object HiveSerDe {
  * Legacy catalog for interacting with the Hive metastore.
  *
  * This is still used for things like creating data source tables, but in the future will be
- * cleaned up to integrate more nicely with [[HiveCatalog]].
+ * cleaned up to integrate more nicely with [[HiveExternalCatalog]].
  */
 private[hive] class HiveMetastoreCatalog(val client: HiveClient, hive: HiveContext)
   extends Logging {
@@ -124,8 +124,8 @@ private[hive] class HiveMetastoreCatalog(val client: HiveClient, hive: HiveConte
 
   private def getQualifiedTableName(t: CatalogTable): QualifiedTableName = {
     QualifiedTableName(
-      t.name.database.getOrElse(getCurrentDatabase).toLowerCase,
-      t.name.table.toLowerCase)
+      t.identifier.database.getOrElse(getCurrentDatabase).toLowerCase,
+      t.identifier.table.toLowerCase)
   }
 
   /** A cache of Spark SQL data source tables that have been accessed. */
@@ -299,7 +299,7 @@ private[hive] class HiveMetastoreCatalog(val client: HiveClient, hive: HiveConte
 
     def newSparkSQLSpecificMetastoreTable(): CatalogTable = {
       CatalogTable(
-        name = TableIdentifier(tblName, Option(dbName)),
+        identifier = TableIdentifier(tblName, Option(dbName)),
         tableType = tableType,
         schema = Nil,
         storage = CatalogStorageFormat(
@@ -319,7 +319,7 @@ private[hive] class HiveMetastoreCatalog(val client: HiveClient, hive: HiveConte
       assert(relation.partitionSchema.isEmpty)
 
       CatalogTable(
-        name = TableIdentifier(tblName, Option(dbName)),
+        identifier = TableIdentifier(tblName, Option(dbName)),
         tableType = tableType,
         storage = CatalogStorageFormat(
           locationUri = Some(relation.location.paths.map(_.toUri.toString).head),
@@ -431,7 +431,7 @@ private[hive] class HiveMetastoreCatalog(val client: HiveClient, hive: HiveConte
       alias match {
         // because hive use things like `_c0` to build the expanded text
         // currently we cannot support view from "create view v1(c1) as ..."
-        case None => SubqueryAlias(table.name.table, hive.parseSql(viewText))
+        case None => SubqueryAlias(table.identifier.table, hive.parseSql(viewText))
         case Some(aliasText) => SubqueryAlias(aliasText, hive.parseSql(viewText))
       }
     } else {
@@ -611,7 +611,7 @@ private[hive] class HiveMetastoreCatalog(val client: HiveClient, hive: HiveConte
         val QualifiedTableName(dbName, tblName) = getQualifiedTableName(table)
 
         execution.CreateViewAsSelect(
-          table.copy(name = TableIdentifier(tblName, Some(dbName))),
+          table.copy(identifier = TableIdentifier(tblName, Some(dbName))),
           child,
           allowExisting,
           replace)
@@ -633,7 +633,7 @@ private[hive] class HiveMetastoreCatalog(val client: HiveClient, hive: HiveConte
         if (hive.convertCTAS && table.storage.serde.isEmpty) {
           // Do the conversion when spark.sql.hive.convertCTAS is true and the query
           // does not specify any storage format (file format and storage handler).
-          if (table.name.database.isDefined) {
+          if (table.identifier.database.isDefined) {
             throw new AnalysisException(
               "Cannot specify database name in a CTAS statement " +
                 "when spark.sql.hive.convertCTAS is set to true.")
@@ -641,7 +641,7 @@ private[hive] class HiveMetastoreCatalog(val client: HiveClient, hive: HiveConte
 
           val mode = if (allowExisting) SaveMode.Ignore else SaveMode.ErrorIfExists
           CreateTableUsingAsSelect(
-            TableIdentifier(desc.name.table),
+            TableIdentifier(desc.identifier.table),
             conf.defaultDataSourceName,
             temporary = false,
             Array.empty[String],
@@ -662,7 +662,7 @@ private[hive] class HiveMetastoreCatalog(val client: HiveClient, hive: HiveConte
           val QualifiedTableName(dbName, tblName) = getQualifiedTableName(table)
 
           execution.CreateTableAsSelect(
-            desc.copy(name = TableIdentifier(tblName, Some(dbName))),
+            desc.copy(identifier = TableIdentifier(tblName, Some(dbName))),
             child,
             allowExisting)
         }
@@ -792,7 +792,7 @@ private[hive] case class MetastoreRelation(
     // We start by constructing an API table as Hive performs several important transformations
     // internally when converting an API table to a QL table.
     val tTable = new org.apache.hadoop.hive.metastore.api.Table()
-    tTable.setTableName(table.name.table)
+    tTable.setTableName(table.identifier.table)
     tTable.setDbName(table.database)
 
     val tableParameters = new java.util.HashMap[String, String]()
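
The qualified-name lookup now reads both parts from the identifier and lower-cases them. A rough REPL-style sketch of that behavior; the case classes are simplified stand-ins, not the real HiveMetastoreCatalog types:

    case class TableIdentifier(table: String, database: Option[String] = None)
    case class QualifiedTableName(database: String, name: String)

    def getQualifiedTableName(id: TableIdentifier, currentDatabase: String): QualifiedTableName =
      QualifiedTableName(
        id.database.getOrElse(currentDatabase).toLowerCase,
        id.table.toLowerCase)

    // getQualifiedTableName(TableIdentifier("Page_View", Some("MyDB")), "default")
    //   => QualifiedTableName("mydb", "page_view")
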
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index e5bcb9b1db2bc..052c43a3cef51 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -31,7 +31,6 @@ import org.apache.hadoop.hive.serde.serdeConstants
 import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
 
 import org.apache.spark.internal.Logging
-import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.catalog._
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.parser._
@@ -60,7 +59,7 @@ private[hive] case class CreateTableAsSelect(
 
   override def output: Seq[Attribute] = Seq.empty[Attribute]
   override lazy val resolved: Boolean =
-    tableDesc.name.database.isDefined &&
+    tableDesc.identifier.database.isDefined &&
     tableDesc.schema.nonEmpty &&
     tableDesc.storage.serde.isDefined &&
     tableDesc.storage.inputFormat.isDefined &&
@@ -83,7 +82,28 @@ private[hive] class HiveQl(conf: ParserConf) extends SparkQl(conf) with Logging
   import ParseUtils._
   import ParserUtils._
 
-  protected val nativeCommands = Seq(
+  // Token text -> human readable text
+  private val hiveUnsupportedCommands = Map(
+    "TOK_CREATEROLE" -> "CREATE ROLE",
+    "TOK_DROPROLE" -> "DROP ROLE",
+    "TOK_EXPORT" -> "EXPORT TABLE",
+    "TOK_GRANT" -> "GRANT",
+    "TOK_GRANT_ROLE" -> "GRANT",
+    "TOK_IMPORT" -> "IMPORT TABLE",
+    "TOK_REVOKE" -> "REVOKE",
+    "TOK_REVOKE_ROLE" -> "REVOKE",
+    "TOK_SHOW_COMPACTIONS" -> "SHOW COMPACTIONS",
+    "TOK_SHOW_CREATETABLE" -> "SHOW CREATE TABLE",
+    "TOK_SHOW_GRANT" -> "SHOW GRANT",
+    "TOK_SHOW_ROLE_GRANT" -> "SHOW ROLE GRANT",
+    "TOK_SHOW_ROLE_PRINCIPALS" -> "SHOW PRINCIPALS",
+    "TOK_SHOW_ROLES" -> "SHOW ROLES",
+    "TOK_SHOW_SET_ROLE" -> "SHOW CURRENT ROLES / SET ROLE",
+    "TOK_SHOW_TRANSACTIONS" -> "SHOW TRANSACTIONS",
+    "TOK_SHOWINDEXES" -> "SHOW INDEXES",
+    "TOK_SHOWLOCKS" -> "SHOW LOCKS")
+
+  private val nativeCommands = Set(
     "TOK_ALTERDATABASE_OWNER",
     "TOK_ALTERINDEX_PROPERTIES",
     "TOK_ALTERINDEX_REBUILD",
@@ -97,51 +117,30 @@ private[hive] class HiveQl(conf: ParserConf) extends SparkQl(conf) with Logging
 
     "TOK_CREATEINDEX",
     "TOK_CREATEMACRO",
-    "TOK_CREATEROLE",
 
     "TOK_DROPINDEX",
     "TOK_DROPMACRO",
-    "TOK_DROPROLE",
     "TOK_DROPTABLE_PROPERTIES",
     "TOK_DROPVIEW",
     "TOK_DROPVIEW_PROPERTIES",
 
-    "TOK_EXPORT",
-
-    "TOK_GRANT",
-    "TOK_GRANT_ROLE",
-
-    "TOK_IMPORT",
-
     "TOK_LOAD",
 
     "TOK_LOCKTABLE",
 
     "TOK_MSCK",
 
-    "TOK_REVOKE",
-
-    "TOK_SHOW_COMPACTIONS",
-    "TOK_SHOW_CREATETABLE",
-    "TOK_SHOW_GRANT",
-    "TOK_SHOW_ROLE_GRANT",
-    "TOK_SHOW_ROLE_PRINCIPALS",
-    "TOK_SHOW_ROLES",
-    "TOK_SHOW_SET_ROLE",
     "TOK_SHOW_TABLESTATUS",
     "TOK_SHOW_TBLPROPERTIES",
-    "TOK_SHOW_TRANSACTIONS",
     "TOK_SHOWCOLUMNS",
     "TOK_SHOWDATABASES",
-    "TOK_SHOWINDEXES",
-    "TOK_SHOWLOCKS",
     "TOK_SHOWPARTITIONS",
 
     "TOK_UNLOCKTABLE"
   )
 
   // Commands that we do not need to explain.
-  protected val noExplainCommands = Seq(
+  private val noExplainCommands = Set(
     "TOK_DESCTABLE",
     "TOK_SHOWTABLES",
     "TOK_TRUNCATETABLE", // truncate table" is a NativeCommand, does not need to explain.
@@ -183,7 +182,7 @@ private[hive] class HiveQl(conf: ParserConf) extends SparkQl(conf) with Logging
     val tableIdentifier = extractTableIdent(viewNameParts)
     val originalText = query.source
     val tableDesc = CatalogTable(
-      name = tableIdentifier,
+      identifier = tableIdentifier,
       tableType = CatalogTableType.VIRTUAL_VIEW,
       schema = schema,
       storage = CatalogStorageFormat(
@@ -209,6 +208,9 @@ private[hive] class HiveQl(conf: ParserConf) extends SparkQl(conf) with Logging
     safeParse(sql, ParseDriver.parsePlan(sql, conf)) { ast =>
       if (nativeCommands.contains(ast.text)) {
         HiveNativeCommand(sql)
+      } else if (hiveUnsupportedCommands.contains(ast.text)) {
+        val humanReadableText = hiveUnsupportedCommands(ast.text)
+        throw new AnalysisException("Unsupported operation: " + humanReadableText)
       } else {
         nodeToPlan(ast) match {
           case NativePlaceholder => HiveNativeCommand(sql)
@@ -352,7 +354,7 @@ private[hive] class HiveQl(conf: ParserConf) extends SparkQl(conf) with Logging
 
         // TODO add bucket support
         var tableDesc: CatalogTable = CatalogTable(
-          name = tableIdentifier,
+          identifier = tableIdentifier,
           tableType =
             if (externalTable.isDefined) {
               CatalogTableType.EXTERNAL_TABLE
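
The net effect of the HiveQl change: tokens still listed in nativeCommands are wrapped as HiveNativeCommand, tokens moved into hiveUnsupportedCommands fail fast with a readable message, and everything else falls through to the planner. A hedged REPL-style sketch of that dispatch, with the token lists abbreviated and UnsupportedOperationException standing in for AnalysisException:

    val hiveUnsupportedCommands = Map(
      "TOK_CREATEROLE" -> "CREATE ROLE",
      "TOK_SHOWLOCKS" -> "SHOW LOCKS")
    val nativeCommands = Set("TOK_LOAD", "TOK_MSCK")

    def parseCommand(tokenText: String, sql: String): String = {
      if (nativeCommands.contains(tokenText)) {
        s"HiveNativeCommand($sql)"
      } else if (hiveUnsupportedCommands.contains(tokenText)) {
        throw new UnsupportedOperationException(
          "Unsupported operation: " + hiveUnsupportedCommands(tokenText))
      } else {
        s"nodeToPlan($sql)" // regular planning path
      }
    }
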
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
index aa44cba4b5641..ff12245e8ddd2 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.hive
 
 import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
 import org.apache.spark.sql.catalyst.catalog.SessionCatalog
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
 import org.apache.spark.sql.catalyst.rules.Rule
@@ -28,11 +29,12 @@ import org.apache.spark.sql.types.StructType
 
 
 class HiveSessionCatalog(
-    externalCatalog: HiveCatalog,
+    externalCatalog: HiveExternalCatalog,
     client: HiveClient,
     context: HiveContext,
+    functionRegistry: FunctionRegistry,
     conf: SQLConf)
-  extends SessionCatalog(externalCatalog, conf) {
+  extends SessionCatalog(externalCatalog, functionRegistry, conf) {
 
   override def setCurrentDatabase(db: String): Unit = {
     super.setCurrentDatabase(db)
@@ -41,11 +43,11 @@ class HiveSessionCatalog(
 
   override def lookupRelation(name: TableIdentifier, alias: Option[String]): LogicalPlan = {
     val table = formatTableName(name.table)
-    if (name.database.isDefined || !tempTables.containsKey(table)) {
+    if (name.database.isDefined || !tempTables.contains(table)) {
       val newName = name.copy(table = table)
       metastoreCatalog.lookupRelation(newName, alias)
     } else {
-      val relation = tempTables.get(table)
+      val relation = tempTables(table)
       val tableWithQualifiers = SubqueryAlias(table, relation)
       // If an alias was specified by the lookup, wrap the plan in a subquery so that
       // attributes are properly qualified with this alias.
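
SessionCatalog now keeps temp tables in a Scala map, hence `contains`/`apply` instead of `containsKey`/`get`, and lookup prefers the metastore whenever a database is given or no temp table matches. A simplified model of that precedence, not the real SessionCatalog API:

    import scala.collection.mutable

    case class Plan(description: String)

    class TinySessionCatalog {
      private val tempTables = mutable.HashMap.empty[String, Plan]

      def createTempTable(name: String, plan: Plan, overrideIfExists: Boolean): Unit = {
        if (!overrideIfExists && tempTables.contains(name)) {
          sys.error(s"Temporary table '$name' already exists")
        }
        tempTables(name) = plan
      }

      def lookupRelation(table: String, database: Option[String]): Plan = {
        if (database.isDefined || !tempTables.contains(table)) {
          Plan(s"metastore lookup of ${database.getOrElse("<current db>")}.$table")
        } else {
          Plan(s"SubqueryAlias($table, ${tempTables(table).description})")
        }
      }
    }
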
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
index caa7f296ed16a..c9b6b1dfb6e05 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
@@ -34,13 +34,6 @@ private[hive] class HiveSessionState(ctx: HiveContext) extends SessionState(ctx)
     override def caseSensitiveAnalysis: Boolean = getConf(SQLConf.CASE_SENSITIVE, false)
   }
 
-  /**
-   * Internal catalog for managing table and database states.
-   */
-  override lazy val catalog = {
-    new HiveSessionCatalog(ctx.hiveCatalog, ctx.metadataHive, ctx, conf)
-  }
-
   /**
    * Internal catalog for managing functions registered by the user.
    * Note that HiveUDFs will be overridden by functions registered in this context.
@@ -49,6 +42,13 @@ private[hive] class HiveSessionState(ctx: HiveContext) extends SessionState(ctx)
     new HiveFunctionRegistry(FunctionRegistry.builtin.copy(), ctx.executionHive)
   }
 
+  /**
+   * Internal catalog for managing table and database states.
+   */
+  override lazy val catalog = {
+    new HiveSessionCatalog(ctx.hiveCatalog, ctx.metadataHive, ctx, functionRegistry, conf)
+  }
+
   /**
    * An analyzer that uses the Hive metastore.
    */
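
The reorder in HiveSessionState above is for readability only: `catalog` now takes `functionRegistry`, so the registry's definition moves ahead of it, while lazy initialization still happens on first access regardless of declaration order. A tiny sketch of that pattern, with illustrative names:

    class SessionStateSketch {
      lazy val functionRegistry: Map[String, String] =
        Map("upper" -> "builtin", "lower" -> "builtin")

      // Depends on functionRegistry; because both are lazy vals, the registry is
      // initialized the first time catalog is accessed, wherever it is declared.
      lazy val catalog: String = s"catalog(${functionRegistry.size} functions)"
    }

    // new SessionStateSketch().catalog == "catalog(2 functions)"
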
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala
index f4d30358cafa8..ee56f9d75da80 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala
@@ -88,7 +88,7 @@ private[hive] trait HiveClient {
   def dropTable(dbName: String, tableName: String, ignoreIfNotExists: Boolean): Unit
 
   /** Alter a table whose name matches the one specified in `table`, assuming it exists. */
-  final def alterTable(table: CatalogTable): Unit = alterTable(table.name.table, table)
+  final def alterTable(table: CatalogTable): Unit = alterTable(table.identifier.table, table)
 
   /** Updates the given table with new metadata, optionally renaming the table. */
   def alterTable(tableName: String, table: CatalogTable): Unit
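
The convenience overload keeps delegating, now reading the table name from the identifier. A tiny self-contained sketch of that shape, using stand-in types rather than the real HiveClient trait:

    case class TableIdentifier(table: String, database: Option[String] = None)
    case class CatalogTable(identifier: TableIdentifier)

    trait HiveClientSketch {
      def alterTable(tableName: String, table: CatalogTable): Unit

      // Convenience overload: the table to alter is the one named by its identifier.
      final def alterTable(table: CatalogTable): Unit = alterTable(table.identifier.table, table)
    }
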
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
index e4e15d13df658..a31178e3472d6 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -298,7 +298,7 @@ private[hive] class HiveClientImpl(
     logDebug(s"Looking up $dbName.$tableName")
     Option(client.getTable(dbName, tableName, false)).map { h =>
       CatalogTable(
-        name = TableIdentifier(h.getTableName, Option(h.getDbName)),
+        identifier = TableIdentifier(h.getTableName, Option(h.getDbName)),
         tableType = h.getTableType match {
           case HiveTableType.EXTERNAL_TABLE => CatalogTableType.EXTERNAL_TABLE
           case HiveTableType.MANAGED_TABLE => CatalogTableType.MANAGED_TABLE
@@ -544,13 +544,14 @@ private[hive] class HiveClientImpl(
   }
 
   override def renameFunction(db: String, oldName: String, newName: String): Unit = withHiveState {
-    val catalogFunc = getFunction(db, oldName).copy(name = FunctionIdentifier(newName, Some(db)))
+    val catalogFunc = getFunction(db, oldName)
+      .copy(identifier = FunctionIdentifier(newName, Some(db)))
     val hiveFunc = toHiveFunction(catalogFunc, db)
     client.alterFunction(db, oldName, hiveFunc)
   }
 
   override def alterFunction(db: String, func: CatalogFunction): Unit = withHiveState {
-    client.alterFunction(db, func.name.funcName, toHiveFunction(func, db))
+    client.alterFunction(db, func.identifier.funcName, toHiveFunction(func, db))
   }
 
   override def getFunctionOption(
@@ -611,7 +612,7 @@ private[hive] class HiveClientImpl(
 
   private def toHiveFunction(f: CatalogFunction, db: String): HiveFunction = {
     new HiveFunction(
-      f.name.funcName,
+      f.identifier.funcName,
       db,
       f.className,
       null,
@@ -639,7 +640,7 @@ private[hive] class HiveClientImpl(
   }
 
   private def toHiveTable(table: CatalogTable): HiveTable = {
-    val hiveTable = new HiveTable(table.database, table.name.table)
+    val hiveTable = new HiveTable(table.database, table.identifier.table)
     hiveTable.setTableType(table.tableType match {
       case CatalogTableType.EXTERNAL_TABLE => HiveTableType.EXTERNAL_TABLE
       case CatalogTableType.MANAGED_TABLE => HiveTableType.MANAGED_TABLE
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala
index 5a61eef0f2439..29f7dc2997d26 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala
@@ -38,7 +38,7 @@ case class CreateTableAsSelect(
     allowExisting: Boolean)
   extends RunnableCommand {
 
-  private val tableIdentifier = tableDesc.name
+  private val tableIdentifier = tableDesc.identifier
 
   override def children: Seq[LogicalPlan] = Seq(query)
 
@@ -93,6 +93,8 @@ case class CreateTableAsSelect(
   }
 
   override def argString: String = {
-    s"[Database:${tableDesc.database}}, TableName: ${tableDesc.name.table}, InsertIntoHiveTable]"
+    s"[Database:${tableDesc.database}}, " +
+    s"TableName: ${tableDesc.identifier.table}, " +
+    s"InsertIntoHiveTable]"
   }
 }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateViewAsSelect.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateViewAsSelect.scala
index 9ff520da1d41d..33cd8b44805b8 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateViewAsSelect.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateViewAsSelect.scala
@@ -44,7 +44,7 @@ private[hive] case class CreateViewAsSelect(
   assert(tableDesc.schema == Nil || tableDesc.schema.length == childSchema.length)
   assert(tableDesc.viewText.isDefined)
 
-  private val tableIdentifier = tableDesc.name
+  private val tableIdentifier = tableDesc.identifier
 
   override def run(sqlContext: SQLContext): Seq[Row] = {
     val hiveContext = sqlContext.asInstanceOf[HiveContext]
@@ -116,7 +116,7 @@ private[hive] case class CreateViewAsSelect(
     }
 
     val viewText = tableDesc.viewText.get
-    val viewName = quote(tableDesc.name.table)
+    val viewName = quote(tableDesc.identifier.table)
     s"SELECT $viewOutput FROM ($viewText) $viewName"
   }
 
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala
index efaa052370870..c07c428895c26 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala
@@ -141,6 +141,16 @@ private[hive] class HiveFunctionRegistry(
       }
     }.getOrElse(None))
   }
+
+  override def lookupFunctionBuilder(name: String): Option[FunctionBuilder] = {
+    underlying.lookupFunctionBuilder(name)
+  }
+
+  // Note: This does not drop functions stored in the metastore
+  override def dropFunction(name: String): Boolean = {
+    underlying.dropFunction(name)
+  }
+
 }
 
 private[hive] case class HiveSimpleUDF(
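
HiveFunctionRegistry stays a thin wrapper over the built-in registry: the two new overrides simply forward to `underlying`, and as the comment notes, dropFunction only affects the in-memory registry, not metastore-persisted functions. A stripped-down sketch of that delegation, with a simplified interface rather than the real FunctionRegistry trait:

    import scala.collection.mutable

    trait RegistrySketch {
      def lookupFunctionBuilder(name: String): Option[String]
      def dropFunction(name: String): Boolean
    }

    class SimpleRegistrySketch extends RegistrySketch {
      private val builders = mutable.HashMap("upper" -> "builtin upper")
      def lookupFunctionBuilder(name: String): Option[String] = builders.get(name)
      def dropFunction(name: String): Boolean = builders.remove(name).isDefined
    }

    class HiveRegistrySketch(underlying: RegistrySketch) extends RegistrySketch {
      // Hive UDF resolution would happen elsewhere; builder lookup and dropFunction
      // just delegate, so metastore-persisted functions are never dropped here.
      def lookupFunctionBuilder(name: String): Option[String] =
        underlying.lookupFunctionBuilder(name)
      def dropFunction(name: String): Boolean = underlying.dropFunction(name)
    }
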
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index a1785ca03883e..4afc8d18a6f8a 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -78,7 +78,7 @@ class TestHiveContext private[hive](
     executionHive: HiveClientImpl,
     metadataHive: HiveClient,
     isRootContext: Boolean,
-    hiveCatalog: HiveCatalog,
+    hiveCatalog: HiveExternalCatalog,
     val warehousePath: File,
     val scratchDirPath: File,
     metastoreTemporaryConf: Map[String, String])
@@ -110,7 +110,7 @@ class TestHiveContext private[hive](
       executionHive,
       metadataHive,
       true,
-      new HiveCatalog(metadataHive),
+      new HiveExternalCatalog(metadataHive),
       warehousePath,
       scratchDirPath,
       metastoreTemporaryConf)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala
similarity index 90%
rename from sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveCatalogSuite.scala
rename to sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala
index 427f5747a010f..3334c16f0be87 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveCatalogSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala
@@ -26,9 +26,9 @@ import org.apache.spark.sql.hive.client.{HiveClient, IsolatedClientLoader}
 import org.apache.spark.util.Utils
 
 /**
- * Test suite for the [[HiveCatalog]].
+ * Test suite for the [[HiveExternalCatalog]].
  */
-class HiveCatalogSuite extends CatalogTestCases {
+class HiveExternalCatalogSuite extends CatalogTestCases {
 
   private val client: HiveClient = {
     IsolatedClientLoader.forVersion(
@@ -41,7 +41,7 @@ class HiveCatalogSuite extends CatalogTestCases {
   protected override val utils: CatalogTestUtils = new CatalogTestUtils {
     override val tableInputFormat: String = "org.apache.hadoop.mapred.SequenceFileInputFormat"
     override val tableOutputFormat: String = "org.apache.hadoop.mapred.SequenceFileOutputFormat"
-    override def newEmptyCatalog(): ExternalCatalog = new HiveCatalog(client)
+    override def newEmptyCatalog(): ExternalCatalog = new HiveExternalCatalog(client)
   }
 
   protected override def resetState(): Unit = client.reset()
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala
index 1c775db9b616b..0aaf57649c323 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala
@@ -54,8 +54,8 @@ class HiveQlSuite extends SparkFunSuite with BeforeAndAfterAll {
 
     val (desc, exists) = extractTableDesc(s1)
     assert(exists)
-    assert(desc.name.database == Some("mydb"))
-    assert(desc.name.table == "page_view")
+    assert(desc.identifier.database == Some("mydb"))
+    assert(desc.identifier.table == "page_view")
     assert(desc.tableType == CatalogTableType.EXTERNAL_TABLE)
     assert(desc.storage.locationUri == Some("/user/external/page_view"))
     assert(desc.schema ==
@@ -100,8 +100,8 @@ class HiveQlSuite extends SparkFunSuite with BeforeAndAfterAll {
 
     val (desc, exists) = extractTableDesc(s2)
     assert(exists)
-    assert(desc.name.database == Some("mydb"))
-    assert(desc.name.table == "page_view")
+    assert(desc.identifier.database == Some("mydb"))
+    assert(desc.identifier.table == "page_view")
     assert(desc.tableType == CatalogTableType.EXTERNAL_TABLE)
     assert(desc.storage.locationUri == Some("/user/external/page_view"))
     assert(desc.schema ==
@@ -127,8 +127,8 @@ class HiveQlSuite extends SparkFunSuite with BeforeAndAfterAll {
     val s3 = """CREATE TABLE page_view AS SELECT * FROM src"""
     val (desc, exists) = extractTableDesc(s3)
     assert(exists == false)
-    assert(desc.name.database == None)
-    assert(desc.name.table == "page_view")
+    assert(desc.identifier.database == None)
+    assert(desc.identifier.table == "page_view")
     assert(desc.tableType == CatalogTableType.MANAGED_TABLE)
     assert(desc.storage.locationUri == None)
     assert(desc.schema == Seq.empty[CatalogColumn])
@@ -162,8 +162,8 @@ class HiveQlSuite extends SparkFunSuite with BeforeAndAfterAll {
                |   ORDER BY key, value""".stripMargin
     val (desc, exists) = extractTableDesc(s5)
     assert(exists == false)
-    assert(desc.name.database == None)
-    assert(desc.name.table == "ctas2")
+    assert(desc.identifier.database == None)
+    assert(desc.identifier.table == "ctas2")
     assert(desc.tableType == CatalogTableType.MANAGED_TABLE)
     assert(desc.storage.locationUri == None)
     assert(desc.schema == Seq.empty[CatalogColumn])
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ListTablesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ListTablesSuite.scala
index 5272f4192e4c8..e8188e5f02f28 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ListTablesSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ListTablesSuite.scala
@@ -34,7 +34,7 @@ class ListTablesSuite extends QueryTest with TestHiveSingleton with BeforeAndAft
     super.beforeAll()
     // The catalog in HiveContext is a case insensitive one.
     sessionState.catalog.createTempTable(
-      "ListTablesSuiteTable", df.logicalPlan, ignoreIfExists = true)
+      "ListTablesSuiteTable", df.logicalPlan, overrideIfExists = true)
     sql("CREATE TABLE HiveListTablesSuiteTable (key int, value string)")
     sql("CREATE DATABASE IF NOT EXISTS ListTablesSuiteDB")
     sql("CREATE TABLE ListTablesSuiteDB.HiveInDBListTablesSuiteTable (key int, value string)")
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
index 71652897e6548..3c299daa778cc 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
@@ -722,7 +722,7 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
     withTable(tableName) {
       val schema = StructType(StructField("int", IntegerType, true) :: Nil)
       val hiveTable = CatalogTable(
-        name = TableIdentifier(tableName, Some("default")),
+        identifier = TableIdentifier(tableName, Some("default")),
         tableType = CatalogTableType.MANAGED_TABLE,
         schema = Seq.empty,
         storage = CatalogStorageFormat(
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
index d59bca4c7ee4d..8b0719209dedf 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
@@ -148,7 +148,7 @@ class VersionsSuite extends SparkFunSuite with Logging {
     test(s"$version: createTable") {
       val table =
         CatalogTable(
-          name = TableIdentifier("src", Some("default")),
+          identifier = TableIdentifier("src", Some("default")),
           tableType = CatalogTableType.MANAGED_TABLE,
           schema = Seq(CatalogColumn("key", "int")),
           storage = CatalogStorageFormat(
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
index cfca93bbf0659..4c1b425b163c0 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
@@ -480,7 +480,11 @@ abstract class HiveComparisonTest
                 val executions = queryList.map(new TestHive.QueryExecution(_))
                 executions.foreach(_.toRdd)
                 val tablesGenerated = queryList.zip(executions).flatMap {
-                  case (q, e) => e.sparkPlan.collect {
+                  // We should take executedPlan instead of sparkPlan, because in the following
+                  // code we will run the collected plans. Since extra processing is applied to
+                  // sparkPlan, such as adding exchanges and collapsing codegen stages, collecting
+                  // sparkPlan here would cause errors when running these plans later.
+                  case (q, e) => e.executedPlan.collect {
                     case i: InsertIntoHiveTable if tablesRead contains i.table.tableName =>
                       (q, e, i)
                   }
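
Background for the executedPlan switch: executedPlan is sparkPlan with the preparation rules already applied (inserting exchanges, collapsing whole-stage codegen), so it is the plan that can actually be run. A hedged usage sketch, assuming a SQLContext from a Spark build of this era is in scope (for example inside a TestHive-based suite):

    val qe = sqlContext.sql("SELECT key, count(*) FROM src GROUP BY key").queryExecution
    val physical = qe.sparkPlan    // physical plan before preparation rules
    val runnable = qe.executedPlan // after exchanges / codegen collapsing are inserted
    println(runnable.treeString)
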
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 197a123905d2a..79774f5913900 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -68,6 +68,11 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
     }
   }
 
+  private def assertUnsupportedFeature(body: => Unit): Unit = {
+    val e = intercept[AnalysisException] { body }
+    assert(e.getMessage.toLowerCase.contains("unsupported operation"))
+  }
+
   test("SPARK-4908: concurrent hive native commands") {
     (1 to 100).par.map { _ =>
       sql("USE default")
@@ -1246,6 +1251,35 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
 
   // Put tests that depend on specific Hive settings before these last two tests,
   // since they modify/clear stuff.
+
+  test("role management commands are not supported") {
+    assertUnsupportedFeature { sql("CREATE ROLE my_role") }
+    assertUnsupportedFeature { sql("DROP ROLE my_role") }
+    assertUnsupportedFeature { sql("SHOW CURRENT ROLES") }
+    assertUnsupportedFeature { sql("SHOW ROLES") }
+    assertUnsupportedFeature { sql("SHOW GRANT") }
+    assertUnsupportedFeature { sql("SHOW ROLE GRANT USER my_principal") }
+    assertUnsupportedFeature { sql("SHOW PRINCIPALS my_role") }
+    assertUnsupportedFeature { sql("SET ROLE my_role") }
+    assertUnsupportedFeature { sql("GRANT my_role TO USER my_user") }
+    assertUnsupportedFeature { sql("GRANT ALL ON my_table TO USER my_user") }
+    assertUnsupportedFeature { sql("REVOKE my_role FROM USER my_user") }
+    assertUnsupportedFeature { sql("REVOKE ALL ON my_table FROM USER my_user") }
+  }
+
+  test("import/export commands are not supported") {
+    assertUnsupportedFeature { sql("IMPORT TABLE my_table FROM 'my_path'") }
+    assertUnsupportedFeature { sql("EXPORT TABLE my_table TO 'my_path'") }
+  }
+
+  test("some show commands are not supported") {
+    assertUnsupportedFeature { sql("SHOW CREATE TABLE my_table") }
+    assertUnsupportedFeature { sql("SHOW COMPACTIONS") }
+    assertUnsupportedFeature { sql("SHOW TRANSACTIONS") }
+    assertUnsupportedFeature { sql("SHOW INDEXES ON my_table") }
+    assertUnsupportedFeature { sql("SHOW LOCKS my_table") }
+  }
+
 }
 
 // for SPARK-2180 test