From 891ada5be1e7fdd796380e2626d80843f2ef6017 Mon Sep 17 00:00:00 2001 From: Cheng Lian Date: Wed, 8 Apr 2015 18:47:39 -0700 Subject: [PATCH] [SPARK-6696] [SQL] Adds HiveContext.refreshTable to PySpark [Review on Reviewable](https://reviewable.io/reviews/apache/spark/5349) Author: Cheng Lian Closes #5349 from liancheng/py-refresh-table and squashes the following commits: 004bec0 [Cheng Lian] Adds HiveContext.refreshTable to PySpark --- python/pyspark/sql/context.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py index 93e2d176a5b6f..e8529a8f8e3a4 100644 --- a/python/pyspark/sql/context.py +++ b/python/pyspark/sql/context.py @@ -574,6 +574,15 @@ def _ssql_ctx(self): def _get_hive_ctx(self): return self._jvm.HiveContext(self._jsc.sc()) + def refreshTable(self, tableName): + """Invalidate and refresh all the cached the metadata of the given + table. For performance reasons, Spark SQL or the external data source + library it uses might cache certain metadata about a table, such as the + location of blocks. When those change outside of Spark SQL, users should + call this function to invalidate the cache. + """ + self._ssql_ctx.refreshTable(tableName) + class UDFRegistration(object): """Wrapper for user-defined function registration."""