Commit

Addresses PR comments
liancheng committed Jul 14, 2016
1 parent 8dc22ff commit cf7d394
Showing 4 changed files with 18 additions and 19 deletions.
26 changes: 14 additions & 12 deletions README.md
@@ -24,20 +24,23 @@ This library is more suited to ETL than interactive queries, since large amounts

This library requires Apache Spark 2.0+ and Amazon Redshift 1.0.963+.

For a version that works with Spark 1.x, please check out the [1.x branch](https://github.com/databricks/spark-redshift/tree/branch-1.x).

You may use this library in your applications with the following dependency information:

**Scala 2.10**

```
groupId: com.databricks
artifactId: spark-redshift_2.10
-version: 0.7.0
+version: 2.0.0-SNAPSHOT
```

**Scala 2.11**
```
groupId: com.databricks
artifactId: spark-redshift_2.11
-version: 0.7.0
+version: 2.0.0-SNAPSHOT
```

You will also need to provide a JDBC driver that is compatible with Redshift. Amazon recommends that you use [their driver](http://docs.aws.amazon.com/redshift/latest/mgmt/configure-jdbc-connection.html), which is distributed as a JAR hosted on Amazon's website. This library has also been successfully tested using the Postgres JDBC driver.
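
For an sbt build, the coordinates above and the JDBC driver note translate into roughly the following sketch; the `%%` convention and the Postgres driver version shown are illustrative assumptions, not values taken from this commit:

```
// build.sbt (sketch): %% appends the Scala binary version (_2.10 or _2.11)
// to the artifact name to match the build's scalaVersion.
libraryDependencies += "com.databricks" %% "spark-redshift" % "2.0.0-SNAPSHOT"

// A Redshift-compatible JDBC driver must also be on the classpath; the
// Postgres driver below is one tested option (version is illustrative).
libraryDependencies += "org.postgresql" % "postgresql" % "9.4.1208"
```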
@@ -81,19 +84,19 @@ val df: DataFrame = sqlContext.read

df.write
.format("com.databricks.spark.redshift")
.option("url", "jdbc:redshift://redshifthost:5439/database?user=username&password=pass")
.option("dbtable", "my_table_copy")
.option("tempdir", "s3n://path/for/temp/data")
.mode("error")
.save()

// Using IAM Role based authentication
df.write
.format("com.databricks.spark.redshift")
.option("url", "jdbc:redshift://redshifthost:5439/database?user=username&password=pass")
.option("dbtable", "my_table_copy")
.option("aws_iam_role", "arn:aws:iam::123456789000:role/redshift_iam_role")
.option("tempdir", "s3n://path/for/temp/data")
.mode("error")
.save()
```
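
The `df` being written in these examples comes from a read call that the hunk above mostly collapses; a minimal sketch of that read side, with placeholder URL, table name, and tempdir, would look like:

```
// Sketch: read a Redshift table back into a DataFrame with the same connector.
// The URL, table name, and tempdir are placeholders, not values from this commit.
val df: DataFrame = sqlContext.read
  .format("com.databricks.spark.redshift")
  .option("url", "jdbc:redshift://redshifthost:5439/database?user=username&password=pass")
  .option("dbtable", "my_table")
  .option("tempdir", "s3n://path/for/temp/data")
  .load()
```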
@@ -130,8 +133,7 @@ df.write \
.option("tempdir", "s3n://path/for/temp/data") \
.mode("error") \
.save()
# Using IAM Role based authentication
df.write \
.format("com.databricks.spark.redshift") \
@@ -355,7 +355,7 @@ class RedshiftIntegrationSuite extends IntegrationSuiteBase {
// .rdd() forces the first query to be unloaded from Redshift
val rdd1 = sqlContext.sql("select testint from test_table").rdd
// Similarly, this also forces an unload:
-val rdd2 = sqlContext.sql("select testdouble from test_table").rdd
+sqlContext.sql("select testdouble from test_table").rdd
// If the unloads were performed into the same directory then this call would fail: the
// second unload from rdd2 would have overwritten the integers with doubles, so we'd get
// a NumberFormatException.
@@ -599,9 +599,9 @@ class RedshiftIntegrationSuite extends IntegrationSuiteBase {
}

test("Respect SaveMode.ErrorIfExists when table exists") {
-val rdd = sc.parallelize(TestUtils.expectedData.toSeq)
+val rdd = sc.parallelize(TestUtils.expectedData)
val df = sqlContext.createDataFrame(rdd, TestUtils.testSchema)
-df.registerTempTable(test_table) // to ensure that the table already exists
+df.createOrReplaceTempView(test_table) // to ensure that the table already exists

// Check that SaveMode.ErrorIfExists throws an exception
intercept[AnalysisException] {
@@ -81,9 +81,6 @@ private[redshift] case class RedshiftRelation(
writer.saveToRedshift(sqlContext, data, saveMode, params)
}

-// In Spark 1.6+, this method allows a data source to declare which filters it handles, allowing
-// Spark to skip its own defensive filtering. See SPARK-10978 for more details. As long as we
-// compile against Spark 1.4, we cannot use the `override` modifier here.
override def unhandledFilters(filters: Array[Filter]): Array[Filter] = {
filters.filterNot(filter => FilterPushdown.buildFilterExpression(schema, filter).isDefined)
}
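
For context, `unhandledFilters` tells Spark which pushed-down filters the source cannot evaluate itself, and Spark re-applies whatever is returned on the scanned rows. A minimal sketch of the kind of input it receives, using assumed filters and column names borrowed from the integration suite purely for illustration:

```
// Sketch: example pushed-down filters Spark might pass in (assumed values,
// not taken from this commit).
import org.apache.spark.sql.sources.{EqualTo, Filter, IsNotNull}

val pushed: Array[Filter] = Array(EqualTo("testint", 42), IsNotNull("testdouble"))
// Any filter for which FilterPushdown.buildFilterExpression(schema, f) is None
// stays in the returned array and is evaluated again by Spark after the unload.
```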
2 changes: 1 addition & 1 deletion version.sbt
@@ -1 +1 @@
-version in ThisBuild := "1.0.1-SNAPSHOT"
+version in ThisBuild := "2.0.0-SNAPSHOT"
