From a0f2620880a7622fe0605c8f09288dc583a3705a Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Wed, 5 Jun 2019 07:44:14 -0700 Subject: [PATCH] v0.3.0 release prep (#128) --- README.md | 9 ++-- benchmark/scala/pom.xml | 2 +- docs/getting-started/ubuntu-instructions.md | 2 +- docs/getting-started/windows-instructions.md | 2 +- docs/release-notes/0.3/release-0.3.md | 46 +++++++++++++++++++ eng/Versions.props | 2 +- .../Microsoft.Spark.E2ETest/SparkFixture.cs | 2 +- .../Microsoft.Spark/Sql/SparkSession.cs | 24 ++++++---- src/scala/microsoft-spark-2.3.x/pom.xml | 2 +- src/scala/microsoft-spark-2.4.x/pom.xml | 2 +- src/scala/pom.xml | 2 +- 11 files changed, 74 insertions(+), 21 deletions(-) create mode 100644 docs/release-notes/0.3/release-0.3.md diff --git a/README.md b/README.md index e00c60d45..a71a75b00 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ 2.3.* - v0.2.0 + v0.3.0 2.4.0 @@ -45,12 +45,11 @@ 2.4.1 - 2.4.2 - Not supported + 2.4.3 - 2.4.3 - master branch + 2.4.2 + Not supported diff --git a/benchmark/scala/pom.xml b/benchmark/scala/pom.xml index 5affb0411..180127e3b 100644 --- a/benchmark/scala/pom.xml +++ b/benchmark/scala/pom.xml @@ -3,7 +3,7 @@ 4.0.0 com.microsoft.spark microsoft-spark-benchmark - 0.2.0 + 0.3.0 2019 UTF-8 diff --git a/docs/getting-started/ubuntu-instructions.md b/docs/getting-started/ubuntu-instructions.md index 965367a16..2c1ccb753 100644 --- a/docs/getting-started/ubuntu-instructions.md +++ b/docs/getting-started/ubuntu-instructions.md @@ -7,7 +7,7 @@ These instructions will show you how to run a .NET for Apache Spark app using .N - Download and install the following: **[.NET Core 2.1 SDK](https://dotnet.microsoft.com/download/dotnet-core/2.1)** | **[OpenJDK 8](https://openjdk.java.net/install/)** | **[Apache Spark 2.4.1](https://archive.apache.org/dist/spark/spark-2.4.1/spark-2.4.1-bin-hadoop2.7.tgz)** - Download and install **[Microsoft.Spark.Worker](https://github.com/dotnet/spark/releases)** release: - Select a 
**[Microsoft.Spark.Worker](https://github.com/dotnet/spark/releases)** release from .NET for Apache Spark GitHub Releases page and download into your local machine (e.g., `~/bin/Microsoft.Spark.Worker`). - - **IMPORTANT** Create a [new environment variable](https://help.ubuntu.com/community/EnvironmentVariables) `DotnetWorkerPath` and set it to the directory where you downloaded and extracted the Microsoft.Spark.Worker (e.g., `~/bin/Microsoft.Spark.Worker`). + - **IMPORTANT** Create a [new environment variable](https://help.ubuntu.com/community/EnvironmentVariables) `DOTNET_WORKER_DIR` and set it to the directory where you downloaded and extracted the Microsoft.Spark.Worker (e.g., `~/bin/Microsoft.Spark.Worker`). For detailed instructions, you can see [Building .NET for Apache Spark from Source on Ubuntu](../building/ubuntu-instructions.md). diff --git a/docs/getting-started/windows-instructions.md b/docs/getting-started/windows-instructions.md index 9d2fd8c01..05d6954fb 100644 --- a/docs/getting-started/windows-instructions.md +++ b/docs/getting-started/windows-instructions.md @@ -7,7 +7,7 @@ These instructions will show you how to run a .NET for Apache Spark app using .N - Download and install the following: **[.NET Core 2.1 SDK](https://dotnet.microsoft.com/download/dotnet-core/2.1)** | **[Visual Studio 2019](https://www.visualstudio.com/downloads/)** | **[Java 1.8](https://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html)** | **[Apache Spark 2.4.1](https://archive.apache.org/dist/spark/spark-2.4.1/spark-2.4.1-bin-hadoop2.7.tgz)** - Download and install **[Microsoft.Spark.Worker](https://github.com/dotnet/spark/releases)** release: - Select a **[Microsoft.Spark.Worker](https://github.com/dotnet/spark/releases)** release from .NET for Apache Spark GitHub Releases page and download into your local machine (e.g., `c:\bin\Microsoft.Spark.Worker\`). 
- - **IMPORTANT** Create a [new environment variable](https://www.java.com/en/download/help/path.xml) `DotnetWorkerPath` and set it to the directory where you downloaded and extracted the Microsoft.Spark.Worker (e.g., `c:\bin\Microsoft.Spark.Worker`). + - **IMPORTANT** Create a [new environment variable](https://www.java.com/en/download/help/path.xml) `DOTNET_WORKER_DIR` and set it to the directory where you downloaded and extracted the Microsoft.Spark.Worker (e.g., `c:\bin\Microsoft.Spark.Worker`). For detailed instructions, you can see [Building .NET for Apache Spark from Source on Windows](../building/windows-instructions.md). diff --git a/docs/release-notes/0.3/release-0.3.md b/docs/release-notes/0.3/release-0.3.md new file mode 100644 index 000000000..ba200520e --- /dev/null +++ b/docs/release-notes/0.3/release-0.3.md @@ -0,0 +1,46 @@ +# .NET for Apache Spark 0.3 Release Notes + +### Release Notes + +Below are some of the highlights from this release. + +* [Apache Spark 2.4.3](https://spark.apache.org/news/spark-2-4-3-released.html) support ([#118](https://github.com/dotnet/spark/pull/108)) +* dotnet/spark is now using [dotnet/arcade](https://github.com/dotnet/arcade) as the build infrastructure ([#113](https://github.com/dotnet/spark/pull/113)) + * [Source Link](https://github.com/dotnet/sourcelink) is now supported for the Nuget package ([#40](https://github.com/dotnet/spark/issues/40)). + * Fixed the issue where Microsoft.Spark.dll is not signed ([#119](https://github.com/dotnet/spark/issues/119)). +* Pickling performance is improved ([#111](https://github.com/dotnet/spark/pull/111)). 
+ * Performance improvement PRs in the Pickling Library: [irmen/Pyrolite#64](https://github.com/irmen/Pyrolite/pull/64), [irmen/Pyrolite#67](https://github.com/irmen/Pyrolite/pull/67) +* ArrayType and MapType are supported as UDF return types ([#112](https://github.com/dotnet/spark/issues/112#issuecomment-493297068), [#114](https://github.com/dotnet/spark/pull/114)) + +### Supported Spark Versions + +The following table outlines the supported Spark versions along with the microsoft-spark JAR to use with: + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Spark Versionmicrosoft-spark JAR
2.3.*microsoft-spark-2.3.x-0.3.0.jar
2.4.0microsoft-spark-2.4.x-0.3.0.jar
2.4.1
2.4.3
2.4.2Not supported
diff --git a/eng/Versions.props b/eng/Versions.props index a74882ace..877833e34 100644 --- a/eng/Versions.props +++ b/eng/Versions.props @@ -1,7 +1,7 @@ - 0.2.0 + 0.3.0 prerelease $(RestoreSources); diff --git a/src/csharp/Microsoft.Spark.E2ETest/SparkFixture.cs b/src/csharp/Microsoft.Spark.E2ETest/SparkFixture.cs index 8bc9ffc41..aeeb8e4d0 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/SparkFixture.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/SparkFixture.cs @@ -36,7 +36,7 @@ public SparkFixture() AppDomain.CurrentDomain.BaseDirectory); #elif NETCOREAPP2_1 // For .NET Core, the user must have published the worker as a standalone - // executable and set DotnetWorkerPath to the published directory. + // executable and set the worker path to the published directory. if (string.IsNullOrEmpty(Environment.GetEnvironmentVariable(workerDirEnvVarName))) { throw new Exception( diff --git a/src/csharp/Microsoft.Spark/Sql/SparkSession.cs b/src/csharp/Microsoft.Spark/Sql/SparkSession.cs index f5a2b9f52..e1d82de84 100644 --- a/src/csharp/Microsoft.Spark/Sql/SparkSession.cs +++ b/src/csharp/Microsoft.Spark/Sql/SparkSession.cs @@ -65,6 +65,22 @@ public void Dispose() public SparkSession NewSession() => new SparkSession((JvmObjectReference)_jvmObject.Invoke("newSession")); + /// + /// Returns the specified table/view as a DataFrame. + /// + /// Name of a table or view + /// DataFrame object + public DataFrame Table(string tableName) + => new DataFrame((JvmObjectReference)_jvmObject.Invoke("table", tableName)); + + /// + /// Executes a SQL query using Spark, returning the result as a DataFrame. + /// + /// SQL query text + /// DataFrame object + public DataFrame Sql(string sqlText) + => new DataFrame((JvmObjectReference)_jvmObject.Invoke("sql", sqlText)); + /// /// Returns a DataFrameReader that can be used to read non-streaming data in /// as a DataFrame. 
@@ -80,14 +96,6 @@ public DataFrameReader Read() => public DataStreamReader ReadStream() => new DataStreamReader((JvmObjectReference)_jvmObject.Invoke("readStream")); - /// - /// Executes a SQL query using Spark, returning the result as a DataFrame. - /// - /// SQL query text - /// DataFrame object - public DataFrame Sql(string sqlText) - => new DataFrame((JvmObjectReference)_jvmObject.Invoke("sql", sqlText)); - /// /// Returns UDFRegistraion object with which user-defined functions (UDF) can /// be registered. diff --git a/src/scala/microsoft-spark-2.3.x/pom.xml b/src/scala/microsoft-spark-2.3.x/pom.xml index e381e2b3e..1d2b09489 100644 --- a/src/scala/microsoft-spark-2.3.x/pom.xml +++ b/src/scala/microsoft-spark-2.3.x/pom.xml @@ -4,7 +4,7 @@ com.microsoft.scala microsoft-spark - 0.2.0 + 0.3.0 microsoft-spark-2.3.x 2019 diff --git a/src/scala/microsoft-spark-2.4.x/pom.xml b/src/scala/microsoft-spark-2.4.x/pom.xml index 33f02c964..1d43a1bb9 100644 --- a/src/scala/microsoft-spark-2.4.x/pom.xml +++ b/src/scala/microsoft-spark-2.4.x/pom.xml @@ -4,7 +4,7 @@ com.microsoft.scala microsoft-spark - 0.2.0 + 0.3.0 microsoft-spark-2.4.x 2019 diff --git a/src/scala/pom.xml b/src/scala/pom.xml index 09710e1f0..c337d258b 100644 --- a/src/scala/pom.xml +++ b/src/scala/pom.xml @@ -4,7 +4,7 @@ com.microsoft.scala microsoft-spark pom - 0.2.0 + 0.3.0 UTF-8