Skip to content

Commit

Permalink
Support More Aggregate Methods (#265)
Browse files Browse the repository at this point in the history
  • Loading branch information
elvaliuliuliu authored and imback82 committed Sep 25, 2019
1 parent 047a547 commit 131d9b1
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 3 deletions.
22 changes: 19 additions & 3 deletions src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/DataFrameTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -404,9 +404,25 @@ public void TestSignaturesV2_3_X()
Assert.IsType<RelationalGroupedDataset>(_df.GroupBy(_df["age"]));
Assert.IsType<RelationalGroupedDataset>(_df.GroupBy(_df["age"], _df["name"]));

Assert.IsType<DataFrame>(_df.GroupBy("name").Sum("age"));
Assert.IsType<DataFrame>(
_df.WithColumn("tempAge", _df["age"]).GroupBy("name").Sum("age", "tempAge"));
{
RelationalGroupedDataset df =
_df.WithColumn("tempAge", _df["age"]).GroupBy("name");

Assert.IsType<DataFrame>(df.Mean("age"));
Assert.IsType<DataFrame>(df.Mean("age", "tempAge"));

Assert.IsType<DataFrame>(df.Max("age"));
Assert.IsType<DataFrame>(df.Max("age", "tempAge"));

Assert.IsType<DataFrame>(df.Avg("age"));
Assert.IsType<DataFrame>(df.Avg("age", "tempAge"));

Assert.IsType<DataFrame>(df.Min("age"));
Assert.IsType<DataFrame>(df.Min("age", "tempAge"));

Assert.IsType<DataFrame>(df.Sum("age"));
Assert.IsType<DataFrame>(df.Sum("age", "tempAge"));
}

Assert.IsType<RelationalGroupedDataset>(_df.Rollup("age"));
Assert.IsType<RelationalGroupedDataset>(_df.Rollup("age", "name"));
Expand Down
33 changes: 33 additions & 0 deletions src/csharp/Microsoft.Spark/Sql/RelationalGroupedDataset.cs
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,42 @@ public DataFrame Agg(Column expr, params Column[] exprs) =>
public DataFrame Count()
{
    // Invoke the JVM-side "count" method and wrap the returned reference in a DataFrame.
    JvmObjectReference countRef = (JvmObjectReference)_jvmObject.Invoke("count");
    return new DataFrame(countRef);
}

/// <summary>
/// Computes the mean value of each numeric column for each group.
/// </summary>
/// <param name="colNames">Names of the columns to compute the mean on</param>
/// <returns>New DataFrame wrapping the result of the JVM-side "mean" call</returns>
public DataFrame Mean(params string[] colNames)
{
    // Typed as object so the string array travels as one argument
    // (presumably Invoke accepts a params object[] - confirm against its signature).
    object columnNames = colNames;
    JvmObjectReference meanRef =
        (JvmObjectReference)_jvmObject.Invoke("mean", columnNames);
    return new DataFrame(meanRef);
}

/// <summary>
/// Computes the maximum value of each numeric column for each group.
/// </summary>
/// <param name="colNames">Names of the columns to compute the max on</param>
/// <returns>New DataFrame wrapping the result of the JVM-side "max" call</returns>
public DataFrame Max(params string[] colNames)
{
    // Typed as object so the string array travels as one argument
    // (presumably Invoke accepts a params object[] - confirm against its signature).
    object columnNames = colNames;
    JvmObjectReference maxRef =
        (JvmObjectReference)_jvmObject.Invoke("max", columnNames);
    return new DataFrame(maxRef);
}

/// <summary>
/// Computes the average value of each numeric column for each group.
/// </summary>
/// <param name="colNames">Names of the columns to compute the average on</param>
/// <returns>New DataFrame wrapping the result of the JVM-side "avg" call</returns>
public DataFrame Avg(params string[] colNames)
{
    // Typed as object so the string array travels as one argument
    // (presumably Invoke accepts a params object[] - confirm against its signature).
    object columnNames = colNames;
    JvmObjectReference avgRef =
        (JvmObjectReference)_jvmObject.Invoke("avg", columnNames);
    return new DataFrame(avgRef);
}

/// <summary>
/// Computes the minimum value of each numeric column for each group.
/// </summary>
/// <param name="colNames">Names of the columns to compute the min on</param>
/// <returns>New DataFrame wrapping the result of the JVM-side "min" call</returns>
public DataFrame Min(params string[] colNames)
{
    // Typed as object so the string array travels as one argument
    // (presumably Invoke accepts a params object[] - confirm against its signature).
    object columnNames = colNames;
    JvmObjectReference minRef =
        (JvmObjectReference)_jvmObject.Invoke("min", columnNames);
    return new DataFrame(minRef);
}

/// <summary>
/// Computes the sum of each numeric column for each group.
/// </summary>
/// <param name="colNames">Names of the columns to compute the sum on</param>
/// <returns>New DataFrame wrapping the result of the JVM-side "sum" call</returns>
public DataFrame Sum(params string[] colNames)
{
    // Typed as object so the string array travels as one argument
    // (presumably Invoke accepts a params object[] - confirm against its signature).
    object columnNames = colNames;
    JvmObjectReference sumRef =
        (JvmObjectReference)_jvmObject.Invoke("sum", columnNames);
    return new DataFrame(sumRef);
}
Expand Down

0 comments on commit 131d9b1

Please sign in to comment.