Skip to content

Commit

Permalink
Merge pull request #1 from seahrh/rel/0.2.0
Browse files Browse the repository at this point in the history
Rel/0.2.0
  • Loading branch information
seahrh authored Mar 24, 2019
2 parents 8947f23 + 6472322 commit ffdb270
Show file tree
Hide file tree
Showing 10 changed files with 60 additions and 23 deletions.
16 changes: 16 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Travis CI build configuration: compile and run the sbt test suite on JDK 8.
language: scala
jdk: oraclejdk8
# Scala versions to build against (single-version build).
scala:
- "2.11.12"
# Use container-based infrastructure
sudo: false
# Cache dependency resolution output between builds to speed up CI.
cache:
directories:
- $HOME/.ivy2/cache
- $HOME/.sbt
before_cache:
# Cleanup the cached directories to avoid unnecessary cache updates
- find $HOME/.ivy2/cache -name "ivydata-*.properties" -print -delete
- find $HOME/.sbt -name "*.lock" -print -delete
# Build command: clean first so stale class files cannot mask failures.
script:
- sbt clean test
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
[![buildstatus](https://travis-ci.org/seahrh/spark-util.svg?branch=master)](https://travis-ci.org/seahrh/spark-util)

# spark-util
Tiny Spark utility for common use cases and bug workarounds. Unit tested on Spark 2.2.
## Dataset union bug
Expand Down Expand Up @@ -63,3 +65,17 @@ val result: SortedMap[String, Long] = a.value
See [CountAccumulatorSpec](src/test/scala/com/sgcharts/sparkutil/CountAccumulatorSpec.scala) for more examples.

Based on [hammerlab's](https://github.com/hammerlab/spark-util) `spark-util`.
## Logging
Spark uses log4j (not logback).

By default, log output is written to console `stderr` (see the default `log4j.properties` shipped in `spark/conf`).

Usage:
```scala
import com.sgcharts.sparkutil.Log4jLogging

object MySparkApp extends Log4jLogging {
log.info("Hello World!")
}
```
See https://stackoverflow.com/questions/29208844/apache-spark-logging-within-scala
12 changes: 4 additions & 8 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,20 @@ lazy val root = (project in file(".")).
inThisBuild(List(
organization := "com.sgcharts",
scalaVersion := "2.11.12",
version := "1.0.0-SNAPSHOT"
version := "0.2.0"
)),
name := "spark-util",
libraryDependencies ++= Seq(
"org.scalatest" %% "scalatest" % versions.scalatest % Test,
"org.apache.spark" %% "spark-sql" % versions.spark % Provided,
"org.apache.spark" %% "spark-hive" % versions.spark % Test,
"ch.qos.logback" % "logback-classic" % versions.logback,
"com.typesafe.scala-logging" %% "scala-logging-slf4j" % versions.scalaLogging,
"com.holdenkarau" %% "spark-testing-base" % versions.sparkTestingBase % Test
)
)
lazy val versions = new {
val scalatest = "3.0.5"
val spark = "2.2.1"
val logback = "1.2.3"
val scalaLogging = "2.1.2"
val sparkTestingBase = "2.2.0_0.8.0"
val scalatest = "3.0.7"
val spark = "2.2.2"
val sparkTestingBase = "2.2.2_0.11.0"
}
wartremoverErrors ++= Warts.allBut(
Wart.ToString,
Expand Down
2 changes: 1 addition & 1 deletion project/build.properties
Original file line number Diff line number Diff line change
@@ -1 +1 @@
sbt.version=1.1.0
sbt.version=1.2.8
2 changes: 1 addition & 1 deletion project/wartremover.sbt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
addSbtPlugin("org.wartremover" % "sbt-wartremover" % "2.2.1")
addSbtPlugin("org.wartremover" % "sbt-wartremover" % "2.4.1")
6 changes: 6 additions & 0 deletions src/main/resources/log4j.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# log4j configuration for the application (src/main/resources).
# Set everything to be logged to the console
log4j.rootCategory=INFO, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
# Write to stderr, matching Spark's default console appender behaviour.
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
# Example output: 19/03/24 12:00:00 INFO MyClass: message
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
8 changes: 8 additions & 0 deletions src/main/scala/com/sgcharts/sparkutil/Log4jLogging.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package com.sgcharts.sparkutil

import org.apache.log4j.Logger

/**
  * Mix-in that provides a log4j [[org.apache.log4j.Logger]] named after the
  * concrete class that mixes it in.
  *
  * The logger is declared `@transient lazy` deliberately:
  *  - `@transient` keeps the (non-serializable) logger out of closures
  *    serialized and shipped to Spark executors;
  *  - `lazy` re-initializes the logger on the executor side after
  *    deserialization, instead of leaving a null field.
  */
trait Log4jLogging {
  // Use the Logger.getLogger(Class) overload rather than
  // getClass.getCanonicalName: getCanonicalName returns null for anonymous
  // and local subclasses, which would produce a broken logger name.
  @transient protected lazy val log: Logger = Logger.getLogger(getClass)

}
5 changes: 2 additions & 3 deletions src/main/scala/com/sgcharts/sparkutil/package.scala
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
package com.sgcharts

import com.typesafe.scalalogging.slf4j.LazyLogging
import org.apache.spark.sql.DataFrame

package object sparkutil extends LazyLogging {
package object sparkutil extends Log4jLogging {

private def union(left: DataFrame, right: DataFrame): DataFrame = {
val cols: Array[String] = left.columns
val res: DataFrame = left.union(right.select(cols.head, cols.tail: _*))
logger.debug(
log.debug(
s"""
|Left schema ${left.schema.treeString}
|Right schema ${right.schema.treeString}
Expand Down
6 changes: 6 additions & 0 deletions src/test/resources/log4j.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# log4j configuration for the test suite (src/test/resources).
# Set everything to be logged to the console
# Root level OFF: suppress Spark's verbose logging during unit tests.
log4j.rootCategory=OFF, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
# Write to stderr, matching Spark's default console appender behaviour.
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
# Example output: 19/03/24 12:00:00 INFO MyClass: message
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
10 changes: 0 additions & 10 deletions src/test/resources/logback-test.xml

This file was deleted.

0 comments on commit ffdb270

Please sign in to comment.