Skip to content

Instantly share code, notes, and snippets.

@ssmaroju
Forked from mhausenblas/SparkGrep.scala
Created March 16, 2016 19:36
Show Gist options
  • Save ssmaroju/a0b0ae5606f061c3d59a to your computer and use it in GitHub Desktop.
Save ssmaroju/a0b0ae5606f061c3d59a to your computer and use it in GitHub Desktop.
Scala Spark skeleton implementing grep
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the spark-grep example (my.org:spark-grep:1.0-SNAPSHOT). -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Project coordinates. -->
<groupId>my.org</groupId>
<artifactId>spark-grep</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<!-- Spark core 1.2.0; the _2.10 artifact suffix indicates the Scala 2.10 build. -->
<!-- NOTE(review): no scope is set, so the default (compile) scope applies; confirm whether "provided" is intended when submitting to a cluster. -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.10</artifactId>
<version>1.2.0</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Only maven-compiler-plugin is declared, with no extra configuration. -->
<!-- NOTE(review): no Scala compilation plugin appears in this POM; confirm how the Scala source is expected to be compiled. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
</plugin>
</plugins>
</build>
</project>
package spark.example
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
/**
 * Minimal Spark "grep": counts the lines of a text file that contain a given term.
 *
 * Command-line arguments:
 *  1. the value passed to `SparkConf.setMaster`
 *  2. the path passed to `SparkContext.textFile`
 *  3. the term each line is tested against with `String.contains`
 *     (plain substring test, not a regular expression)
 *
 * Exits with status 1 after printing a usage message when fewer than three
 * arguments are supplied, and with status 0 after printing the match count.
 */
object SparkGrep {

  /**
   * Program entry point.
   *
   * @param args master, input file and match term, in that order; any further
   *             arguments are ignored
   */
  def main(args: Array[String]): Unit = {
    if (args.length < 3) {
      System.err.println("Usage: SparkGrep <host> <input_file> <match_term>")
      System.exit(1)
    }

    val master = args(0)
    val inputPath = args(1)
    val matchTerm: String = args(2)

    val conf = new SparkConf().setAppName("SparkGrep").setMaster(master)
    val sc = new SparkContext(conf)
    try {
      // The RDD is consumed by a single action, so it is not cached:
      // persisting it would only use executor memory without saving any work.
      val numMatches = sc.textFile(inputPath, 2)
        .filter(line => line.contains(matchTerm))
        .count()
      println(s"$numMatches lines in $inputPath contain $matchTerm")
    } finally {
      // Stop the context even when the job throws, so it is shut down cleanly.
      sc.stop()
    }
    System.exit(0)
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment