Skip to content

Instantly share code, notes, and snippets.

@deepankar14693
Created February 22, 2018 04:48
Show Gist options
  • Save deepankar14693/e93f2ea30264fdec812e639e14097c15 to your computer and use it in GitHub Desktop.
Save deepankar14693/e93f2ea30264fdec812e639e14097c15 to your computer and use it in GitHub Desktop.
Spark assignment (not yet complete)
// Question 2 (spark-shell transcript; the question text itself is not included here).
// Builds two RDDs of the integers 1..4 and pairs them up element by element.
//
// Note: `.distinct` below is invoked on the local Scala List *before* it is passed to
// `parallelize`, so the duplicate `1` is removed from the in-memory collection rather
// than by an RDD transformation (the result is reported as a plain ParallelCollectionRDD).
val x = sc.parallelize(List(1,1,2,3,4).distinct)
x: org.apache.spark.rdd.RDD[Int] = ParallelCollectionRDD[0] at parallelize at <console>:24
scala> x.collect
res0: Array[Int] = Array(1, 2, 3, 4)
// Second RDD holding the same four values that `x` collected to above.
scala> val y = sc.parallelize(List(1,2,3,4))
y: org.apache.spark.rdd.RDD[Int] = ParallelCollectionRDD[1] at parallelize at <console>:24
// Pair the elements of `x` and `y` by position, producing an RDD of (Int, Int) tuples.
// NOTE(review): RDD.zip is documented to expect both RDDs to have the same number of
// partitions and the same number of elements per partition — confirm against the Spark docs.
scala> val z = x.zip(y)
z: org.apache.spark.rdd.RDD[(Int, Int)] = ZippedPartitionsRDD2[2] at zip at <console>:28
scala> z.collect
res1: Array[(Int, Int)] = Array((1,1), (2,2), (3,3), (4,4))
// Question 1 (spark-shell transcript; the question text itself is not included here and
// the answer appears unfinished — only the input RDD is constructed).
//
// First attempt: a bare String is passed to `parallelize`, so it is treated as a
// sequence of characters and the result is an RDD[Char] (see the reported type below).
scala> val line = sc.parallelize("Hello,world")
line: org.apache.spark.rdd.RDD[Char] = ParallelCollectionRDD[14] at parallelize at <console>:24
// Second attempt: wrapping the String in a List makes the whole string a single element,
// giving an RDD[String]. This redefines `line`, replacing the RDD[Char] binding above.
scala> val line = sc.parallelize(List("Hello,world"))
line: org.apache.spark.rdd.RDD[String] = ParallelCollectionRDD[15] at parallelize at <console>:24
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment