-
Notifications
You must be signed in to change notification settings - Fork 0
/
WordCount.scala
33 lines (21 loc) · 918 Bytes
/
WordCount.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import org.apache.spark.{SparkContext, SparkConf}
object WordCount {
  /** Counts word occurrences in a local README file with Spark and prints
   *  each (word, count) pair to stdout.
   *
   *  @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Run locally with two worker threads.
    val master = "local[2]"
    val cfg = new SparkConf().setAppName("WordCountJob").setMaster(master)
    val sc = new SparkContext(cfg)
    // NOTE(review): hard-coded local path — assumes this file exists on the machine running the driver.
    val fileName = "file:///media/linux-1/spark-1.6.0-bin-hadoop2.6/README.md"
    try {
      val file = sc.textFile(fileName)
      // Split each line on spaces, pair every word with 1, then sum counts per word.
      // Equivalent placeholder-syntax form:
      //   file.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _)
      val words = file.flatMap(line => line.split(" ")).map(word => (word, 1)).reduceByKey((a, b) => a + b)
      /* collect() pulls the entire result to the driver; for large data sets prefer
         words.take(100).foreach(println) to avoid driver out-of-memory. */
      words.collect().foreach(println)
    } finally {
      // Stop the context exactly once (the original called sc.stop twice);
      // finally guarantees cleanup even if the job throws.
      sc.stop()
    }
  }
}