import org.apache.spark.{SparkConf, SparkContext}

object Main {
  def main(args: Array[String]): Unit = {
    // Print hello world
    println("hello world")

    // Learning Spark RDD transformation operators:
    // 1. map: apply a function to each element
    // 2. filter: keep only the elements that satisfy a predicate
    // 3. flatMap: flat (flatten) + map (transform)
    // 4. reduceByKey: combine key-value pairs by key, e.g. (world,1), (hello,1)

    val conf = new SparkConf().setMaster("local[*]").setAppName("Test")
    val sc = new SparkContext(conf)

    // Create an RDD
    // val rdd = sc.parallelize(List(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))
    // val rdd1 = rdd.map(x => x * 2)

    // Use the filter operator to keep only the even numbers
    // val rdd1 = rdd.filter(x => x % 2 == 0)

    // Given several sentences, each made of words separated by spaces,
    // the goal is to extract all the words into a single collection.
    // val rdd = sc.parallelize(List("hello world", "hello scala"))
    // val rdd1 = rdd.flatMap(_.split(" ")) // result: hello, world, hello, scala

    // Word count example
    val rdd = sc.parallelize(List("apple", "banana", "apple", "banana", "apple"))
    // val rdd1 = rdd.map(x => (x, 1))
    // val rdd3 = rdd1.reduceByKey((x, y) => x + y)
    rdd.map(x => (x, 1))
      .reduceByKey((x, y) => x + y)
      .collect() // collect() is an action operator
      .foreach(println)
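
    // A minimal sketch (added, not in the original) chaining flatMap with the
    // same word-count pattern on the sample sentences above; the value name
    // sentenceCounts is illustrative.
    val sentenceCounts = sc.parallelize(List("hello world", "hello scala"))
      .flatMap(_.split(" "))  // flatten sentences into individual words
      .map(word => (word, 1)) // pair each word with an initial count of 1
      .reduceByKey(_ + _)     // sum the counts per key
    sentenceCounts.collect().foreach(println) // e.g. (hello,2), (world,1), (scala,1)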

    // rdd3.collect().foreach(println)

    sc.stop() // stop the SparkContext for a clean shutdown
  }
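
  // A hedged sketch of the word-count pattern factored into a reusable helper;
  // the name wordCount and this signature are illustrative, not from the original.
  // Usage: wordCount(sc, List("hello world", "hello scala"))
  def wordCount(sc: SparkContext, lines: Seq[String]): Array[(String, Int)] =
    sc.parallelize(lines)
      .flatMap(_.split(" ")) // split each line into words
      .map((_, 1))           // pair each word with 1
      .reduceByKey(_ + _)    // sum counts per word
      .collect()             // action: bring the results back to the driver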
}