第五章_Spark核心编程_Rdd_行动算子_countByKey
1.定义
/*
 * 1.定义
 *     def countByKey(): Map[K, Long]
 * 2.功能
 *     统计每种 key 出现的次数,并将结果以 Map 的形式返回到 Driver 端
 * 3.执行流程
 *     1. 每个分区先在本地统计各 key 的个数,即 (key, count(1))
 *     2. 再拉取各分区的 (key, count(1)) 结果,按 key 做聚合
 */
2.示例
/**
 * Demonstrates the `countByKey` action on a pair RDD.
 *
 * Builds a two-slice RDD of `(Int, String)` pairs, counts how many elements share
 * each key, and prints the resulting map. In the sample data key `1` appears twice
 * and key `2` appears three times.
 */
object countByKeyTest {

  /**
   * Entry point.
   *
   * An explicit `main` method is used instead of `extends App`: Spark's documentation
   * advises against `scala.App` because its delayed initialization may leave fields
   * uninitialized when they are referenced from tasks.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // The app name matches this example (it was previously a copy-paste leftover).
    val conf: SparkConf = new SparkConf().setMaster("local").setAppName("countByKeyTest")
    val sc: SparkContext = new SparkContext(conf)

    try {
      val pairs = sc.makeRDD(List((1, "x"), (1, "x"), (2, "x"), (2, "x"), (2, "x")), 2)

      // countByKey is an action: the per-key counts come back as a local Map.
      val counts: collection.Map[Int, Long] = pairs.countByKey()
      println(counts)
    } finally {
      // Always release the SparkContext, even if the job above fails.
      sc.stop()
    }
  }
}