Spark 实例 4:求中位数
输入数据:
1 2 3 4 5 6 8 9 11 12 34
输出结果(程序还会先打印中间调试信息,最后一行为中位数):
6
/**
 * Computes the median of the integers stored in a text file, using Spark.
 *
 * Approach: every value `v` is assigned to the bucket `v / 4`. Only the small
 * per-bucket counts are brought to the driver; they are walked in ascending
 * bucket order to find the bucket that contains the median rank, and then only
 * that bucket's values are ordered to pick the element at the remaining offset.
 *
 * For an even number of values the lower of the two middle values is reported.
 */
object Median {

  /**
   * Program entry point.
   *
   * Prints every parsed value, every `(bucket, value)` pair, every
   * `(bucket, count)` pair, the counts of the buckets inspected while locating
   * the median, and finally the median itself on the last line.
   *
   * @param args optional; `args(0)` is the path of the input file. When absent
   *             the original hard-coded sample path is used.
   * @throws NumberFormatException if the file contains a non-blank token that
   *                               is not a valid `Int`
   */
  def main(args: Array[String]): Unit = {
    val defaultInputPath = "E:\\newcode\\MyFirstProject\\data\\Median.txt"
    val bucketWidth = 4

    val conf = new SparkConf().setAppName("Median").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      // Set the log level before running any job so that it actually takes effect.
      sc.setLogLevel("ERROR")

      val inputPath = args.headOption.getOrElse(defaultInputPath)

      // Split on any whitespace run and drop empty tokens so that blank lines or
      // repeated separators do not reach `toInt`. Cached because several jobs reuse it.
      val words = sc.textFile(inputPath)
        .flatMap(_.split("\\s+"))
        .filter(_.nonEmpty)
        .map(_.toInt)
        .cache()
      words.collect().foreach(println)

      val number = words.map(word => (word / bucketWidth, word)).sortByKey()
      number.collect().foreach(println)

      // Collected once, in ascending bucket order. Counts are Long so that very
      // large inputs cannot overflow.
      val bucketCounts = words
        .map(word => (word / bucketWidth, 1L))
        .reduceByKey(_ + _)
        .sortByKey()
        .collect()
      bucketCounts.foreach(println)

      val count = bucketCounts.map(_._2).sum
      if (count == 0L) {
        System.err.println(s"No integers found in $inputPath; the median is undefined.")
      } else {
        // 1-based rank of the median: the middle element for an odd count,
        // the lower middle element for an even count.
        val mid = (count + 1) / 2

        // cumulative(i) = number of values in the buckets before position i.
        val cumulative = bucketCounts.scanLeft(0L)(_ + _._2)

        // cumulative(0) == 0 < mid and cumulative.last == count >= mid, so a
        // match always exists at a position >= 1.
        val hit = cumulative.indexWhere(_ >= mid) - 1

        // Diagnostic output: the size of every bucket inspected, target included.
        bucketCounts.take(hit + 1).foreach { case (_, size) => println(size.toString) }

        // Use the real bucket id, not the position: bucket ids may have gaps
        // or be negative.
        val (bucket, _) = bucketCounts(hit)
        val offsetInBucket = (mid - cumulative(hit)).toInt

        // The target bucket holds at least `offsetInBucket` (>= 1) values, so the
        // result is never empty and its last element is the median.
        val median = number
          .filter(_._1 == bucket)
          .map(_._2)
          .takeOrdered(offsetInBucket)
          .last
        println(median)
      }
    } finally {
      // Release the context even when parsing or a job fails.
      sc.stop()
    }
  }
}
