# 需求:统计平均年龄
# 开发步骤:
# 1、取出年龄
# 2、计算年龄的总和
# 3、计算记录总数 count
# 4、计算平均年龄 = 年龄总和 / 记录总数
import sys
from pyspark import SparkContext
from pyspark import SparkConf
"""
求平均数
"""
def parse_age(line):
    """Extract the age from one input record.

    The age is the second space-separated field of ``line``.

    Raises:
        IndexError: if the line has fewer than two space-separated fields.
        ValueError: if the second field is not an integer literal.
    """
    return int(line.split(" ")[1])


def add_age(partial, age):
    """Fold one age into a running ``(total_age, record_count)`` pair."""
    total, count = partial
    return total + age, count + 1


def merge_partials(left, right):
    """Combine two ``(total_age, record_count)`` pairs into one."""
    return left[0] + right[0], left[1] + right[1]


def main(argv):
    """Print the record count, the age total and the average age.

    Args:
        argv: command-line vector; ``argv[1]`` is the input path handed to
            ``SparkContext.textFile``.

    Exits with status -1 when the argument count is wrong, and with status 1
    when the input contains no non-blank records.
    """
    if len(argv) != 2:
        print("统计平均年龄 <input>", file=sys.stderr)
        sys.exit(-1)

    conf = SparkConf().setMaster("local[2]").setAppName("spark03")
    sc = SparkContext(conf=conf)
    try:
        ages = (
            sc.textFile(argv[1])
            # Blank lines carry no record; skipping them avoids an IndexError.
            .filter(lambda line: line.strip())
            .map(parse_age)
        )
        # One pass yields both the sum and the count, so the input is not
        # read twice, and an empty RDD simply returns the (0, 0) zero value
        # instead of raising like reduce() does.
        total_age, counts = ages.aggregate((0, 0), add_age, merge_partials)
    finally:
        # Always release the context, even when the job fails.
        sc.stop()

    if counts == 0:
        print("输入中没有年龄记录,无法计算平均年龄", file=sys.stderr)
        sys.exit(1)

    print(counts)
    print(total_age)
    print(total_age / counts)


if __name__ == '__main__':
    main(sys.argv)