# Mapper: reads the decimal expansion of a number from stdin and emits one
# "<digit><TAB>1" record per decimal digit on stdout, for a downstream
# reducer to aggregate.
import sys

# Only these characters are emitted as keys; anything else (decimal point,
# embedded spaces, stray symbols) cannot be counted as a digit downstream.
_DECIMAL_DIGITS = "0123456789"


def map_digits(lines):
    """Yield a ``digit<TAB>1`` record for every decimal digit in *lines*.

    Args:
        lines: Iterable of text lines (for example ``sys.stdin``).

    Yields:
        One string per digit, formatted as the digit, a tab, and ``1``.

    The first line is expected to start with the integer part and the decimal
    point; everything up to and including the first ``.`` on that line is
    dropped so only the decimals are counted. If the first line has no ``.``
    it is kept whole.
    """
    for index, raw_line in enumerate(lines):
        text = raw_line.strip()
        if index == 0:
            # Drop the integer part without assuming it is one character wide.
            _integer_part, point, fraction = text.partition(".")
            if point:
                text = fraction
        for char in text:
            # Skip non-digits: a whitespace key would produce a record the
            # reducer cannot split back into key and count.
            if char in _DECIMAL_DIGITS:
                yield "%s\t%s" % (char, "1")


def mapper_main(stream=None):
    """Write the mapper records for *stream* (default: stdin) to stdout."""
    source = sys.stdin if stream is None else stream
    for record in map_digits(source):
        print(record)


if __name__ == "__main__":
    mapper_main()
# Reducer: reads "<digit><TAB><count>" records from stdin (as produced by the
# mapper), totals the counts per digit, and prints the per-digit totals, the
# totals sorted by count, and the average digit value.
import sys


def count_digits(lines):
    """Aggregate ``digit<TAB>count`` records into a ``{digit: total}`` dict.

    Args:
        lines: Iterable of text records (for example ``sys.stdin``).

    Returns:
        Dict mapping each integer key to the sum of its counts, in order of
        first appearance.

    Records that have no tab separator, or whose key or count is not an
    integer, are skipped.
    """
    totals = {}
    for raw_line in lines:
        try:
            # A failed unpack (no tab) and a failed int() both raise
            # ValueError, so one handler covers every malformed record.
            key, value = raw_line.strip().split("\t", 1)
            digit, count = int(key), int(value)
        except ValueError:
            continue
        totals[digit] = totals.get(digit, 0) + count
    return totals


def report_lines(numbercount):
    """Yield the reducer's output lines for the aggregated *numbercount*.

    Args:
        numbercount: Dict mapping digit -> total count.

    Yields:
        1. One ``digit<TAB>count`` line per entry, in the dict's own order.
        2. The ``(digit, count)`` pairs as a list sorted by count, then digit,
           in increasing order.
        3. ``avg= <value>``, the count-weighted mean of the digits. This line
           is omitted when the total count is zero, since the mean is
           undefined for empty input.
    """
    weighted_sum = 0
    digit_total = 0
    for digit, count in numbercount.items():
        weighted_sum += digit * count
        digit_total += count
        yield "%s\t%s" % (digit, count)

    ranked = sorted(numbercount.items(), key=lambda kv: (kv[1], kv[0]))
    yield str(ranked)

    if digit_total:
        yield "avg= %s" % (weighted_sum / digit_total)


def reducer_main(stream=None):
    """Write the reducer report for *stream* (default: stdin) to stdout."""
    source = sys.stdin if stream is None else stream
    for line in report_lines(count_digits(source)):
        print(line)


if __name__ == "__main__":
    reducer_main()
结果是一堆 key-value pairs。如果用以上的 MapReduce 来处理 sqrt(2),那么得到的结果是: