10.1
# Exercise 10.1: report which address appears most often as the second field
# of lines whose first field is exactly 'From', and how many times it appears.
counts = {}
# The context manager closes the file even if reading or parsing fails.
with open('mbox-short.txt') as fhand:
    for line in fhand:
        words = line.split()
        # words[0] must be exactly 'From', so a token such as 'From:' does
        # not match; the length check guards the words[1] access.
        if len(words) >= 2 and words[0] == 'From':
            counts[words[1]] = counts.get(words[1], 0) + 1

# Nothing is printed when no line matched.
if counts:
    # Comparing (count, address) tuples selects the highest count; on a tie
    # the address that sorts last wins, exactly as the former descending
    # sort followed by taking element 0 did.
    c, p = max((n, who) for who, n in counts.items())
    # A single pre-formatted string yields single spaces between the fields
    # (the comma-separated print doubled them) and is valid both as a
    # Python 2 print statement and as a Python 3 print() call.
    print("Person with the most commits: %s with %d commits" % (p, c))
10.2
# Exercise 10.2: count lines whose first field is exactly 'From', grouped by
# the part of the sixth field that precedes the first ':'.
per_hour = {}
# The context manager closes the file even if reading or parsing fails.
with open('mbox-short.txt') as fhand:
    for line in fhand:
        words = line.split()
        # The length check guards the words[5] access. The sixth field is
        # presumably an HH:MM:SS timestamp, making the key the hour --
        # TODO confirm against the log format.
        if len(words) >= 6 and words[0] == 'From':
            hour = words[5].split(':')[0]
            per_hour[hour] = per_hour.get(hour, 0) + 1

# sorted() accepts both the Python 2 list and the Python 3 view returned by
# items(); calling .sort() on the view fails on Python 3. Keys are strings,
# so the order is lexicographic.
for hour, freq in sorted(per_hour.items()):
    print(hour + " " + str(freq))
10.3
def most_frequent(s):
    """Print the letters of s with their counts, most frequent first.

    Only characters for which ``isalpha()`` is true are counted, so
    whitespace, digits and punctuation no longer appear in the report (a
    counted newline would also break the one-pair-per-line output).
    Case is not folded here; callers that want a case-insensitive count
    must lower-case the text first.

    Args:
        s: The text to analyse, iterated character by character.

    Returns:
        None. One line per distinct letter is printed as "<letter> <count>",
        ordered by descending count; letters with equal counts appear in
        descending character order.
    """
    counts = {}
    for ch in s:
        if ch.isalpha():
            counts[ch] = counts.get(ch, 0) + 1

    # Sorting (count, letter) tuples in reverse orders by count first and
    # breaks ties by letter.
    ranked = sorted(((n, ch) for ch, n in counts.items()), reverse=True)
    for n, ch in ranked:
        # A single string argument keeps this valid on Python 2 and 3.
        print(ch + " " + str(n))
# Exercise 10.3 driver: ask for a file name, lower-case the file's text and
# hand it to most_frequent() for the frequency report.
try:
    ask = raw_input   # Python 2
except NameError:
    ask = input       # Python 3, where raw_input() was renamed to input()

a = ask('File to use? ')
# Read the text in one call instead of growing a string line by line, which
# was quadratic and also inserted a " " before every line that is not part
# of the file. The context manager closes the file even if reading fails.
with open(a) as fhand:
    s = fhand.read().lower()
most_frequent(s)
I ran this one on part of http://www.gutenberg.org/cache/epub/12501/pg12501.txt (after re-encoding it as Latin-1).
The top ten letters are:
- e 6741
- a 5394
- o 4986
- s 4452
- n 3630
- r 3103
- l 2927
- d 2652
- i 2551
- u 2321
According to http://en.wikipedia.org/wiki/Letter_frequencies, the most
frequent letters in Spanish are "eaosr nidlc", so we obtained a very similar result.