# Word Frequency
#
# Counts how often each word occurs in the text file named by `fn`, then
# builds a list of the distinct words ordered from most to least frequent.
#
# Results left at module level:
#   freq  -- dict mapping each lower-cased word to its number of occurrences
#   items -- list of (count, word) tuples, highest count first
#   words -- list of the words alone, in the same order as `items`

import re

fn = "ttlg.txt"

# to store word frequencies
freq = {}

# `with` closes the file as soon as the loop finishes, even if an error occurs
with open(fn) as fh:
    # iterate over the file, one line at a time
    for text in fh:
        # same transformation as before: replace every non-word character
        # (anything other than a letter, digit or underscore) with a space,
        # lower-case the line and collapse runs of whitespace.
        # Raw strings keep the backslashes intact for the regex engine.
        text = re.sub(r"[^\w]", " ", text)
        text = text.lower()
        text = re.sub(r"\s+", " ", text)
        text = text.strip()

        # iterate over words
        for word in text.split():
            # need to test if word is in dictionary
            # if it isn't, trying to access it is an error!
            # (`in` replaces dict.has_key(), which no longer exists in Python 3)
            if word in freq:
                freq[word] = freq[word] + 1
            else:
                freq[word] = 1

# let's also make a list of words sorted by frequency
# need to flip around key/value pairs so we can sort by value
items = [(count, word) for word, count in freq.items()]

# sort the list (ascending by count, ties broken alphabetically by word)
items.sort()

# reverse it so the most frequent words come first
items.reverse()

# alternatively, items.sort(reverse=True) will sort in reverse order

# pull out the words
words = [pair[1] for pair in items]