-
Notifications
You must be signed in to change notification settings - Fork 0
/
hw5.py
59 lines (48 loc) · 1.76 KB
/
hw5.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# -*- coding: utf-8 -*-
import csv
import json
import pickle
import string
from collections import Counter
def main(filename):
    """Count the words in a text file and dump the counts to disk.

    Every line of ``filename`` is split on whitespace, leading and trailing
    ASCII punctuation (``string.punctuation``) is stripped from each token,
    and tokens that end up empty are discarded. Words are counted
    case-sensitively. The resulting ``(word, count)`` pairs, ordered from
    most to least frequent, are written to three files in the current
    working directory:

    * ``wordcount.csv``  -- a ``word,count`` header, then one row per word
    * ``wordcount.json`` -- a JSON array of ``[word, count]`` pairs
    * ``wordcount.pkl``  -- the pickled list of ``(word, count)`` tuples

    Args:
        filename: Path of the text file to read (opened in text mode with
            the platform default encoding).

    Raises:
        OSError: If the input file cannot be opened or an output file
            cannot be written.
    """
    counter = Counter()

    # The context manager guarantees the input file is closed, even when
    # reading fails part-way; iterating the handle streams line by line
    # instead of loading the whole file into memory first.
    with open(filename) as text_file:
        for line in text_file:
            # "I have a dream." => ["I", "have", "a", "dream."]
            # then strip punctuation from both ends: "dream." => "dream"
            stripped = (
                token.strip(string.punctuation) for token in line.split()
            )
            # Tokens made only of punctuation (e.g. "--") become "" and
            # must not be counted as words.
            counter.update(word for word in stripped if word)

    # Sort once and reuse the same list for all three output formats.
    pairs = counter.most_common()

    # newline="" lets the csv module control line endings itself.
    with open("wordcount.csv", "w", newline="") as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(["word", "count"])
        writer.writerows(pairs)

    with open("wordcount.json", "w") as json_file:
        json.dump(pairs, json_file)

    # NOTE(review): the original assignment hint suggested pickling the
    # Counter object itself; the sorted pair list is kept here so the
    # existing wordcount.pkl format does not change -- confirm which one
    # downstream readers expect.
    with open("wordcount.pkl", "wb") as pkl_file:
        pickle.dump(pairs, pkl_file)
# Run the word count on "i_have_a_dream.txt" only when this file is executed
# as a script, so importing the module has no side effects.
if __name__ == "__main__":
    main("i_have_a_dream.txt")