-
Notifications
You must be signed in to change notification settings - Fork 5
/
5_make_mdx.py
127 lines (121 loc) · 4.62 KB
/
5_make_mdx.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
from tqdm import tqdm
from utils.writemdict.writemdict import MDictWriter
from ryan_similar.similar_word import Distance_Similar
# ####################################################### build filter list
# Load the master word list (one entry per line) and reduce it to a set of
# unique head words that the dictionary builders below iterate over.
all_word_list = 'D:/github_project/make_anki_word_list/word_list/all.txt'
with open(all_word_list, encoding='utf-8') as f:
    word_list = f.read().splitlines()
# 'con' is deliberately left out of the set.
# NOTE(review): presumably because CON is a reserved device name on Windows
# -- confirm with the author.
word_set = {entry for entry in word_list if entry != 'con'}
# ##################################################### distance similar
# Build a {word: similar-words string} mapping for every word in word_set and
# write it out as output/distance_similar.mdx. Words for which the lookup
# returns an empty string get no dictionary entry.
edit_distance = Distance_Similar()
dictionary = dict()
for word in tqdm(word_set, desc='distance_similar.mdx'):
    similar_str = edit_distance.get_similar_word_str(word)
    if len(similar_str) > 0:
        dictionary[word] = similar_str
writer = MDictWriter(
    dictionary,
    title="Distance Similar Dictionary",
    description="find similar by Levenshtein distance",
)
# Use a context manager so the output handle is closed even if write() raises;
# the previous open()/close() pair leaked the handle on error.
with open("output/distance_similar.mdx", "wb") as outfile:
    writer.write(outfile)
# ##################################################### no prefix similar
# no_prefix_similar = No_Prefix_Similar()
# dictionary = dict()
# for word in tqdm(word_set, desc='no_prefix_similar.mdx'):
# similar_str = no_prefix_similar.get_similar_word_str(word)
# if len(similar_str) > 0:
# dictionary[word] = similar_str
#
# writer = MDictWriter(dictionary, title="No Prefix Similar Dictionary", description="find similar by no prefix")
# outfile = open("output/no_prefix_similar.mdx", "wb")
# writer.write(outfile)
# outfile.close()
#
#
# ##################################################### no suffix similar
# no_suffix_similar = No_Suffix_Similar()
# dictionary = dict()
# for word in tqdm(word_set, desc='no_suffix_similar.mdx'):
# similar_str = no_suffix_similar.get_similar_word_str(word)
# if len(similar_str) > 0:
# dictionary[word] = similar_str
#
# writer = MDictWriter(dictionary, title="No suffix Similar Dictionary", description="find similar by no suffix")
# outfile = open("output/no_suffix_similar.mdx", "wb")
# writer.write(outfile)
# outfile.close()
#
#
# # ##################################################### youdict mem
# youdict_mem = Youdict_Mem()
# dictionary = dict()
# for word in tqdm(word_set, desc='youdict_mem.mdx'):
# mem_str = youdict_mem.get_mem_html(word)
# if len(mem_str) > 0:
# dictionary[word] = mem_str
#
# writer = MDictWriter(dictionary, title="Memory Dictionary", description="Memory Dictionary from www.youdict.com")
# outfile = open("output/youdict_mem.mdx", "wb")
# writer.write(outfile)
# outfile.close()
#
#
# # #################################################### root youdict yaml
# assembled_root = Assembled_Root()
#
# dictionary = dict()
# for word in tqdm(word_set, desc='root.mdx'):
# root = assembled_root.get_root_html(word)
# if len(root) > 0:
# dictionary[word] = root
#
# writer = MDictWriter(dictionary, title="Root and Affix Dictionary", description="Root and Affix Dictionary from www.youdict.com or yaml or etymonline")
# outfile = open("output/root.mdx", "wb")
# writer.write(outfile)
# outfile.close()
#
#
# # # #################################################### possible prefix/root/suffix (yaml)
# yaml_root = Yaml_Root()
#
# dictionary = dict()
# for word in tqdm(word_set, desc='possible_prs.mdx'):
# possible_root = yaml_root.get_possible_prefix_root_suffix_html(word)
# if len(possible_root) > 0:
# dictionary[word] = possible_root
#
# writer = MDictWriter(dictionary, title="Root and Affix Dictionary", description="possible root and affix dictionary from yaml")
# outfile = open("output/possible_prs.mdx", "wb")
# writer.write(outfile)
# outfile.close()
#
#
#
# # #################################################### gre frequency
# all_word_list = 'D:/github_project/make_anki_word_list/word_list/GRE_frequency.txt'
# with open(all_word_list, encoding='utf-8') as f:
# line_list = f.read().splitlines()
#
# dictionary = dict()
# for line in line_list:
# word, frequency = line.split('\\')
# dictionary[word] = frequency
#
# writer = MDictWriter(dictionary, title="GRE word frequency", description="")
# with open("output/GRE_word_frequency.mdx", "wb") as f:
# writer.write(f)
#
# # #################################################### toefl frequency
# all_word_list = 'D:/github_project/make_anki_word_list/word_list/TOEFL_frequency_6.0.txt'
# with open(all_word_list, encoding='utf-8') as f:
# line_list = f.read().splitlines()
#
# dictionary = dict()
# for line in line_list:
# word, frequency = line.split('\\')
# dictionary[word] = frequency
#
# writer = MDictWriter(dictionary, title="TOEFL word frequency", description="")
# with open("output/TOEFL_word_frequency.mdx", "wb") as f:
# writer.write(f)
#