-
Notifications
You must be signed in to change notification settings - Fork 1
/
util.py
55 lines (36 loc) · 1.13 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from fileLoader import indexCorpus
from document import buildInvertedIndex
import re
from Algorithms.stemmer_algo import PorterStemmer
def get_document(word, index):
    """Return the set of documents listed for ``word`` in ``index``.

    ``index[word]`` is read as a sequence whose first element is skipped
    and whose remaining elements are two-item entries; the first item of
    every entry is collected.  (Presumably the skipped element is a
    per-word header and each entry is a ``(document, value)`` pair --
    confirm against the code that builds the index.)

    Args:
        word: Key to look up in ``index``.
        index: Mapping from words to their posting sequences.

    Returns:
        A new set of the collected documents; empty when ``word`` is not
        a key of ``index``.
    """
    if word not in index:
        return set()
    # Two-name unpacking keeps the requirement that every entry after the
    # first element has exactly two items.
    return {doc for doc, _ in index[word][1:]}
def merge_or(wordList, index):
    """Return the documents that contain at least one word of ``wordList``.

    Args:
        wordList: Iterable of words to look up.
        index: Index passed through unchanged to ``get_document``.

    Returns:
        A new set holding the union of ``get_document(word, index)`` over
        all words; empty when ``wordList`` is empty.
    """
    per_word_documents = (get_document(term, index) for term in wordList)
    # Starting from a fresh empty set guarantees the result never aliases
    # one of the per-word sets.
    return set().union(*per_word_documents)
def merge_and(wordList, index):
    """Return the documents that contain every word of ``wordList``.

    Args:
        wordList: Sequence of words to look up.
        index: Index passed through unchanged to ``get_document``.

    Returns:
        The intersection of ``get_document(word, index)`` over all words.
        An empty ``wordList`` yields an empty set instead of raising
        ``IndexError`` on the first-element lookup.
    """
    if not wordList:
        return set()

    common_document = get_document(wordList[0], index)
    for word in wordList[1:]:
        if not common_document:
            # Nothing left to intersect; further lookups cannot add results.
            break
        common_document &= get_document(word, index)
    return common_document
def make_query_and(query, index):
    """Run an AND query: return documents matching every word in ``query``.

    The query is lower-cased, split on whitespace, each token is stemmed
    with ``PorterStemmer`` and the stems are handed to ``merge_and``.

    Args:
        query: Free-text query string.
        index: Index passed through unchanged to ``merge_and``.

    Returns:
        The set returned by ``merge_and`` for the stemmed tokens, or an
        empty set when the query contains no tokens.
    """
    # split() with no argument splits on any run of whitespace and drops
    # empty strings.  split(' ') produced '' tokens for doubled, leading or
    # trailing spaces, and a '' token (absent from the index) emptied the
    # whole AND result.
    tokens = query.lower().split()
    if not tokens:
        return set()

    stemmer = PorterStemmer()
    # stem() is given the token plus the indices of its first and last
    # characters, i.e. the whole token is stemmed.
    words = [stemmer.stem(token, 0, len(token) - 1) for token in tokens]
    return merge_and(words, index)
def make_query_or(query, index):
    """Run an OR query: return documents matching any word in ``query``.

    The query is lower-cased, split on whitespace, each token is stemmed
    with ``PorterStemmer`` and the stems are handed to ``merge_or``.

    Args:
        query: Free-text query string.
        index: Index passed through unchanged to ``merge_or``.

    Returns:
        The set returned by ``merge_or`` for the stemmed tokens, or an
        empty set when the query contains no tokens.
    """
    # split() with no argument splits on any run of whitespace (spaces,
    # tabs, newlines) and drops empty strings, so no '' token is ever sent
    # to the stemmer or looked up in the index.
    tokens = query.lower().split()
    if not tokens:
        return set()

    stemmer = PorterStemmer()
    # stem() is given the token plus the indices of its first and last
    # characters, i.e. the whole token is stemmed.
    words = [stemmer.stem(token, 0, len(token) - 1) for token in tokens]
    return merge_or(words, index)