Skip to content

Commit

Permalink
modified: EduNLP/SIF/tokenization/text/tokenization.py
Browse files (browse the repository at this point in the history)
	modified:   tests/test_tokenizer/test_tokenizer.py
  • Loading branch information
KINGNEWBLUSH committed Mar 11, 2024
1 parent a289a7a commit ad7df8b
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 5 deletions.
2 changes: 1 addition & 1 deletion EduNLP/SIF/tokenization/text/tokenization.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def tokenize(text,
elif (tokenizer == 'spacy'):
try:
spacy_tokenizer = spacy.load(tok_model)
-except LookupError:
+except OSError:
spacy.cli.download(tok_model)
spacy_tokenizer = spacy.load(tok_model)

Expand Down
6 changes: 2 additions & 4 deletions tests/test_tokenizer/test_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,8 @@ def test_TokenizerBPE():
', ', '$', '25', '$ ', 'e', 'a', 'c', 'h', ', ', 'h', 'ow', ' m', 'an', 'y',
' ', 'are', ' ', 's', 'o', 'l', 'd']
]
-tokenizer = get_tokenizer("pure_text",
-text_params={"tokenizer": 'bpe',
-"bpe_trainfile": "../../static/test_data/standard_luna_data.json",
-"stopwords": set(",?")})
+tokenizer = get_tokenizer("pure_text", text_params={"tokenizer": 'bpe', "stopwords": set(",?"),
+"bpe_trainfile": "../../../../static/test_data/standard_luna_data.json"})
tokens = tokenizer(items)
ret = next(tokens)
assert ret == ans
Expand Down

0 comments on commit ad7df8b

Please sign in to comment.