self.vocab = vocab
- self.glossaries = glossaries
+ self.glossaries = glossaries if glossaries else []
def segment(self, sentence):
"""segment single sentence (whitespace-tokenized string) with BPE encoding"""
else:
vocabulary = None
- bpe = BPE(args.codes, args.separator, vocabulary)
+ bpe = BPE(args.codes, args.separator, vocabulary, args.glossaries)
for line in args.input:
args.output.write(bpe.segment(line).strip())
test_case = (orig, exp)
self._run_test_case(test_case)
- def test_isolated_glossary(self):
- orig = 'like'
- exp = ['like']
- test_case = (orig, exp)
- self._run_test_case(test_case)
-
def test_word_one_side(self):
orig = 'likeword'
exp = ['like', 'word']