CHANGELOG
---------
+v0.3.5:
+ - fix to subword-nmt command under Python 2
+ - wider support of --total-symbols argument
+
v0.3.4:
- segment_tokens method to improve library usability (https://github.com/rsennrich/subword-nmt/pull/52)
- support regex glossaries (https://github.com/rsennrich/subword-nmt/pull/56)
setup(
name='subword_nmt',
- version='0.3.4',
+ version='0.3.5',
description='Unsupervised Word Segmentation for Neural Machine Translation and Text Generation',
long_description=(codecs.open("README.md", encoding='utf-8').read() +
"\n\n" + codecs.open("CHANGELOG.md", encoding='utf-8').read()),
args = parser.parse_args()
- if sys.version_info < (3, 0):
- args.separator = args.separator.decode('UTF-8')
- if args.glossaries:
- args.glossaries = [g.decode('UTF-8') for g in args.glossaries]
-
if args.command == 'learn-bpe':
# read/write files as UTF-8
if args.input.name != '<stdin>':
else:
vocabulary = None
+ if sys.version_info < (3, 0):
+ args.separator = args.separator.decode('UTF-8')
+ if args.glossaries:
+ args.glossaries = [g.decode('UTF-8') for g in args.glossaries]
+
bpe = BPE(args.codes, args.merges, args.separator, vocabulary, args.glossaries)
for line in args.input:
get_vocab(args.input, args.output)
elif args.command == 'learn-joint-bpe-and-vocab':
learn_joint_bpe_and_vocab(args)
+ if sys.version_info < (3, 0):
+ args.separator = args.separator.decode('UTF-8')
else:
raise Exception('Invalid command provided')