"""Build a pickled trie of valid words from a plain-text word list.

Environment variables read by this script:

* ``WORDS`` -- path of the whitespace-separated word list to read
  (default ``words-en.txt``).
* ``WORDS_DAT`` -- path of the pickle file to write
  (default ``words-en.dat``).
* ``WORDS_SIZE_RANGE`` -- ``"START-STOP"`` word-length filter, expanded
  with ``range(START, STOP)`` so STOP itself is excluded
  (default ``2-15``).
* ``DEBUG`` -- set to ``1`` to log progress on stderr.
"""
import os
import pickle
import sys

DEBUG = os.environ.get('DEBUG', '') == '1'


class Trie:
    """A character trie; each node holds one character and its children."""

    def __init__(self, value):
        # Character stored at this node ('' is used for the root in main()).
        self.value = value
        # Child nodes, kept as a plain list and scanned linearly.
        self.children = []
        # True when the path from the root to this node spells an added word.
        self.is_leaf = False

    def _find_child(self, ch):
        """Return the direct child holding ``ch``, or None if there is none."""
        for child in self.children:
            if child.value == ch:
                return child
        return None

    def add(self, value):
        """Insert the word ``value``, one node per character.

        Existing nodes are reused for shared prefixes; the node reached
        after the last character is flagged as the end of a word.
        """
        node = self
        for ch in value:
            # Match on the current character, not the whole word, so that
            # words with a common prefix share the same path.
            child = node._find_child(ch)
            if child is None:
                child = Trie(ch)
                node.children.append(child)
            node = child
        node.is_leaf = True

    def __contains__(self, search) -> bool:
        """Return True only if ``search`` was added as a complete word.

        A string that is merely a prefix of a stored word is not
        considered a member.
        """
        node = self
        for ch in search:
            node = node._find_child(ch)
            if node is None:
                return False
        # Reaching a node is not enough: it must terminate an added word.
        return node.is_leaf


def main() -> int:
    """Read the word list, build the trie and pickle it; return exit code 0."""
    words_file = os.environ.get('WORDS', 'words-en.txt')
    words_dat_file = os.environ.get('WORDS_DAT', 'words-en.dat')
    # "START-STOP" is unpacked straight into range(), so STOP is exclusive.
    words_size_range = range(*map(
        int, os.environ.get('WORDS_SIZE_RANGE', '2-15').split('-')))

    valid_words = Trie('')

    with open(words_file) as inwords:
        debug('reading from {!r}'.format(words_file))
        for word in inwords.read().split():
            if len(word) not in words_size_range:
                continue
            debug('adding {!r} to valid words'.format(word))
            valid_words.add(word)

    with open(words_dat_file, 'wb') as outdat:
        debug('writing valid words to {!r}'.format(words_dat_file))
        pickle.dump(valid_words, outdat)

    return 0


def debug(msg) -> None:
    """Print ``msg`` to stderr when the DEBUG environment flag is enabled."""
    if not DEBUG:
        return
    print(msg, file=sys.stderr)


if __name__ == '__main__':
    sys.exit(main())