You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

78 lines
1.9 KiB

import os
import pickle
import sys
# Debug output is enabled only when the DEBUG environment variable is exactly '1'.
DEBUG = os.getenv('DEBUG', '') == '1'
class Trie:
    """Character trie holding a set of words.

    Every node stores a single character in ``value`` (the root stores
    whatever was passed to the constructor), its child nodes in the
    ``children`` list and an ``is_leaf`` flag that is set on the node
    where an added word ends.

    The attribute layout is kept as plain instance attributes with a list
    of children so that the pickled shape of a trie does not change.
    """

    def __init__(self, value):
        """Create a node labelled ``value`` with no children."""
        self.value = value
        self.children = []
        self.is_leaf = False

    def _child(self, ch):
        """Return the direct child labelled ``ch``, or ``None`` if absent."""
        for child in self.children:
            if child.value == ch:
                return child
        return None

    def add(self, value):
        """Insert the word ``value``, one node per character.

        Existing nodes are reused for shared prefixes; the node reached
        after the last character is flagged as the end of a word.
        """
        node = self
        for ch in value:
            child = node._child(ch)
            if child is None:
                # Label the new node with the current character, not the
                # whole word, so lookups can match it character by character.
                child = Trie(ch)
                node.children.append(child)
            node = child
        node.is_leaf = True

    def __contains__(self, search):
        """Return True only if ``search`` was added as a complete word.

        A string that is merely a prefix of an added word (including the
        empty string) is not considered a member.
        """
        node = self
        for ch in search:
            node = node._child(ch)
            if node is None:
                return False
        # Reaching a node is not enough: it must mark the end of a word.
        return node.is_leaf
def main():
    """Build a trie from a word list and pickle it to disk.

    Configuration comes from environment variables:

    * ``WORDS`` -- path of the whitespace-separated word list to read
      (default ``words-en.txt``).
    * ``WORDS_DAT`` -- path of the pickle file to write
      (default ``words-en.dat``).
    * ``WORDS_SIZE_RANGE`` -- accepted word lengths written as ``MIN-MAX``
      (default ``2-15``); both ends are inclusive. Any other number of
      dash-separated integers is passed to ``range()`` unchanged.

    Returns:
        0 on success.

    Raises:
        ValueError: if a part of ``WORDS_SIZE_RANGE`` is not an integer.
    """
    words_file = os.environ.get('WORDS', 'words-en.txt')
    words_dat_file = os.environ.get('WORDS_DAT', 'words-en.dat')

    size_spec = os.environ.get('WORDS_SIZE_RANGE', '2-15')
    bounds = [int(part) for part in size_spec.split('-')]
    if len(bounds) == 2:
        # range() excludes its stop value; bump it so that 'MIN-MAX'
        # also accepts words that are exactly MAX characters long.
        bounds[1] += 1
    words_size_range = range(*bounds)

    valid_words = Trie('')
    with open(words_file) as inwords:
        debug('reading from {!r}'.format(words_file))
        # Stream the file line by line instead of loading it all at once.
        for line in inwords:
            for word in line.split():
                if len(word) not in words_size_range:
                    continue
                debug('adding {!r} to valid words'.format(word))
                valid_words.add(word)

    with open(words_dat_file, 'wb') as outdat:
        debug('writing valid words to {!r}'.format(words_dat_file))
        pickle.dump(valid_words, outdat)
    return 0
def debug(msg):
    """Print ``msg`` to standard error when the DEBUG flag is set.

    Does nothing when DEBUG is false.
    """
    if DEBUG:
        print(msg, file=sys.stderr)
# Script entry point: run main() and use its return value as the exit status.
if __name__ == '__main__':
    raise SystemExit(main())