78 lines
1.9 KiB
Python
78 lines
1.9 KiB
Python
|
import os
|
||
|
import pickle
|
||
|
import sys
|
||
|
|
||
|
# Debug tracing is on only when the DEBUG environment variable is exactly '1';
# an unset variable or any other value leaves it off.
DEBUG = os.environ.get('DEBUG', '') == '1'
|
||
|
|
||
|
|
||
|
class Trie:
    """Character trie: each node holds one character and a list of children.

    The root is conventionally created with an empty string as its value.
    Every path from the root spells out a prefix of some added word; the
    node that ends an added word has ``is_leaf`` set to True.
    """

    def __init__(self, value):
        """Create a node holding ``value`` with no children."""
        self.value = value
        # Direct child nodes, in insertion order.
        self.children = []
        # True when a word added via add() ends at this node.
        self.is_leaf = False

    def _find_child(self, ch):
        """Return the direct child whose value equals ``ch``, or None."""
        for child in self.children:
            if child.value == ch:
                return child
        return None

    def add(self, value):
        """Insert the word ``value``, one node per character.

        Existing nodes are reused for shared prefixes; missing nodes are
        created. The node reached after the last character is marked as a
        leaf (for an empty ``value`` that is this node itself).
        """
        node = self
        for ch in value:
            # Match on the current character, not the whole word, so that
            # words sharing a prefix share nodes and lookups can find them.
            child = node._find_child(ch)
            if child is None:
                child = Trie(ch)
                node.children.append(child)
            node = child
        node.is_leaf = True

    def __contains__(self, search):
        """Return True when ``search`` can be walked from this node.

        A trie without children contains nothing. Otherwise the result is
        True as soon as every character of ``search`` matches along one
        path; ``is_leaf`` is not consulted, so prefixes of added words
        (and the empty string) are reported as contained.
        NOTE(review): callers needing exact-word matches must check
        ``is_leaf`` themselves -- confirm this prefix behaviour is intended.
        """
        if not self.children:
            return False
        node = self
        for ch in search:
            node = node._find_child(ch)
            if node is None:
                return False
        return True
|
||
|
|
||
|
|
||
|
def main():
    """Build a Trie from a word list and pickle it to disk.

    Configuration is taken from environment variables:

    * ``WORDS`` -- path of the whitespace-separated word list to read
      (default ``words-en.txt``).
    * ``WORDS_DAT`` -- path the pickled trie is written to
      (default ``words-en.dat``).
    * ``WORDS_SIZE_RANGE`` -- dash-separated integers handed straight to
      ``range()`` (default ``2-15``). The upper bound is therefore
      exclusive: the default keeps words of length 2 through 14.

    Returns 0.
    """
    source_path = os.environ.get('WORDS', 'words-en.txt')
    target_path = os.environ.get('WORDS_DAT', 'words-en.dat')
    size_spec = os.environ.get('WORDS_SIZE_RANGE', '2-15')
    allowed_lengths = range(*[int(bound) for bound in size_spec.split('-')])

    trie = Trie('')

    with open(source_path) as word_stream:
        debug('reading from {!r}'.format(source_path))
        for candidate in word_stream.read().split():
            # Only words whose length falls inside the configured range
            # are stored.
            if len(candidate) in allowed_lengths:
                debug('adding {!r} to valid words'.format(candidate))
                trie.add(candidate)

    with open(target_path, 'wb') as dat_stream:
        debug('writing valid words to {!r}'.format(target_path))
        pickle.dump(trie, dat_stream)

    return 0
|
||
|
|
||
|
|
||
|
def debug(msg):
    """Print ``msg`` to standard error when the module-level DEBUG flag is set.

    Does nothing when DEBUG is false.
    """
    if DEBUG:
        print(msg, file=sys.stderr)
|
||
|
|
||
|
|
||
|
# Run main() only when executed as a script; its return value becomes the
# process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
|