A data structure for searching for keywords.
- Supports Python 2.7, Python 3.5
- PyPI: https://pypi.org/project/keyword-tree/
- GitHub: https://github.com/david-pettifor-nd/keyword_tree
pip install keyword-tree
or download the package and run: python setup.py install
from keywordtree import Tree
# empty tree with all defaults
tree = Tree()
# tree with a list of keywords passed in
tree = Tree(keywords=['bobby', 'blanket', 'bubbles', 'book', 'blood', 'bar', 'cat', 'category', 'car', 'cost'])
# tree with a memory consumption limit set to 1 MB (pass in number of bytes; default = no limit)
tree = Tree(memory_limit=(1024 * 1024),
keywords=['bobby', 'blanket', 'bubbles', 'book', 'blood', 'bar', 'cat', 'category', 'car', 'cost'])
A tree can also be built from a list containing dictionary entries, which lets you enable case sensitivity or disable regular-expression matching for individual keywords
# A list of keywords: the first preserves capitalization, the second turns regular expressions off, the third does both, and the fourth (a plain string) uses the defaults
keyword_list = [
{'keyword': 'Notre Dame', 'case': True},
{'keyword': 'university', 'regex': False},
{'keyword': 'Fighting', 'case': True, 'regex': False},
'irish'
]
tree = Tree(keywords=keyword_list)
# You can also add a list to a pre-existing tree
tree = Tree()
keyword_list = [
{'keyword': 'Notre Dame', 'case': True},
{'keyword': 'university', 'regex': False},
{'keyword': 'Fighting', 'case': True, 'regex': False},
'irish'
]
tree.add_list(keyword_list)
# tree with a list of keywords passed in
tree = Tree(keywords=['bobby', 'blanket', 'bubbles', 'book', 'blood', 'bar', 'cat', 'category', 'car', 'cost'])
# then remove the word 'bubbles'
tree.remove_keyword('bubbles')
# tree with a list of keywords passed in
tree = Tree(keywords=['bobby', 'blanket', 'bubbles', 'book', 'blood', 'bar', 'cat', 'category', 'car', 'cost'])
# then export to a file 'mytree.json'
with open('mytree.json', 'w') as file_out:
    file_out.write(tree.dump())
# empty tree
tree = Tree()
# then import from a file 'mytree.json'
with open('mytree.json', 'r') as file_in:
    tree.load(file_in.read())
# Import the keyword tree
from keywordtree import Tree
# Create a simple list of keywords
word_list = ['bobby', 'blanket', 'bubbles', 'book', 'blood', 'bar', 'cat', 'category', 'car', 'cost']
# Create the tree
tree = Tree(word_list)
# print the tree
tree.print_tree()
# Search the following text for the keywords listed
found_words = tree.prune('My cat and I are reading a book, covered with my favorite blanket at the bar.')
# print the words we found - this is a list
print(found_words)
Output:
Tree Size: 10808 bytes
root
- 'b'
- 'bubbles'
- 'bo'
- 'book'
- 'bobby'
- 'bl'
- 'blood'
- 'blanket'
- 'bar'
- 'c'
- 'cost'
- 'ca'
- 'car'
- 'cat'
- 'category'
- 'cat'
['book', 'blanket', 'bar', 'cat']
Note the keyword `Nashville`: by default, case sensitivity is turned off, so the tree casts the keyword to lower case during construction (see output).
# Import the keyword tree
from keywordtree import Tree
# Create a simple list of keywords
word_list = [{'keyword': 'Notre Dame', 'case': True}, 'Nashville', {'keyword': 'North Dakota', 'case': True}]
# Create the tree
tree = Tree(word_list)
# print the tree
tree.print_tree()
Output:
Tree Size: 3408 bytes
root
- 'nashville'
- 'No'
- 'North Dakota'
- 'Notre Dame'
Note that for the word `cat` we turn regular expressions (`regex`) OFF (`False`). This means that it will find the word `cat` within the larger word `catastrophe`. Yet, because `regex` is `True` by default, the word `his` is not found inside the first word `This`.
# Import the keyword tree
from keywordtree import Tree
# Create a simple list of keywords
word_list = ['his', {'keyword': 'cat', 'regex': False}]
# Create the tree
tree = Tree(word_list)
# Find the above keywords in the following text:
print(tree.prune('This sentence could be a catastrophe.'))