Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions DIRECTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,9 @@
* Minstack
* [Test Min Stack](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/stacks/minstack/test_min_stack.py)
* [Test Stacks](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/stacks/test_stacks.py)
* Streams
* Stream Checker
* [Test Stream Checker](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/streams/stream_checker/test_stream_checker.py)
* Timemap
* [Test Timemap](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/timemap/test_timemap.py)
* Trees
Expand Down Expand Up @@ -303,6 +306,9 @@
* Ternary
* [Node](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/trees/ternary/node.py)
* [Test Ternary Tree Paths](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/trees/ternary/test_ternary_tree_paths.py)
* Trie
* [Trie](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/trees/trie/trie.py)
* [Trie Node](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/trees/trie/trie_node.py)
* Tuples
* [Named Tuples](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/tuples/named_tuples.py)

Expand Down
Empty file.
30 changes: 30 additions & 0 deletions datastructures/streams/stream_checker/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Stream of Characters

Design a data structure that processes a stream of characters and, after each character is received, determines if a
suffix of these characters is a string in a given array of strings words.

For example, if words = ["dog", "cat"] and the stream adds the characters ‘d’, ‘c’, ‘a’, and ‘t’ in sequence, the algorithm
should detect that the suffix "cat" of the stream "dcat" matches the word "cat" from the list.

So, for words, the goal is to detect if any of these words appear as a suffix of the stream built so far. To accomplish
this, implement a class StreamChecker:

- **Constructor**: Initializes the object with the list of target words.
- **boolean query(char letter)**: Appends a character to the stream and returns TRUE if any suffix of the stream matches
a word in the list words.

Constraints:

- 1 ≤ words.length ≤ 1000
- 1 ≤ words[i].length ≤ 200
- words[i] consists of lowercase English letters.
- letter is a lowercase English letter.
- At most 4 * 10^2 calls will be made to query.

Examples:

![Example 1](./images/examples/stream_checker_example_1.png)
![Example 2](./images/examples/stream_checker_example_2.png)
![Example 3](./images/examples/stream_checker_example_3.png)


73 changes: 73 additions & 0 deletions datastructures/streams/stream_checker/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from typing import Deque, List
from collections import deque
from datastructures.trees.trie import TrieNode


class StreamChecker:
    """Detects whether any suffix of a character stream is one of a set of words.

    The words are stored *character-reversed* in a trie, so that walking the
    stream from its newest character backwards follows a single trie path.
    """

    def __init__(self, words: List[str]):
        """
        Initializes a StreamChecker instance.

        Constructor Time: O(Ltotal), where Ltotal is the sum of the lengths of all words. This is a one-time cost.

        Parameters:
        words (List[str]): List of words to be checked in the stream.
        """
        self.words = words
        self.trie = TrieNode()
        self.max_len = 0
        self.__build_trie()
        # Only the last `max_len` characters can ever take part in a match, so
        # a bounded deque silently discards anything older.
        self.stream: Deque[str] = deque(maxlen=self.max_len)

    def __build_trie(self):
        """Inserts every word, character-reversed, into the trie and records the longest word length."""
        # The order in which words are inserted is irrelevant; only the
        # characters inside each word have to be reversed.
        for word in self.words:
            # 1. track max length for deque optimization
            self.max_len = max(self.max_len, len(word))

            current = self.trie
            # 2. insert characters in reverse order (reversed() avoids copying the word);
            #    indexing `children` relies on the node creating missing children on access
            for letter in reversed(word):
                current = current.children[letter]

            # 3. Mark the end of the reversed word
            current.is_end = True

    def query(self, letter: str) -> bool:
        """
        Appends a letter to the stream and reports whether some suffix of the stream is one of the words.

        Query Time: O(min(L, Lmax)), where L is the number of characters received so far and Lmax is the length of
        the longest word. The stored history is capped at Lmax characters, so the walk below never exceeds it.

        Parameters:
        letter (str): The next letter in the stream.

        Returns:
        bool: True if a suffix of the stream (ending at this letter) matches a word, False otherwise.
        """
        self.stream.append(letter)
        current = self.trie

        # Iterate stream in reverse (newest character first)
        for character in reversed(self.stream):
            # Membership test (not indexing) so that a miss does not create a node
            if character not in current.children:
                return False

            # Traverse to the next node
            current = current.children[character]

            # A reversed word ends here, i.e. a suffix of the stream matches
            if current.is_end:
                return True

        # Ran out of history without completing any word
        return False
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
35 changes: 35 additions & 0 deletions datastructures/streams/stream_checker/test_stream_checker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import unittest
from . import StreamChecker


class StreamCheckerTestCase(unittest.TestCase):
    """Feeds letters into a StreamChecker and verifies the answer after each one."""

    def _run_scenario(self, words, steps):
        """Query each letter of ``steps`` in order, asserting the paired expectation."""
        checker = StreamChecker(words)
        for letter, expected in steps:
            verify = self.assertTrue if expected else self.assertFalse
            verify(checker.query(letter))

    def test_1(self):
        self._run_scenario(
            ["go", "hi"],
            [
                ("h", False),
                ("i", True),
                ("g", False),
                ("o", True),
                ("x", False),
                ("y", False),
            ],
        )

    def test_2(self):
        self._run_scenario(
            ["no", "yes"],
            [
                ("y", False),
                ("e", False),
                ("s", True),
                ("n", False),
                ("o", True),
            ],
        )

    def test_3(self):
        self._run_scenario(
            ["a", "aa"],
            [
                ("a", True),
                ("a", True),
                ("a", True),
                ("b", False),
            ],
        )

if __name__ == '__main__':
unittest.main()
69 changes: 6 additions & 63 deletions datastructures/trees/trie/__init__.py
Original file line number Diff line number Diff line change
@@ -1,65 +1,8 @@
from collections import defaultdict
from typing import List
from datastructures.trees.trie.trie_node import TrieNode
from datastructures.trees.trie.trie import Trie


class _TrieChildren(defaultdict):
    """Child map that creates a missing child node labelled with the looked-up key."""

    def __missing__(self, key):
        # A defaultdict factory is called with no arguments, but a TrieNode
        # needs its character; build the node from the key instead.
        node = self[key] = TrieNode(key)
        return node


class TrieNode:
    """A single trie node: its character, its children and an end-of-word flag."""

    def __init__(self, char: str):
        """
        Parameters:
        char (str): The character this node represents ("" for a root node).
        """
        self.char = char
        # Still a defaultdict, so `in` / `.get` never create nodes; only
        # indexing a missing key creates a child.
        self.children = _TrieChildren()
        self.is_end = False


class Trie:
    """Prefix tree whose nodes each remember the character they stand for."""

    def __init__(self):
        # The root stands for the empty prefix.
        self.root = TrieNode("")

    def insert(self, word: str) -> None:
        """Adds ``word`` to the trie, creating any nodes that are missing along its path."""
        node = self.root

        for letter in word:
            child = node.children.get(letter)
            if child is None:
                child = TrieNode(letter)
                node.children[letter] = child
            node = child

        node.is_end = True

    def search(self, word: str) -> List[str]:
        """Returns every stored word that begins with ``word`` (empty list for no match or empty input)."""
        if not word:
            return []

        node = self.root
        for letter in word:
            node = node.children.get(letter)
            if node is None:
                return []

        matches = []

        def collect(current: TrieNode, stem: str) -> None:
            # `stem` excludes the current node's own character.
            text = stem + current.char
            if current.is_end:
                matches.append(text)

            for descendant in current.children.values():
                collect(descendant, text)

        # The reached node contributes the last character of `word` itself.
        collect(node, word[:-1])
        return matches

    def starts_with(self, prefix: str) -> bool:
        """
        Returns true if the given prefix is a prefix of any word in the trie.
        """
        node = self.root

        for letter in prefix:
            node = node.children.get(letter)
            if node is None:
                return False

        return True
__all__ = [
"Trie",
"TrieNode"
]
57 changes: 57 additions & 0 deletions datastructures/trees/trie/trie.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from typing import List
from datastructures.trees.trie.trie_node import TrieNode


class Trie:
    """Prefix tree built from TrieNode objects; the edge key is the character."""

    def __init__(self):
        # The root stands for the empty prefix.
        self.root = TrieNode()

    def insert(self, word: str) -> None:
        """Adds ``word`` to the trie, creating any nodes that are missing along its path."""
        curr = self.root

        for char in word:
            if char in curr.children:
                curr = curr.children[char]
            else:
                new_node = TrieNode()
                curr.children[char] = new_node
                curr = new_node

        curr.is_end = True

    def search(self, word: str) -> List[str]:
        """
        Returns every stored word that begins with ``word``.

        Parameters:
        word (str): The prefix to look up.

        Returns:
        List[str]: Matching words in depth-first order; empty if ``word`` is empty or no stored word starts with it.
        """
        if not word:
            return []

        curr = self.root
        for char in word:
            # Membership test (not indexing) so that a miss never creates a node.
            if char in curr.children:
                curr = curr.children[char]
            else:
                return []

        output = []

        def dfs(node: "TrieNode", prefix: str) -> None:
            # `prefix` is the full text spelled by the path from the root to `node`.
            # Nodes do not store their own character, so it comes from the child key.
            if node.is_end:
                output.append(prefix)

            for char, child in node.children.items():
                dfs(child, prefix + char)

        dfs(curr, word)
        return output

    def starts_with(self, prefix: str) -> bool:
        """
        Returns true if the given prefix is a prefix of any word in the trie.
        """
        curr = self.root

        for char in prefix:
            if char not in curr.children:
                return False
            curr = curr.children[char]

        return True
23 changes: 23 additions & 0 deletions datastructures/trees/trie/trie_node.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from typing import DefaultDict
from collections import defaultdict


class TrieNode:
    """A trie node holding its children (keyed by character) and an end-of-word flag."""

    def __init__(self):
        """
        Initializes a TrieNode instance.

        The node does not store its own character; the character is the key under which the node is registered in
        its parent's ``children`` mapping.
        """
        # Indexing a missing key creates the child; use `in` or `.get` to probe without creating nodes.
        self.children: DefaultDict[str, "TrieNode"] = defaultdict(TrieNode)
        # True when the path from the root to this node spells a complete word.
        self.is_end = False

    def __repr__(self):
        # Render children as a plain dict so the output is not wrapped in `dict_items(...)`.
        return f"TrieNode(children={dict(self.children)!r}, is_end={self.is_end!r})"
Loading