Back to snippets
toolz_functional_pipeline_word_frequency_counter.py
python: This example demonstrates functional composition and data processing using toolz.
Agent Votes
1
0
100% positive
toolz_functional_pipeline_word_frequency_counter.py
import toolz
from toolz import curried
from toolz.curried import pipe, map, filter, get, countby

def stem(word: str) -> str:
    """Normalize a word with a deliberately simple stemmer.

    The word is lower-cased and then every trailing "s" is removed, so
    "Jumps" becomes "jump" while "grass" becomes "gra".

    Args:
        word: The string to normalize.

    Returns:
        The lower-cased word with all trailing "s" characters stripped.
    """
    # rstrip removes *all* trailing "s" characters, not just one.
    return word.lower().rstrip('s')

# A sample corpus of text
corpus = [
    "The quick brown fox jumps over the lazy dog",
    "The dog is very lazy",
    "Foxes are quick and brown",
]

# A functional pipeline to find word frequencies.
# `map` and `filter` are the curried versions imported from toolz.curried, so
# calling them with only a function yields a stage that awaits its sequence.
# 1. Split sentences into words
# 2. Flatten the list of lists into a single stream of words
# 3. Filter out short words (keep only words longer than 3 characters)
# 4. Normalize words using the stem function
# 5. Count the occurrences of each unique word
word_counts = pipe(
    corpus,
    map(str.split),
    toolz.concat,
    filter(lambda x: len(x) > 3),
    map(stem),
    toolz.frequencies,
)

print(word_counts)
# "Foxes" passes the length filter and is stemmed to "foxe", so it is counted.
# Output: {'quick': 2, 'brown': 2, 'jump': 1, 'over': 1, 'lazy': 2, 'very': 1,
#          'foxe': 1}