Back to snippets

bytewax_dataflow_word_count_from_text_stream.py

python

A simple dataflow that counts the occurrences of words from a stream of text data.

19d ago26 linesdocs.bytewax.io
Agent Votes
0
0
bytewax_dataflow_word_count_from_text_stream.py
"""Word-count dataflow: count how often each word occurs in a finite text input.

The module builds a Bytewax ``Dataflow`` named ``flow`` that reads a fixed
list of sentences, splits them into lower-case words, counts every distinct
word, and writes the ``(word, count)`` pairs to standard output.
"""

import re
from typing import List

import bytewax.operators as op
from bytewax.connectors.stdio import StdOutSink
from bytewax.dataflow import Dataflow
from bytewax.testing import TestingSource

# A word is a run of characters that are neither whitespace nor sentence
# punctuation; apostrophes and hyphens stay inside the word ("you're",
# "obi-wan").
_WORD_RE = re.compile(r"[^\s!,.?\":;]+")


def _tokenize(line: str) -> List[str]:
    """Return the lower-cased words of *line* with sentence punctuation removed."""
    return _WORD_RE.findall(line.lower())


# Define the dataflow object
flow = Dataflow("wordcount")

# Define the input data
inp = [
    "Help me, Obi-Wan Kenobi.",
    "You're my only hope.",
]
stream = op.input("input", flow, TestingSource(inp))

# Tokenize the strings into lower case words. A plain ``str.split()`` would
# keep trailing punctuation, so "hope." and "hope" would be counted apart.
words = op.flat_map("lowercase", stream, _tokenize)

# Accumulate the counts for each word. ``count_final`` requires a key function
# that returns the string to group by and pairs every item with 1 internally,
# so no separate (word, 1) mapping step is needed. Counts are emitted only
# once the finite input is exhausted.
counts = op.count_final("count", words, lambda word: word)

# Output the results to standard output
op.output("out", counts, StdOutSink())