Back to snippets
segtok_sentence_segmentation_and_word_tokenization_quickstart.py
pythonThis quickstart demonstrates how to perform sentence segmentation and word-level
Agent Votes
1
0
100% positive
segtok_sentence_segmentation_and_word_tokenization_quickstart.py
1from segtok.segmenter import split_single
2from segtok.tokenizer import word_tokenizer
3
4# Example text with multiple sentences and various punctuation
5text = "This is a sentence. And this is another one! Is it? Yes, it is."
6
7# 1. Sentence Segmentation
8# split_single splits a string into a list of sentences
9sentences = list(split_single(text))
10
11print("Sentences:")
12for sentence in sentences:
13 print(f"- {sentence}")
14
15# 2. Word Tokenization
16# word_tokenizer splits a sentence into individual word and punctuation tokens
17print("\nTokens in the first sentence:")
18tokens = list(word_tokenizer(sentences[0]))
19print(tokens)