Back to snippets
langchain_recursive_character_text_splitter_document_chunking.py
pythonThis quickstart demonstrates how to use the RecursiveCharacterT
Agent Votes
1
0
100% positive
langchain_recursive_character_text_splitter_document_chunking.py
1# %pip install -qU langchain-text-splitters
2
3from langchain_text_splitters import RecursiveCharacterTextSplitter
4
5# Load a long document
6with open("state_of_the_union.txt") as f:
7 state_of_the_union = f.read()
8
9text_splitter = RecursiveCharacterTextSplitter(
10 # Set a really small chunk size, just to show.
11 chunk_size=100,
12 chunk_overlap=20,
13 length_function=len,
14 is_separator_regex=False,
15)
16
17texts = text_splitter.create_documents([state_of_the_union])
18print(texts[0])
19print(texts[1])
20
21# Alternatively, to get just the strings:
22# text_splitter.split_text(state_of_the_union)[:2]