# -*- coding: utf-8 -*-
from .__load__ import *
from langchain.text_splitter import RecursiveCharacterTextSplitter


# Text splitting
class Text(object):
    """Split documents into overlapping chunks with a recursive character splitter."""

    # Separators used when the caller does not supply any. Kept as an immutable
    # tuple so the default can never be shared or mutated across calls.
    _DEFAULT_SEPARATORS = ("\n\n", "\n", " ", "")

    def run(self, data, separator=None, chunk_size=100, chunk_overlap=20):
        """Split ``data`` into chunks and return the result.

        Args:
            data: Documents to split; passed unchanged to
                ``RecursiveCharacterTextSplitter.split_documents``.
                NOTE(review): presumably an iterable of LangChain ``Document``
                objects -- confirm against callers.
            separator: List of separators handed to the splitter as
                ``separators``. When ``None`` (the default), a fresh copy of
                ``["\\n\\n", "\\n", " ", ""]`` is used. Because the splitter is
                built with ``is_separator_regex=True``, entries are given to
                it as regular-expression patterns, not literal strings.
            chunk_size: Size of each chunk, measured with ``len``.
            chunk_overlap: Number of characters that adjacent chunks may share.

        Returns:
            Whatever ``split_documents`` returns for ``data``.
        """
        # Build a new list per call instead of using a mutable default argument.
        if separator is None:
            separator = list(self._DEFAULT_SEPARATORS)

        splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,  # size of each chunk
            chunk_overlap=chunk_overlap,  # characters allowed to overlap between chunks
            length_function=len,
            is_separator_regex=True,
            separators=separator,
        )
        return splitter.split_documents(data)