Langchain
Prerequisites
scikit-learn
pytorch
torchvision
torchaudio
huggingface
transformers
langchain
InstructorEmbedding
sentence_transformers
chardet
charset-normalizer==3.1.0
youtube-transcript-api
faiss-gpu
Document Loaders
from langchain.document_loaders import YoutubeLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Video whose transcript is loaded and split below.
video_url = "https://www.youtube.com/watch?v=Jr8gLJr9WKQ&ab_channel=EnglishSkillsMastery"

# Splitter settings, forwarded to RecursiveCharacterTextSplitter.
chunk_size = 1000
chunk_overlap = 100

# Build a loader from the video URL and load its transcript.
loader = YoutubeLoader.from_youtube_url(video_url)
transcript = loader.load()

# Configure the splitter with the settings defined above.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
)

# Split the loaded transcript into chunks.
docs = text_splitter.split_documents(transcript)

# Bare expression: shows the value in a notebook/REPL, no effect in a script.
docs
# [Document(page_content="English by mimicking...
Embedding
Vectorstores
LLM
Last updated