LangChain Components
Components are the fundamental building blocks of LangChain. Understanding these components is essential for building powerful AI applications.
Models
Language Models (LLMs)
The core interface for interacting with language models.
from langchain_openai import OpenAI, ChatOpenAI
# Basic LLM (text completion)
llm = OpenAI(model="gpt-3.5-turbo-instruct", temperature=0.7)
response = llm.invoke("What is the capital of France?")
print(response)
# Output: "The capital of France is Paris."
# Chat Model (conversation-based)
from langchain_core.messages import HumanMessage
chat_model = ChatOpenAI(model="gpt-4", temperature=0.7)
messages = [HumanMessage(content="What is the capital of France?")]
response = chat_model.invoke(messages)
print(response.content)
# Output: "The capital of France is Paris."Model Types Comparison
Model Types Comparison
| Type | Use Case | Example |
|---|---|---|
| LLM | Text completion, generation | OpenAI, Anthropic |
| Chat | Conversational AI | GPT-4, Claude |
| Embedding | Text similarity, search | text-embedding-ada-002 |
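Embedding models (the third row) are not called like LLMs; they turn text into vectors for similarity search. A minimal sketch using the OpenAI embeddings interface:
from langchain_openai import OpenAIEmbeddings
# Convert a query into a dense vector for similarity comparison
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
vector = embeddings.embed_query("What is the capital of France?")
print(len(vector))  # 1536 dimensions for text-embedding-ada-002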
Prompts
Prompt Templates
Create reusable prompt patterns with variables.
from langchain.prompts import PromptTemplate, ChatPromptTemplate
# Simple prompt template
template = PromptTemplate(
input_variables=["product", "audience"],
template="Write a {audience}-focused description for {product}."
)
prompt = template.format(product="AI assistant", audience="developers")
print(prompt)
# Output: "Write a developers-focused description for AI assistant."
# Chat prompt template (system + human messages)
chat_template = ChatPromptTemplate.from_messages([
("system", "You are a helpful {role} assistant."),
("human", "{question}")
])
messages = chat_template.format_messages(
role="technical support",
question="How do I fix a memory leak?"
)
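The formatted messages can go straight to a chat model; a minimal usage sketch reusing the chat_model defined in the Models section:
response = chat_model.invoke(messages)
print(response.content)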
Few-shot Learning
Provide examples to improve model performance.
from langchain.prompts import FewShotPromptTemplate, PromptTemplate
examples = [
{
"question": "What is 2+2?",
"answer": "4"
},
{
"question": "What is 5+3?",
"answer": "8"
}
]
example_prompt = PromptTemplate(
input_variables=["question", "answer"],
template="Question: {question}\nAnswer: {answer}"
)
few_shot_prompt = FewShotPromptTemplate(
examples=examples,
example_prompt=example_prompt,
prefix="Answer the following questions:",
suffix="Question: {input}\nAnswer:",
input_variables=["input"]
)
# Use with examples
prompt = few_shot_prompt.format(input="What is 7+2?")
print(prompt)
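# Output (prefix, examples, and suffix joined by blank lines):
# Answer the following questions:
#
# Question: What is 2+2?
# Answer: 4
#
# Question: What is 5+3?
# Answer: 8
#
# Question: What is 7+2?
# Answer: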
Output Parsers
Structured Output
Parse LLM responses into structured data formats.
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.prompts import ChatPromptTemplate
# Define response schema
response_schemas = [
ResponseSchema(name="confidence", description="Confidence score (0-1)"),
ResponseSchema(name="reasoning", description="Reasoning for the answer"),
ResponseSchema(name="answer", description="The final answer")
]
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
# Create format instructions
format_instructions = output_parser.get_format_instructions()
# Template with format instructions
template = """Answer the user question.
{format_instructions}
Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)
# Use the chain
from langchain_openai import ChatOpenAI
llm = ChatOpenAI()
chain = prompt | llm | output_parser
response = chain.invoke({
"question": "What is machine learning?",
"format_instructions": format_instructions
})
print(response)
# Output: {
# "confidence": 0.9,
# "reasoning": "Machine learning is...",
# "answer": "Machine learning is a subset of artificial intelligence..."
# }
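Because the format instructions never change between calls, they can be bound once with partial() rather than passed on every invoke; a minimal variant of the chain above:
# Bind the static format instructions into the prompt once
prompt = prompt.partial(format_instructions=format_instructions)
chain = prompt | llm | output_parser
response = chain.invoke({"question": "What is machine learning?"})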
Pydantic Parser
Type-safe parsing with Pydantic models.
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
# Define the data model
class ProductAnalysis(BaseModel):
product_name: str = Field(description="Name of the product")
features: list[str] = Field(description="List of key features")
price_range: str = Field(description="Estimated price range")
rating: float = Field(description="Customer rating (1-5)")
# Create parser
parser = PydanticOutputParser(pydantic_object=ProductAnalysis)
# Template with format instructions
template = """Analyze the following product:
{product}
{format_instructions}
"""
prompt = ChatPromptTemplate.from_template(template)
# Use the chain
chain = prompt | ChatOpenAI() | parser
result = chain.invoke({
"product": "iPhone 15 Pro",
"format_instructions": parser.get_format_instructions()
})
print(result.product_name) # "iPhone 15 Pro"
print(result.features) # ["A17 Pro chip", "Titanium design", ...]
print(result.rating)        # 4.5
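If the model returns malformed JSON, parser.parse raises an exception. LangChain's OutputFixingParser wraps a parser and asks an LLM to repair invalid output; a minimal sketch:
from langchain.output_parsers import OutputFixingParser
# On a parse failure, the wrapped LLM is asked to fix the malformed output
fixing_parser = OutputFixingParser.from_llm(parser=parser, llm=ChatOpenAI())
chain = prompt | ChatOpenAI() | fixing_parser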
Text Splitters
Splitting Documents
Break large documents into manageable chunks.
from langchain.text_splitter import RecursiveCharacterTextSplitter, TokenTextSplitter
# Character-based splitter
char_splitter = RecursiveCharacterTextSplitter(
chunk_size=1000,
chunk_overlap=200,
length_function=len,
separators=["\n\n", "\n", " ", ""]
)
# Token-based splitter (chunk sizes track model token limits)
token_splitter = TokenTextSplitter(
chunk_size=500,
chunk_overlap=50
)
# Example usage
long_text = """
This is a very long document that needs to be split into smaller chunks...
[more text here]
"""
# Split the text
chunks = char_splitter.split_text(long_text)
print(f"Created {len(chunks)} chunks")
print(f"First chunk: {chunks[0][:100]}...")π Document Loaders
Document Loaders
Loading Different File Types
from langchain_community.document_loaders import (
TextLoader,
PyPDFLoader,
WebBaseLoader,
CSVLoader,
UnstructuredMarkdownLoader
)
# Load text files
text_loader = TextLoader("document.txt")
text_docs = text_loader.load()
# Load PDF files
pdf_loader = PyPDFLoader("report.pdf")
pdf_docs = pdf_loader.load()
# Load web pages
web_loader = WebBaseLoader("https://example.com")
web_docs = web_loader.load()
# Load CSV files
csv_loader = CSVLoader("data.csv")
csv_docs = csv_loader.load()
# Load Markdown files
md_loader = UnstructuredMarkdownLoader("README.md")
md_docs = md_loader.load()
# Access document content
for doc in text_docs:
print(f"Content: {doc.page_content[:100]}...")
print(f"Metadata: {doc.metadata}")πͺ Vector Stores
Vector Stores
Store and Retrieve Embeddings
from langchain_community.vectorstores import FAISS, Chroma
from langchain_openai import OpenAIEmbeddings
# Initialize embeddings
embeddings = OpenAIEmbeddings()
# Sample texts
texts = [
"Machine learning is a subset of artificial intelligence",
"Deep learning uses neural networks with multiple layers",
"Natural language processing helps computers understand text"
]
# Create FAISS vector store
faiss_store = FAISS.from_texts(texts, embeddings)
# Search for similar documents
query = "AI and neural networks"
# similarity_search returns documents only; use the _with_score
# variant to get each document's distance score as well
results = faiss_store.similarity_search_with_score(query, k=2)
for doc, score in results:
    print(f"Content: {doc.page_content}")
    print(f"Score: {score:.4f}")
Best Practices
1. Choose the Right Model
- Use completion LLMs for text completion and generation tasks
- Use chat models for conversational applications
- Consider cost vs performance trade-offs
2. Optimize Prompts
- Be specific and clear
- Provide examples (few-shot learning)
- Use system prompts for consistent behavior
3. Handle Errors Gracefully
from langchain.schema import BaseOutputParser

class SafeOutputParser(BaseOutputParser):
    def parse(self, text: str) -> dict:
        try:
            # Your parsing logic here
            return {"result": text.strip()}
        except Exception as e:
            # Return a structured error instead of raising
            print(f"Parse error: {e}")
            return {"error": str(e), "original": text}
4. Memory Management
- Use chunking for large documents
- Implement caching for repeated queries (see the sketch after this list)
- Monitor token usage to control costs
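For the caching point above, LangChain provides a process-wide LLM cache; a minimal sketch using InMemoryCache (swap in SQLiteCache from the same module for persistence across runs):
from langchain.globals import set_llm_cache
from langchain_community.cache import InMemoryCache
# Identical prompts now hit the cache instead of re-calling the API
set_llm_cache(InMemoryCache())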
These components are the foundation for building sophisticated AI applications. Next, explore how to combine them into powerful Chains.