LangChain Memory
Memory components allow your chains and agents to remember previous interactions, making conversations more contextual and coherent.
What is Memory?
Memory in LangChain stores and retrieves information from past interactions (see the interface sketch after this list), enabling:
- Contextual conversations - Remembering what was said before
- State persistence - Maintaining information across multiple calls
- Personalization - Tailoring responses based on user history
- Learning - Improving responses over time
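Under the hood, every memory class exposes the same two core operations: `load_memory_variables`, which reads stored context for the next prompt, and `save_context`, which records the latest exchange. Here is a minimal sketch of that interface using `ConversationBufferMemory` (the example values are illustrative):

```python
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory()

# save_context records one exchange (the user's input and the model's output)
memory.save_context(
    {"input": "Hi, I'm Alice."},
    {"output": "Hello Alice! How can I help?"}
)

# load_memory_variables returns the stored context for the next prompt
print(memory.load_memory_variables({}))
# {'history': "Human: Hi, I'm Alice.\nAI: Hello Alice! How can I help?"}
```

Every memory type below implements this same interface; they differ only in what they store and how they condense it.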
Conversation Memory
ConversationBufferMemory
Stores the entire conversation history.
```python
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain
from langchain_openai import ChatOpenAI

# Initialize memory
memory = ConversationBufferMemory()

# Create conversation chain with memory
conversation = ConversationChain(
    llm=ChatOpenAI(),
    memory=memory,
    verbose=True
)

# Start conversation
response1 = conversation.predict(input="Hi! I'm working on a Python project.")
response2 = conversation.predict(input="Can you suggest some libraries for data analysis?")

# View the memory
print("Memory buffer:")
print(memory.buffer)

# View chat history
print("\nChat history:")
print(memory.chat_memory.messages)

# Access specific parts (messages alternate human/AI in a simple exchange)
print(f"\nHuman messages: {memory.chat_memory.messages[::2]}")
print(f"AI messages: {memory.chat_memory.messages[1::2]}")
```
ConversationBufferWindowMemory
Remembers only the last K interactions to manage token limits.
```python
from langchain.memory import ConversationBufferWindowMemory

# Remember last 3 exchanges
window_memory = ConversationBufferWindowMemory(k=3, return_messages=True)

conversation = ConversationChain(
    llm=ChatOpenAI(),
    memory=window_memory,
    verbose=True
)

# Long conversation
conversation.predict(input="My name is Alice.")
conversation.predict(input="I'm a software developer.")
conversation.predict(input="I love working with Python.")
conversation.predict(input="What's my name?")        # Remembers
conversation.predict(input="What do I do?")          # Remembers
conversation.predict(input="My first message was?")  # Forgets (too far back)

# Note: chat_memory stores the full history; only the last k exchanges
# are loaded into the prompt
print(f"Messages stored: {len(window_memory.chat_memory.messages)}")
```
ConversationSummaryMemory
Summarizes conversations to save tokens.
```python
from langchain.memory import ConversationSummaryMemory

# Initialize with an LLM that performs the summarization
summary_memory = ConversationSummaryMemory(llm=ChatOpenAI())

conversation = ConversationChain(
    llm=ChatOpenAI(),
    memory=summary_memory,
    verbose=True
)

# Have a detailed conversation
conversation.predict(input="I'm planning to build a web application using React. The app will be a task management tool with features for creating, editing, and deleting tasks. Users should be able to set priorities and due dates.")
conversation.predict(input="What backend technologies would you recommend for this React app?")

# View the summary
print("Conversation Summary:")
print(summary_memory.buffer)
```
Entity Memory
ConversationEntityMemory
Tracks specific entities and their properties.
```python
from langchain.memory import ConversationEntityMemory
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Entity memory uses an LLM to extract and update facts about entities
entity_memory = ConversationEntityMemory(llm=ChatOpenAI())

# The memory supplies both {entities} and {history}
template = """You are a helpful assistant that tracks information about people.
Current entities: {entities}
Conversation history: {history}
Human: {input}
AI:"""

prompt = PromptTemplate(
    input_variables=["entities", "history", "input"],
    template=template
)

chain = LLMChain(
    llm=ChatOpenAI(),
    prompt=prompt,
    memory=entity_memory,
    verbose=True
)

# Extract entity information
chain.invoke({"input": "My name is John and I work at Google as a software engineer."})
chain.invoke({"input": "My colleague Sarah is a product manager at the same company."})

# Check stored entities
print("Stored entities:")
for entity, properties in entity_memory.entity_store.store.items():
    print(f"{entity}: {properties}")

# Access a specific entity
john_info = entity_memory.entity_store.get("John")
print(f"\nJohn's info: {john_info}")
```
Advanced Memory Types
ConversationTokenBufferMemory
Limits memory based on token count rather than message count.
```python
from langchain.memory import ConversationTokenBufferMemory

# Limit memory to 1000 tokens
token_memory = ConversationTokenBufferMemory(
    llm=ChatOpenAI(),
    max_token_limit=1000,
    return_messages=True
)

conversation = ConversationChain(
    llm=ChatOpenAI(),
    memory=token_memory,
    verbose=True
)

# Old messages are automatically pruned once the token limit is exceeded
for i in range(10):
    conversation.predict(input=f"Tell me something interesting about topic {i}.")
    # Rough estimate: ~4 characters per token
    chars = sum(len(m.content) for m in token_memory.chat_memory.messages)
    print(f"Current token count: ~{chars // 4}")
```
VectorStoreRetrieverMemory
Uses vector similarity to find relevant past interactions.
```python
from langchain.memory import VectorStoreRetrieverMemory
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

# Create vector store for semantic search
embeddings = OpenAIEmbeddings()
vectorstore = Chroma(embedding_function=embeddings)

# Create retriever memory
retriever_memory = VectorStoreRetrieverMemory(
    retriever=vectorstore.as_retriever(search_kwargs={"k": 2}),
    memory_key="chat_history",
    input_key="question"  # Which input to index, since the chain has several
)

# Use in a conversation chain
template = """Answer the question based on the following context and chat history:
Chat History: {chat_history}
Context: {context}
Question: {question}
Answer:"""

prompt = PromptTemplate(
    input_variables=["chat_history", "context", "question"],
    template=template
)

# Create a chain that uses the memory
chain = LLMChain(
    llm=ChatOpenAI(),
    prompt=prompt,
    memory=retriever_memory,
    verbose=True
)

# Example usage
chain.invoke({
    "context": "You are helping with programming questions.",
    "question": "How do I create a class in Python?"
})
chain.invoke({
    "context": "You are helping with programming questions.",
    "question": "What about inheritance?"  # Retrieves relevant past interactions
})
```
Custom Memory
Building Your Own Memory Component
```python
from datetime import datetime
from typing import Dict, List

# A plain class is enough here: the summary is injected into the prompt
# manually rather than through LangChain's memory interface
class ProjectMemory:
    """Custom memory that tracks project-related information."""

    def __init__(self):
        self.project_info: Dict[str, str] = {}
        self.decisions: List[Dict[str, str]] = []

    def add_project_detail(self, key: str, value: str):
        """Add project-specific information."""
        self.project_info[key] = value

    def add_decision(self, decision: str, reason: str):
        """Track project decisions."""
        self.decisions.append({
            "decision": decision,
            "reason": reason,
            "timestamp": str(datetime.now())
        })

    def get_project_summary(self) -> str:
        """Generate a summary of project information."""
        summary = "Project Details:\n"
        for key, value in self.project_info.items():
            summary += f"- {key}: {value}\n"
        if self.decisions:
            summary += "\nRecent Decisions:\n"
            for decision in self.decisions[-3:]:  # Last 3 decisions
                summary += f"- {decision['decision']}: {decision['reason']}\n"
        return summary

# Use custom memory in a chain
project_memory = ProjectMemory()
project_memory.add_project_detail("name", "AI Task Manager")
project_memory.add_project_detail("tech_stack", "React, Node.js, MongoDB")

template = """You are helping with a software project.
{project_summary}
Human: {input}
AI:"""

prompt = PromptTemplate(
    input_variables=["project_summary", "input"],
    template=template
)

chain = LLMChain(
    llm=ChatOpenAI(),
    prompt=prompt,
    verbose=True
)

# Pass the project summary in explicitly on each call
result = chain.invoke({
    "project_summary": project_memory.get_project_summary(),
    "input": "What architecture would you recommend for this project?"
})

# Record a decision that was made
project_memory.add_decision(
    "Architecture choice",
    "Recommended microservices architecture for scalability"
)
```
Memory Persistence
Saving and Loading Memory
```python
import json
from langchain.memory import ConversationBufferMemory

# Create memory with some conversation
memory = ConversationBufferMemory()
conversation = ConversationChain(llm=ChatOpenAI(), memory=memory)
conversation.predict(input="I'm building a weather app.")

# Save memory to a file
def save_memory(memory, filename):
    # Convert messages to a serializable format
    messages = [
        {"type": type(msg).__name__, "content": msg.content}
        for msg in memory.chat_memory.messages
    ]
    with open(filename, 'w') as f:
        json.dump(messages, f, indent=2)

def load_memory(filename):
    with open(filename, 'r') as f:
        messages_data = json.load(f)
    memory = ConversationBufferMemory()
    for msg_data in messages_data:
        # Reconstruct messages (simplified: content only, no metadata)
        if msg_data["type"] == "HumanMessage":
            memory.chat_memory.add_user_message(msg_data["content"])
        elif msg_data["type"] == "AIMessage":
            memory.chat_memory.add_ai_message(msg_data["content"])
    return memory

# Save memory
save_memory(memory, "conversation_history.json")

# Load memory later
loaded_memory = load_memory("conversation_history.json")
new_conversation = ConversationChain(llm=ChatOpenAI(), memory=loaded_memory)
```
Real-World Examples
Customer Support Bot with Memory
```python
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chains import ConversationChain

# Memory that summarizes older turns when the buffer grows too long
support_memory = ConversationSummaryBufferMemory(
    llm=ChatOpenAI(),
    max_token_limit=1000,  # Summarize when exceeding this limit
    return_messages=True
)

support_bot = ConversationChain(
    llm=ChatOpenAI(temperature=0.2),  # Lower temperature for consistency
    memory=support_memory,
    verbose=True
)

# Simulate a support conversation
print("Customer Support Session:")
response1 = support_bot.predict(input="Hi! I'm having trouble with my order #12345.")
response2 = support_bot.predict(input="The package hasn't arrived yet and it's been 2 weeks.")
response3 = support_bot.predict(input="Can you check the tracking status for me?")
response4 = support_bot.predict(input="What was my order number again?")  # Should remember

print(f"\nMemory summary: {support_memory.moving_summary_buffer}")
```
Personal Tutor with Memory
```python
from langchain.memory import ConversationKGMemory  # Knowledge-graph memory
from langchain.chains import ConversationChain

# Memory that builds a knowledge graph of topics discussed
kg_memory = ConversationKGMemory(llm=ChatOpenAI())

tutor = ConversationChain(
    llm=ChatOpenAI(temperature=0.3),
    memory=kg_memory,
    verbose=True
)

# Tutoring session
print("Tutoring Session:")
tutor.predict(input="I'm learning about machine learning.")
tutor.predict(input="Can you explain neural networks? I'm confused about backpropagation.")
tutor.predict(input="What's the difference between supervised and unsupervised learning?")

# View the extracted knowledge-graph triples
print(f"Knowledge Graph Triples: {kg_memory.kg.get_triples()}")
```
Memory Configuration
Advanced Memory Settings
```python
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.callbacks.base import BaseCallbackHandler

# Configure memory with custom message formatting
memory = ConversationBufferMemory(
    return_messages=True,       # Return message objects instead of a string
    human_prefix="User",        # Custom prefix for human messages
    ai_prefix="Assistant",      # Custom prefix for AI messages
    memory_key="chat_history"   # Custom key for prompt templates
)

# Track memory growth with a callback
class MemoryTracker(BaseCallbackHandler):
    def on_llm_end(self, response, **kwargs):
        print(f"Memory usage: {len(memory.chat_memory.messages)} messages")

# ConversationChain's default prompt expects the key "history", so a
# custom memory_key requires a matching custom prompt
prompt = PromptTemplate(
    input_variables=["chat_history", "input"],
    template="Conversation so far:\n{chat_history}\nUser: {input}\nAssistant:"
)

conversation = ConversationChain(
    llm=ChatOpenAI(callbacks=[MemoryTracker()]),
    memory=memory,
    prompt=prompt,
    verbose=True
)
```
Best Practices
1. Choose the Right Memory Type (a simple selection helper is sketched below)
- BufferMemory: Simple applications, short conversations
- WindowMemory: Long conversations where only recent context matters
- SummaryMemory: Extended conversations that need compressed long-range context
- KG Memory: Complex topic relationships
- EntityMemory: Tracking specific people/objects
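One way to encode this guidance is a small helper that picks a memory class from the expected conversation length. This is an illustrative sketch, not a LangChain API; the thresholds are arbitrary assumptions:

```python
from langchain.memory import (
    ConversationBufferMemory,
    ConversationBufferWindowMemory,
    ConversationSummaryMemory,
)
from langchain_openai import ChatOpenAI

def pick_memory(expected_turns: int):
    """Hypothetical helper: choose a memory type by expected conversation length."""
    if expected_turns <= 10:
        return ConversationBufferMemory()                # Short: keep everything
    if expected_turns <= 50:
        return ConversationBufferWindowMemory(k=5)       # Medium: recent turns only
    return ConversationSummaryMemory(llm=ChatOpenAI())   # Long: summarize

memory = pick_memory(expected_turns=30)
```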
2. Manage Token Costs
```python
# Monitor memory usage
def check_memory_size(memory):
    message_count = len(memory.chat_memory.messages)
    # Rough heuristic: ~1.3 tokens per word
    estimated_tokens = sum(len(msg.content.split()) * 1.3 for msg in memory.chat_memory.messages)
    print(f"Messages: {message_count}, Estimated tokens: {estimated_tokens:.0f}")

check_memory_size(memory)
```
3. Clear Memory When Necessary
```python
# Clear the underlying chat history directly
memory.chat_memory.clear()

# Or clear via the memory object (also resets any derived state,
# such as summaries)
memory.clear()
```
4. Add Metadata to Messages
```python
from langchain.schema import HumanMessage

# Add a message with metadata
memory.chat_memory.add_message(
    HumanMessage(content="Help with Python", additional_kwargs={"topic": "programming"})
)

# Filter messages by metadata
programming_messages = [
    msg for msg in memory.chat_memory.messages
    if msg.additional_kwargs.get("topic") == "programming"
]
```
5. Combine Memory Types
```python
from langchain.memory import (
    CombinedMemory,
    ConversationBufferMemory,
    ConversationEntityMemory,
)

# Combine different memory types; each memory must expose distinct
# memory keys, so the buffer gets a non-default key here
buffer_memory = ConversationBufferMemory(memory_key="chat_history", input_key="input")
entity_memory = ConversationEntityMemory(llm=ChatOpenAI(), input_key="input")

# Note: a prompt used with this memory must include all of its keys
# ({chat_history}, {entities}, and {history})
combined_memory = CombinedMemory(memories=[buffer_memory, entity_memory])
```
Memory components are essential for creating engaging, context-aware AI applications. Next, explore LangGraph for building more complex, stateful workflows.