
LangChain Memory

Memory components allow your chains and agents to remember previous interactions, making conversations more contextual and coherent.

🧠 What is Memory?

Memory in LangChain stores and retrieves information from past interactions (a minimal sketch of the core API follows this list), enabling:

  • Contextual conversations - Remembering what was said before
  • State persistence - Maintaining information across multiple calls
  • Personalization - Tailoring responses based on user history
  • Continuity - Carrying accumulated context into later responses
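
Under the hood, every memory class implements the same two-method contract: save_context records one input/output exchange, and load_memory_variables returns the stored context as prompt variables. A minimal sketch of that contract using ConversationBufferMemory:

from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory()

# save_context records one exchange (inputs and outputs as dicts)
memory.save_context(
    {"input": "Hi, I'm building a chatbot."},
    {"output": "Great! What framework are you using?"}
)

# load_memory_variables returns the stored context keyed for prompt templates
print(memory.load_memory_variables({}))
# {'history': "Human: Hi, I'm building a chatbot.\nAI: Great! ..."}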

πŸ’¬ Conversation Memory

ConversationBufferMemory

Stores the entire conversation history.

from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain
from langchain_openai import ChatOpenAI
 
# Initialize memory
memory = ConversationBufferMemory()
 
# Create conversation chain with memory
conversation = ConversationChain(
    llm=ChatOpenAI(),
    memory=memory,
    verbose=True
)
 
# Start conversation
response1 = conversation.predict(input="Hi! I'm working on a Python project.")
response2 = conversation.predict(input="Can you suggest some libraries for data analysis?")
 
# View the memory
print("Memory buffer:")
print(memory.buffer)
 
# View chat history
print("\nChat history:")
print(memory.chat_memory.messages)
 
# Access specific parts (assumes strictly alternating human/AI turns)
print(f"\nHuman messages: {memory.chat_memory.messages[::2]}")
print(f"AI messages: {memory.chat_memory.messages[1::2]}")

ConversationBufferWindowMemory

Remembers only the last K interactions to manage token limits.

from langchain.memory import ConversationBufferWindowMemory
 
# Remember last 3 exchanges
window_memory = ConversationBufferWindowMemory(k=3, return_messages=True)
 
conversation = ConversationChain(
    llm=ChatOpenAI(),
    memory=window_memory,
    verbose=True
)
 
# Long conversation
conversation.predict(input="My name is Alice.")
conversation.predict(input="I'm a software developer.")
conversation.predict(input="I love working with Python.")
conversation.predict(input="What's my name?")  # Remembers
conversation.predict(input="What do I do?")   # Remembers
conversation.predict(input="My first message was?")  # Forgets (too far back)
 
# chat_memory keeps the full history; the k-window is applied when
# the memory is loaded into the prompt
print(f"Messages loaded into the prompt: {len(window_memory.buffer)}")
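
To watch the window in isolation, you can drive the memory directly with save_context; with k=2, only the two most recent exchanges survive into the loaded variables:

from langchain.memory import ConversationBufferWindowMemory

demo = ConversationBufferWindowMemory(k=2)
demo.save_context({"input": "First message"}, {"output": "Reply 1"})
demo.save_context({"input": "Second message"}, {"output": "Reply 2"})
demo.save_context({"input": "Third message"}, {"output": "Reply 3"})

# Only the last k=2 exchanges are returned; "First message" is gone
print(demo.load_memory_variables({}))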

ConversationSummaryMemory

Summarizes conversations to save tokens.

from langchain.memory import ConversationSummaryMemory
 
# Initialize with LLM for summarization
summary_memory = ConversationSummaryMemory(llm=ChatOpenAI())
 
conversation = ConversationChain(
    llm=ChatOpenAI(),
    memory=summary_memory,
    verbose=True
)
 
# Have a detailed conversation
conversation.predict(input="I'm planning to build a web application using React. The app will be a task management tool with features for creating, editing, and deleting tasks. Users should be able to set priorities and due dates.")
conversation.predict(input="What backend technologies would you recommend for this React app?")
 
# View the summary
print("Conversation Summary:")
print(summary_memory.buffer)
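
The summary is produced by the LLM itself. predict_new_summary is the step the memory runs internally when new messages arrive, and you can call it directly to see how the summary is built:

# Re-run the summarization step from scratch over the stored messages
fresh_summary = summary_memory.predict_new_summary(
    messages=summary_memory.chat_memory.messages,
    existing_summary=""
)
print(fresh_summary)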

πŸ” Entity Memory

ConversationEntityMemory

Tracks specific entities and their properties.

from langchain.memory import ConversationEntityMemory
 
# Entity extraction is LLM-driven, so this memory needs its own LLM
entity_memory = ConversationEntityMemory(llm=ChatOpenAI())
 
# Create a conversation that tracks entities
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
 
template = """You are a helpful assistant that tracks information about people.
Current entities: {entities}
 
Human: {input}
AI:"""
 
prompt = PromptTemplate(
    input_variables=["entities", "input"],
    template=template
)
 
chain = LLMChain(
    llm=ChatOpenAI(),
    prompt=prompt,
    memory=entity_memory,
    verbose=True
)
 
# Extract entity information
chain.run("My name is John and I work at Google as a software engineer.")
chain.run("My colleague Sarah is a product manager at the same company.")
 
# Check stored entities
print("Stored entities:")
for entity, properties in entity_memory.entity_store.store.items():
    print(f"{entity}: {properties}")
 
# Access specific entity
john_info = entity_memory.entity_store.get("John")
print(f"\nJohn's info: {john_info}")
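
You can also inspect exactly what the memory would inject into a prompt for a pending input; load_memory_variables returns the chat history plus the entities it judges relevant to that input:

# Entities relevant to the pending input are selected by the LLM
variables = entity_memory.load_memory_variables({"input": "Where does John work?"})
print(variables["entities"])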

πŸ”§ Advanced Memory Types

ConversationTokenBufferMemory

Limits memory based on token count rather than message count.

from langchain.memory import ConversationTokenBufferMemory
 
# Limit memory to 1000 tokens
token_memory = ConversationTokenBufferMemory(
    llm=ChatOpenAI(),
    max_token_limit=1000,
    return_messages=True
)
 
conversation = ConversationChain(
    llm=ChatOpenAI(),
    memory=token_memory,
    verbose=True
)
 
# This will automatically prune old messages when token limit is exceeded
for i in range(10):
    conversation.predict(input=f"Tell me something interesting about topic {i}.")
 
from langchain.schema import get_buffer_string
 
# Rough estimate: ~4 characters per token
buffer_text = get_buffer_string(token_memory.chat_memory.messages)
print(f"Current token count: ~{len(buffer_text) // 4}")

VectorStoreRetrieverMemory

Uses vector similarity to find relevant past interactions.

from langchain.memory import VectorStoreRetrieverMemory
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
 
# Create vector store for semantic search
embeddings = OpenAIEmbeddings()
vectorstore = Chroma(embedding_function=embeddings)
 
# Create retriever memory; input_key tells it which input field to
# index and search on when the chain has several inputs
retriever_memory = VectorStoreRetrieverMemory(
    retriever=vectorstore.as_retriever(search_kwargs={"k": 2}),
    memory_key="chat_history",
    input_key="question"
)
 
# Use in a conversation chain
template = """Answer the question based on the following context and chat history:
Chat History: {chat_history}
Context: {context}
Question: {question}
Answer:"""
 
prompt = PromptTemplate(
    input_variables=["chat_history", "context", "question"],
    template=template
)
 
# Create a chain that uses the memory
from langchain.chains import LLMChain
chain = LLMChain(
    llm=ChatOpenAI(),
    prompt=prompt,
    memory=retriever_memory,
    verbose=True
)
 
# Example usage
chain.invoke({
    "context": "You are helping with programming questions.",
    "question": "How do I create a class in Python?"
})
 
chain.invoke({
    "context": "You are helping with programming questions.",
    "question": "What about inheritance?"  # Will retrieve relevant previous interactions
})
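
Because every exchange is saved as a document in the vector store, you can also exercise the memory directly: save_context writes one exchange, and load_memory_variables runs a similarity search against the pending input:

# Save an exchange as a document, then retrieve it by semantic similarity
retriever_memory.save_context(
    {"question": "My favorite editor is VS Code."},
    {"output": "Noted - VS Code is a popular choice."}
)
print(retriever_memory.load_memory_variables({"question": "Which editor do I use?"}))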

πŸ“ Custom Memory

Building Your Own Memory Component

from datetime import datetime
from typing import Dict, List
 
# A plain helper class; see the BaseMemory sketch after this example
# for a version that plugs into a chain's memory= parameter
class ProjectMemory:
    """Custom store that tracks project-related information."""
 
    def __init__(self):
        self.project_info: Dict[str, str] = {}
        self.decisions: List[Dict[str, str]] = []
 
    def add_project_detail(self, key: str, value: str):
        """Add project-specific information."""
        self.project_info[key] = value
 
    def add_decision(self, decision: str, reason: str):
        """Track project decisions."""
        self.decisions.append({
            "decision": decision,
            "reason": reason,
            "timestamp": str(datetime.now())
        })
 
    def get_project_summary(self) -> str:
        """Generate a summary of project information."""
        summary = "Project Details:\n"
        for key, value in self.project_info.items():
            summary += f"- {key}: {value}\n"
 
        if self.decisions:
            summary += "\nRecent Decisions:\n"
            for decision in self.decisions[-3:]:  # Last 3 decisions
                summary += f"- {decision['decision']}: {decision['reason']}\n"
 
        return summary
 
# Use custom memory in a chain
project_memory = ProjectMemory()
project_memory.add_project_detail("name", "AI Task Manager")
project_memory.add_project_detail("tech_stack", "React, Node.js, MongoDB")
 
template = """You are helping with a software project.
{project_summary}
 
Human: {input}
AI:"""
 
prompt = PromptTemplate(
    input_variables=["project_summary", "input"],
    template=template
)
 
chain = LLMChain(
    llm=ChatOpenAI(),
    prompt=prompt,
    verbose=True
)
 
# Use with project memory
result = chain.invoke({
    "project_summary": project_memory.get_project_summary(),
    "input": "What architecture would you recommend for this project?"
})
 
# Add a decision made
project_memory.add_decision(
    "Architecture choice",
    "Recommended microservices architecture for scalability"
)
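
ProjectMemory above is a plain helper that you feed into the prompt by hand. To build a custom memory that plugs into a chain's memory= parameter, subclass BaseMemory and implement its contract: memory_variables, load_memory_variables, save_context, and clear. A minimal sketch (ProjectChainMemory is a name chosen here for illustration):

from typing import Any, Dict, List
from langchain.schema import BaseMemory

class ProjectChainMemory(BaseMemory):
    """Injects a project summary into prompts as {project_summary}."""

    project_info: Dict[str, str] = {}

    @property
    def memory_variables(self) -> List[str]:
        return ["project_summary"]

    def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, str]:
        details = "\n".join(f"- {k}: {v}" for k, v in self.project_info.items())
        return {"project_summary": f"Project Details:\n{details}"}

    def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
        pass  # read-only in this sketch; nothing to record per turn

    def clear(self) -> None:
        self.project_info = {}

# Plugs straight into the chain: the memory fills {project_summary},
# the caller supplies {input}
chain = LLMChain(
    llm=ChatOpenAI(),
    prompt=prompt,
    memory=ProjectChainMemory(project_info={"name": "AI Task Manager"})
)
chain.run(input="What architecture would you recommend?")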

πŸ”„ Memory Persistence

Saving and Loading Memory

import json
from langchain.memory import ConversationBufferMemory
 
# Create memory with some conversation
memory = ConversationBufferMemory()
conversation = ConversationChain(llm=ChatOpenAI(), memory=memory)
conversation.predict(input="I'm building a weather app.")
 
# Save memory to file
def save_memory(memory, filename):
    # Convert messages to serializable format
    messages = [
        {"type": type(msg).__name__, "content": msg.content}
        for msg in memory.chat_memory.messages
    ]
 
    with open(filename, 'w') as f:
        json.dump(messages, f, indent=2)
 
def load_memory(filename):
    with open(filename, 'r') as f:
        messages_data = json.load(f)
 
    memory = ConversationBufferMemory()
    for msg_data in messages_data:
        # Rebuild the history from the serialized message types
        if msg_data["type"] == "HumanMessage":
            memory.chat_memory.add_user_message(msg_data["content"])
        elif msg_data["type"] == "AIMessage":
            memory.chat_memory.add_ai_message(msg_data["content"])
 
    return memory
 
# Save memory
save_memory(memory, "conversation_history.json")
 
# Load memory later
loaded_memory = load_memory("conversation_history.json")
new_conversation = ConversationChain(llm=ChatOpenAI(), memory=loaded_memory)
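
LangChain also ships file-backed chat histories, so you don't have to hand-roll serialization. For example, FileChatMessageHistory persists messages to a JSON file and can back any memory class through the chat_memory parameter:

from langchain.memory import ConversationBufferMemory, FileChatMessageHistory

# Messages are written to the JSON file as they are added
persistent_memory = ConversationBufferMemory(
    chat_memory=FileChatMessageHistory("chat_history.json")
)
conversation = ConversationChain(llm=ChatOpenAI(), memory=persistent_memory)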

🎯 Real-World Examples

Customer Support Bot with Memory

from langchain.memory import ConversationSummaryBufferMemory
from langchain.chains import ConversationChain
 
# Create memory that summarizes when getting too long
support_memory = ConversationSummaryBufferMemory(
    llm=ChatOpenAI(),
    max_token_limit=1000,  # Summarize when exceeding this limit
    return_messages=True
)
 
support_bot = ConversationChain(
    llm=ChatOpenAI(temperature=0.2),  # Lower temperature for consistency
    memory=support_memory,
    verbose=True
)
 
# Simulate support conversation
print("Customer Support Session:")
response1 = support_bot.predict(input="Hi! I'm having trouble with my order #12345.")
response2 = support_bot.predict(input="The package hasn't arrived yet and it's been 2 weeks.")
response3 = support_bot.predict(input="Can you check the tracking status for me?")
response4 = support_bot.predict(input="What was my order number again?")  # Should remember
 
print(f"\nMemory summary: {support_memory.moving_summary_buffer}")

Personal Tutor with Memory

from langchain.memory import ConversationKGMemory  # Knowledge Graph memory
from langchain.chains import ConversationChain
 
# Memory that builds a knowledge graph of topics discussed
kg_memory = ConversationKGMemory(llm=ChatOpenAI())
 
tutor = ConversationChain(
    llm=ChatOpenAI(temperature=0.3),
    memory=kg_memory,
    verbose=True
)
 
# Tutoring session
print("Tutoring Session:")
tutor.predict(input="I'm learning about machine learning.")
tutor.predict(input="Can you explain neural networks? I'm confused about backpropagation.")
tutor.predict(input="What's the difference between supervised and unsupervised learning?")
 
# View the knowledge graph
print(f"Knowledge graph triples: {kg_memory.kg.get_triples()}")

πŸŽ›οΈ Memory Configuration

Advanced Memory Settings

from langchain.memory import ConversationBufferMemory
 
# Configure memory with custom message formatting
memory = ConversationBufferMemory(
    return_messages=True,  # Return message objects instead of strings
    human_prefix="User",    # Custom prefix for human messages
    ai_prefix="Assistant",  # Custom prefix for AI messages
    memory_key="chat_history"  # Custom key for prompt templates
)
 
# Add memory callbacks
from langchain.callbacks.base import BaseCallbackHandler
 
class MemoryTracker(BaseCallbackHandler):
    def on_llm_end(self, response, **kwargs):
        print(f"Memory usage: {len(memory.chat_memory.messages)} messages")
 
# Use with callbacks
conversation = ConversationChain(
    llm=ChatOpenAI(callbacks=[MemoryTracker()]),
    memory=memory,
    verbose=True
)
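
The memory_key must line up with a variable in your prompt template. Once you move past ConversationChain's built-in prompt, wire the two together explicitly; string_memory below is a fresh memory created for this illustration:

from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# For a plain-string template, keep return_messages=False (the default)
# so the history renders as text; memory_key matches {chat_history}
string_memory = ConversationBufferMemory(
    memory_key="chat_history",
    human_prefix="User",
    ai_prefix="Assistant"
)

template = """Previous conversation:
{chat_history}

User: {input}
Assistant:"""

chain = LLMChain(
    llm=ChatOpenAI(),
    prompt=PromptTemplate(input_variables=["chat_history", "input"], template=template),
    memory=string_memory
)
chain.run(input="Hello!")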

🎯 Best Practices

1. Choose the Right Memory Type

  • BufferMemory: Simple applications, short conversations
  • WindowMemory: Long conversations where only recent context matters
  • SummaryMemory: Extended conversations where older turns can be compressed
  • KG Memory: Complex relationships between topics
  • EntityMemory: Tracking facts about specific people or objects

2. Manage Token Costs

# Monitor memory usage
def check_memory_size(memory):
    message_count = len(memory.chat_memory.messages)
    estimated_tokens = sum(len(msg.content.split()) * 1.3 for msg in memory.chat_memory.messages)
    print(f"Messages: {message_count}, Estimated tokens: {estimated_tokens:.0f}")
 
check_memory_size(memory)
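
For exact counts instead of the word-based estimate above, use the tiktoken tokenizer (this assumes the tiktoken package is installed and the model name matches the one you call):

import tiktoken

def count_tokens(memory, model="gpt-3.5-turbo"):
    encoding = tiktoken.encoding_for_model(model)
    return sum(len(encoding.encode(msg.content)) for msg in memory.chat_memory.messages)

print(f"Exact tokens: {count_tokens(memory)}")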

3. Clear Memory When Necessary

# Wipe the stored messages
memory.chat_memory.clear()
 
# Or call clear() on the memory itself, which also resets any derived
# state such as a running summary
memory.clear()

4. Add Metadata to Messages

from langchain.schema import HumanMessage, AIMessage
 
# Add messages with metadata
memory.chat_memory.add_message(
    HumanMessage(content="Help with Python", additional_kwargs={"topic": "programming"})
)
 
# Filter messages by metadata
programming_messages = [
    msg for msg in memory.chat_memory.messages
    if msg.additional_kwargs.get("topic") == "programming"
]

5. Combine Memory Types

from langchain.memory import CombinedMemory, ConversationEntityMemory
 
# Each memory must expose distinct variable names, so rename the
# buffer's default "history" key to avoid clashing with the entity
# memory's chat history
buffer_memory = ConversationBufferMemory(memory_key="chat_history")
entity_memory = ConversationEntityMemory(llm=ChatOpenAI())
 
combined_memory = CombinedMemory(
    memories=[buffer_memory, entity_memory]
)
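
Once combined, a single load_memory_variables call returns every variable the constituent memories expose (here the buffer's chat_history plus the entity memory's entities and history):

combined_memory.save_context(
    {"input": "I'm Dana, and I lead the infra team."},
    {"output": "Nice to meet you, Dana!"}
)
print(combined_memory.load_memory_variables({"input": "Who leads the infra team?"}))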

Memory components are essential for creating engaging, context-aware AI applications. Next, explore LangGraph for building more complex, stateful workflows.