LangChain Memory

Memory components allow your chains and agents to remember previous interactions, making conversations more contextual and coherent.

🧠 What is Memory?

Memory in LangChain stores and retrieves information from past interactions, enabling:

  • Contextual conversations - Remembering what was said before
  • State persistence - Maintaining information across multiple calls
  • Personalization - Tailoring responses based on user history
  • Learning - Improving responses over time
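
Under the hood, the classic memory classes covered below share a common two-method contract: save_context records an exchange, and load_memory_variables returns what the prompt should see. A minimal sketch of that contract using ConversationBufferMemory:

from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory()

# save_context records one input/output exchange
memory.save_context({"input": "Hi, I'm Alice."}, {"output": "Hello Alice!"})

# load_memory_variables returns the variables injected into the prompt
print(memory.load_memory_variables({}))
# {'history': "Human: Hi, I'm Alice.\nAI: Hello Alice!"}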

💬 Conversation Memory

ConversationBufferMemory

Stores the entire conversation history.

from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain
from langchain_openai import ChatOpenAI
 
# Initialize memory
memory = ConversationBufferMemory()
 
# Create conversation chain with memory
conversation = ConversationChain(
    llm=ChatOpenAI(),
    memory=memory,
    verbose=True
)
 
# Start conversation
response1 = conversation.predict(input="Hi! I'm working on a Python project.")
response2 = conversation.predict(input="Can you suggest some libraries for data analysis?")
 
# View the memory
print("Memory buffer:")
print(memory.buffer)
 
# View chat history
print("\nChat history:")
print(memory.chat_memory.messages)
 
# Access specific parts
print(f"\nHuman messages: {memory.chat_memory.messages[::2]}")
print(f"AI messages: {memory.chat_memory.messages[1::2]}")

ConversationBufferWindowMemory

Remembers only the last K interactions to manage token limits.

from langchain.memory import ConversationBufferWindowMemory
 
# Remember last 3 exchanges
window_memory = ConversationBufferWindowMemory(k=3, return_messages=True)
 
conversation = ConversationChain(
    llm=ChatOpenAI(),
    memory=window_memory,
    verbose=True
)
 
# Long conversation
conversation.predict(input="My name is Alice.")
conversation.predict(input="I'm a software developer.")
conversation.predict(input="I love working with Python.")
conversation.predict(input="What's my name?")  # Remembers
conversation.predict(input="What do I do?")   # Remembers
conversation.predict(input="My first message was?")  # Forgets (too far back)
 
# chat_memory stores every message; the k-exchange window is applied on load
print(f"Messages stored: {len(window_memory.chat_memory.messages)}")
print(f"Window contents: {window_memory.load_memory_variables({})}")

ConversationSummaryMemory

Summarizes conversations to save tokens.

from langchain.memory import ConversationSummaryMemory
 
# Initialize with LLM for summarization
summary_memory = ConversationSummaryMemory(llm=ChatOpenAI())
 
conversation = ConversationChain(
    llm=ChatOpenAI(),
    memory=summary_memory,
    verbose=True
)
 
# Have a detailed conversation
conversation.predict(input="I'm planning to build a web application using React. The app will be a task management tool with features for creating, editing, and deleting tasks. Users should be able to set priorities and due dates.")
conversation.predict(input="What backend technologies would you recommend for this React app?")
 
# View the summary
print("Conversation Summary:")
print(summary_memory.buffer)

🔍 Entity Memory

ConversationEntityMemory

Tracks specific entities and their properties.

from langchain.memory import ConversationEntityMemory
 
entity_memory = ConversationEntityMemory(llm=ChatOpenAI())
 
# Create a conversation that tracks entities
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
 
template = """You are a helpful assistant that tracks information about people.
Current entities: {entities}
 
Human: {input}
AI:"""
 
prompt = PromptTemplate(
    input_variables=["entities", "input"],
    template=template
)
 
chain = LLMChain(
    llm=ChatOpenAI(),
    prompt=prompt,
    memory=entity_memory,
    verbose=True
)
 
# Extract entity information
chain.run("My name is John and I work at Google as a software engineer.")
chain.run("My colleague Sarah is a product manager at the same company.")
 
# Check stored entities
print("Stored entities:")
for entity, properties in entity_memory.entity_store.store.items():
    print(f"{entity}: {properties}")
 
# Access specific entity
john_info = entity_memory.entity_store.get("John")
print(f"\nJohn's info: {john_info}")

🔧 Advanced Memory Types

ConversationTokenBufferMemory

Limits memory based on token count rather than message count.

from langchain.memory import ConversationTokenBufferMemory
 
# Limit memory to 1000 tokens
token_memory = ConversationTokenBufferMemory(
    llm=ChatOpenAI(),
    max_token_limit=1000,
    return_messages=True
)
 
conversation = ConversationChain(
    llm=ChatOpenAI(),
    memory=token_memory,
    verbose=True
)
 
# This will automatically prune old messages when token limit is exceeded
for i in range(10):
    conversation.predict(input=f"Tell me something interesting about topic {i}.")
 
print(f"Current token count: ~{len(token_memory.buffer) * 4}")  # Rough estimate

VectorStoreRetrieverMemory

Uses vector similarity to find relevant past interactions.

from langchain.memory import VectorStoreRetrieverMemory
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
 
# Create vector store for semantic search
embeddings = OpenAIEmbeddings()
vectorstore = Chroma(embedding_function=embeddings)
 
# Create retriever memory
retriever_memory = VectorStoreRetrieverMemory(
    retriever=vectorstore.as_retriever(search_kwargs={"k": 2}),
    memory_key="chat_history",
    input_key="question"  # The input field to store and search on
)
 
# Use in a conversation chain
template = """Answer the question based on the following context and chat history:
Chat History: {chat_history}
Context: {context}
Question: {question}
Answer:"""
 
prompt = PromptTemplate(
    input_variables=["chat_history", "context", "question"],
    template=template
)
 
# Create a chain that uses the memory
from langchain.chains import LLMChain
chain = LLMChain(
    llm=ChatOpenAI(),
    prompt=prompt,
    memory=retriever_memory,
    verbose=True
)
 
# Example usage
chain.invoke({
    "context": "You are helping with programming questions.",
    "question": "How do I create a class in Python?"
})
 
chain.invoke({
    "context": "You are helping with programming questions.",
    "question": "What about inheritance?"  # Will retrieve relevant previous interactions
})
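
Because each exchange is written into the vector store, you can also seed the memory with standing facts before any conversation happens, using the same save_context call (the facts below are purely illustrative):

# Pre-load the store with a fact the retriever can surface later
retriever_memory.save_context(
    {"question": "Remember that my preferred language is Python."},
    {"text": "Noted: examples will use Python."}
)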

📝 Custom Memory

Building Your Own Memory Component

from datetime import datetime
from typing import Dict, List
 
class ProjectMemory:
    """Custom memory that tracks project-related information.
 
    Kept as a plain class: its summary is injected into the prompt manually,
    so it does not need to subclass a LangChain memory base class.
    """
 
    def __init__(self):
        self.project_info: Dict[str, str] = {}
        self.decisions: List[Dict[str, str]] = []
 
    def add_project_detail(self, key: str, value: str):
        """Add project-specific information."""
        self.project_info[key] = value
 
    def add_decision(self, decision: str, reason: str):
        """Track project decisions."""
        self.decisions.append({
            "decision": decision,
            "reason": reason,
            "timestamp": str(datetime.now())
        })
 
    def get_project_summary(self) -> str:
        """Generate a summary of project information."""
        summary = "Project Details:\n"
        for key, value in self.project_info.items():
            summary += f"- {key}: {value}\n"
 
        if self.decisions:
            summary += "\nRecent Decisions:\n"
            for decision in self.decisions[-3:]:  # Last 3 decisions
                summary += f"- {decision['decision']}: {decision['reason']}\n"
 
        return summary
 
# Use custom memory in a chain
project_memory = ProjectMemory()
project_memory.add_project_detail("name", "AI Task Manager")
project_memory.add_project_detail("tech_stack", "React, Node.js, MongoDB")
 
template = """You are helping with a software project.
{project_summary}
 
Human: {input}
AI:"""
 
prompt = PromptTemplate(
    input_variables=["project_summary", "input"],
    template=template
)
 
chain = LLMChain(
    llm=ChatOpenAI(),
    prompt=prompt,
    verbose=True
)
 
# Use with project memory
result = chain.invoke({
    "project_summary": project_memory.get_project_summary(),
    "input": "What architecture would you recommend for this project?"
})
 
# Add a decision made
project_memory.add_decision(
    "Architecture choice",
    "Recommended microservices architecture for scalability"
)

🔄 Memory Persistence

Saving and Loading Memory

import json
from langchain.memory import ConversationBufferMemory
 
# Create memory with some conversation
memory = ConversationBufferMemory()
conversation = ConversationChain(llm=ChatOpenAI(), memory=memory)
conversation.predict(input="I'm building a weather app.")
 
# Save memory to file
def save_memory(memory, filename):
    # Convert messages to serializable format
    messages = [
        {"type": type(msg).__name__, "content": msg.content}
        for msg in memory.chat_memory.messages
    ]
 
    with open(filename, 'w') as f:
        json.dump(messages, f, indent=2)
 
def load_memory(filename):
    with open(filename, 'r') as f:
        messages_data = json.load(f)
 
    memory = ConversationBufferMemory()
    for msg_data in messages_data:
        # Rebuild the history from the saved type/content pairs
        if msg_data["type"] == "HumanMessage":
            memory.chat_memory.add_user_message(msg_data["content"])
        elif msg_data["type"] == "AIMessage":
            memory.chat_memory.add_ai_message(msg_data["content"])
 
    return memory
 
# Save memory
save_memory(memory, "conversation_history.json")
 
# Load memory later
loaded_memory = load_memory("conversation_history.json")
new_conversation = ConversationChain(llm=ChatOpenAI(), memory=loaded_memory)
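
If you need to preserve full message objects, including any additional_kwargs metadata, LangChain also ships messages_to_dict and messages_from_dict, which replace the hand-rolled type mapping above:

from langchain.schema import messages_from_dict, messages_to_dict

# Serialize complete message objects, metadata included
serialized = messages_to_dict(memory.chat_memory.messages)

# Restore them into a fresh memory
restored = ConversationBufferMemory()
restored.chat_memory.messages = messages_from_dict(serialized)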

🎯 Real-World Examples

Customer Support Bot with Memory

from langchain.memory import ConversationSummaryBufferMemory
from langchain.chains import ConversationChain
 
# Create memory that summarizes when getting too long
support_memory = ConversationSummaryBufferMemory(
    llm=ChatOpenAI(),
    max_token_limit=1000,  # Summarize when exceeding this limit
    return_messages=True
)
 
support_bot = ConversationChain(
    llm=ChatOpenAI(temperature=0.2),  # Lower temperature for consistency
    memory=support_memory,
    verbose=True
)
 
# Simulate support conversation
print("Customer Support Session:")
response1 = support_bot.predict(input="Hi! I'm having trouble with my order #12345.")
response2 = support_bot.predict(input="The package hasn't arrived yet and it's been 2 weeks.")
response3 = support_bot.predict(input="Can you check the tracking status for me?")
response4 = support_bot.predict(input="What was my order number again?")  # Should remember
 
print(f"\nMemory summary: {support_memory.summary}")

Personal Tutor with Memory

from langchain.memory import ConversationKGMemory  # Knowledge Graph memory
from langchain.chains import ConversationChain
 
# Memory that builds a knowledge graph of topics discussed
kg_memory = ConversationKGMemory(llm=ChatOpenAI())
 
tutor = ConversationChain(
    llm=ChatOpenAI(temperature=0.3),
    memory=kg_memory,
    verbose=True
)
 
# Tutoring session
print("Tutoring Session:")
tutor.predict(input="I'm learning about machine learning.")
tutor.predict(input="Can you explain neural networks? I'm confused about backpropagation.")
tutor.predict(input="What's the difference between supervised and unsupervised learning?")
 
# View the knowledge graph
print(f"Knowledge Graph Entities: {kg_memory.kg.get_triples()}")

🎛️ Memory Configuration

Advanced Memory Settings

from langchain.memory import ConversationBufferMemory
from langchain.schema import BaseMessage
 
# Configure memory with custom message formatting
memory = ConversationBufferMemory(
    return_messages=True,   # Return message objects instead of a formatted string
    human_prefix="User",    # Prefix for human turns in the string buffer
    ai_prefix="Assistant",  # Prefix for AI turns in the string buffer
    memory_key="history"    # Key read by the prompt (ConversationChain expects "history")
)
 
# Add memory callbacks
from langchain.callbacks.base import BaseCallbackHandler
 
class MemoryTracker(BaseCallbackHandler):
    def on_llm_end(self, response, **kwargs):
        print(f"Memory usage: {len(memory.chat_memory.messages)} messages")
 
# Use with callbacks
conversation = ConversationChain(
    llm=ChatOpenAI(callbacks=[MemoryTracker()]),
    memory=memory,
    verbose=True
)

🎯 Best Practices

1. Choose the Right Memory Type

  • BufferMemory: Simple applications, short conversations
  • WindowMemory: Long conversations with recent context
  • SummaryMemory: Extended conversations needing context
  • KG Memory: Complex topic relationships
  • EntityMemory: Tracking specific people/objects
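
One way to apply this guidance in code is a small factory that picks a memory type by expected conversation length; a minimal sketch (make_memory and its thresholds are hypothetical, not a LangChain API):

from langchain.memory import (
    ConversationBufferMemory,
    ConversationBufferWindowMemory,
    ConversationSummaryMemory,
)

def make_memory(expected_turns: int, llm=None):
    """Hypothetical helper: choose a memory type by conversation length."""
    if expected_turns <= 10:
        return ConversationBufferMemory()           # Short chats: keep everything
    if llm is None:
        return ConversationBufferWindowMemory(k=5)  # No summarizer LLM: keep recent turns
    return ConversationSummaryMemory(llm=llm)       # Long chats: summarize older context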

2. Manage Token Costs

# Monitor memory usage
def check_memory_size(memory):
    message_count = len(memory.chat_memory.messages)
    estimated_tokens = sum(len(msg.content.split()) * 1.3 for msg in memory.chat_memory.messages)
    print(f"Messages: {message_count}, Estimated tokens: {estimated_tokens:.0f}")
 
check_memory_size(memory)

3. Clear Memory When Necessary

# Wipe the stored message history directly
memory.chat_memory.clear()
 
# Or reset via the memory wrapper (delegates to chat_memory.clear())
memory.clear()

4. Add Metadata to Messages

from langchain.schema import HumanMessage, AIMessage
 
# Add messages with metadata
memory.chat_memory.add_message(
    HumanMessage(content="Help with Python", additional_kwargs={"topic": "programming"})
)
 
# Filter messages by metadata
programming_messages = [
    msg for msg in memory.chat_memory.messages
    if msg.additional_kwargs.get("topic") == "programming"
]

5. Combine Memory Types

from langchain.memory import CombinedMemory
 
# Combine different memory types; each must expose distinct memory keys,
# so the buffer gets a key that won't clash with the entity memory's "history"
buffer_memory = ConversationBufferMemory(memory_key="chat_lines")
entity_memory = ConversationEntityMemory(llm=ChatOpenAI())
 
combined_memory = CombinedMemory(memories=[buffer_memory, entity_memory])

Memory components are essential for creating engaging, context-aware AI applications. Next, explore LangGraph for building more complex, stateful workflows.