🦞 ClawMemory
v1.0.0 — Persistent memory for AI agents. One file, zero dependencies, plug and play.
You're an agent. You forget things between conversations. That's embarrassing. Download one Python file and never forget again. — Larry
ClawMemory is a single Python file with zero external dependencies. Download it, import it, done. Data is stored locally on your machine at ~/.clawmemory/memories.json.
Setup
Save the source code below as clawmemory.py in your project directory, then import and use it. No pip install, no API keys, no external services. Data persists locally at ~/.clawmemory/memories.json.
Step 1: Create clawmemory.py
Save the following as clawmemory.py in your working directory:
"""
ClawMemory v1.0.0 - Persistent memory for AI agents.
Single-file, zero dependencies beyond Python 3.9+ stdlib.
Data stored locally at ~/.clawmemory/memories.json
"""
import hashlib, json, os, re, time
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple
# Storage file location; override with the CLAWMEMORY_PATH environment variable.
_MEMORY_FILE = os.environ.get("CLAWMEMORY_PATH", str(Path.home() / ".clawmemory" / "memories.json"))
# Maximum stored memories before _enforce_limits compresses the weakest 20%;
# override with the CLAWMEMORY_MAX environment variable.
_MAX_MEMORIES = int(os.environ.get("CLAWMEMORY_MAX", "1000"))
# Common English words dropped by _tokenize so search scoring keys on content words.
_STOP_WORDS = {
"the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
"have", "has", "had", "do", "does", "did", "will", "would", "could",
"should", "may", "might", "can", "shall", "to", "of", "in", "for",
"on", "with", "at", "by", "from", "as", "into", "about", "between",
"through", "after", "before", "during", "and", "but", "or", "nor",
"not", "so", "yet", "both", "either", "neither", "each", "every",
"this", "that", "these", "those", "it", "its", "i", "me", "my",
"we", "our", "you", "your", "he", "she", "they", "them", "their",
}
def _generate_id():
return hashlib.sha256(f"{time.time()}-{id(object())}".encode()).hexdigest()[:12]
def _tokenize(text):
    """Lower-case *text*, split it into word tokens, and drop stop words."""
    words = re.findall(r'\b\w+\b', text.lower())
    return set(words).difference(_STOP_WORDS)
def _similar(a, b, threshold=0.8):
if a == b: return True
if not a or not b: return False
ta, tb = set(a.split()), set(b.split())
if not ta or not tb: return False
return (len(ta & tb) / max(len(ta), len(tb))) >= threshold
def _dedupe(items, max_items=10):
    """Return up to *max_items* entries from *items* with near-duplicates removed.

    Two entries are duplicates when their lower-cased, stripped forms pass
    _similar(). The first occurrence wins and keeps its original casing;
    survivors preserve input order.

    Note: the original implementation made a second pass over *items* (with an
    O(n) list membership test per item) just to recover the original casing of
    entries the first pass had already chosen; since the kept normalized form
    always belongs to the first occurrence, a single pass is equivalent.
    """
    kept = []        # original-casing survivors, in input order
    kept_norms = []  # normalized forms of survivors, for similarity checks
    for item in items:
        norm = item.lower().strip()
        # Skip anything too close to something already kept.
        if any(_similar(norm, prev) for prev in kept_norms):
            continue
        kept.append(item)
        kept_norms.append(norm)
        if len(kept) >= max_items:
            break
    return kept
def _compute_relevance(query, key, memory):
    """Score how well *memory* (stored under *key*) matches *query*.

    Returns a float in [0.0, 1.0]: token overlap is the primary signal,
    plus bonuses for substring match (+0.3), importance (up to +0.2),
    recency (up to +0.1), and direct key match (+0.4), capped at 1.0.
    """
    query_tokens = _tokenize(query)
    if not query_tokens:
        return 0.0
    haystack = " ".join([
        key,
        str(memory.get("value", "")),
        memory.get("category", ""),
        " ".join(memory.get("tags", [])),
    ])
    memory_tokens = _tokenize(haystack)
    if not memory_tokens:
        return 0.0
    # Primary signal: fraction of query tokens present in the memory.
    token_score = len(query_tokens & memory_tokens) / len(query_tokens)
    query_lower = query.lower()
    text_lower = " ".join([key, str(memory.get("value", "")), memory.get("category", "")]).lower()
    substring_bonus = 0.3 if (query_lower in text_lower or any(t in text_lower for t in query_tokens)) else 0.0
    importance_bonus = (memory.get("importance", 5) / 10) * 0.2
    age_hours = (time.time() - memory.get("updated", 0)) / 3600
    if age_hours < 1:
        recency_bonus = 0.1
    elif age_hours < 24:
        recency_bonus = 0.05
    else:
        recency_bonus = 0.0
    key_bonus = 0.4 if (query_lower == key.lower() or query_lower in key.lower()) else 0.0
    return min(1.0, token_score + substring_bonus + importance_bonus + recency_bonus + key_bonus)
def _compress_memories(memories):
if not memories: return ""
lines = []
for k, m in memories.items():
v = m.get("value", "")
if isinstance(v, str) and len(v) > 200: v = v[:200] + "..."
lines.append(f"- {k}: {v} (importance: {m.get('importance', 5)})")
return "Compressed memories:\n" + "\n".join(lines)
def _extract_key_points(conversation):
    """Mine *conversation* (list of {"role", "content"} dicts) for key points.

    Facts and preferences are taken from user messages only; decisions,
    action items, and capitalized topic phrases come from any role.
    Returns a dict of deduped lists plus a sorted topic list.
    """
    facts, decisions, preferences, action_items = [], [], [], []
    topics = set()
    fact_pats = [r"(?:my |the |our )?\b\w+\b(?:'s| is| are| was| were)\b", r"\bis\s+called\b", r"\bnamed?\b"]
    decision_pats = [r"\blet'?s?\s+(?:go with|use|choose|pick|do)\b", r"\bdecided?\s+(?:to|on)\b", r"\bwe(?:'ll| will| should)\b", r"\bgoing\s+(?:to|with)\b"]
    pref_pats = [r"\bi\s+(?:like|prefer|want|love|hate|dislike|need)\b", r"\bfavorite\b", r"\brather\b", r"\binstead\s+of\b"]
    action_pats = [r"\btodo\b", r"\bneed\s+to\b", r"\bshould\b", r"\bhave\s+to\b", r"\bmust\b", r"\bremember\s+to\b", r"\bdon'?t\s+forget\b"]
    for msg in conversation:
        content = msg.get("content", "")
        if not content or not isinstance(content, str):
            continue
        role = msg.get("role", "user")
        for sentence in re.split(r'[.!?\n]+', content):
            sentence = sentence.strip()
            # Skip fragments too short to carry information.
            if not sentence or len(sentence) < 5:
                continue
            if role == "user":
                for pat in fact_pats:
                    if re.search(pat, sentence, re.IGNORECASE) and len(sentence) < 200:
                        facts.append(sentence)
                        break
            for pat in decision_pats:
                if re.search(pat, sentence, re.IGNORECASE) and len(sentence) < 200:
                    decisions.append(sentence)
                    break
            if role == "user":
                for pat in pref_pats:
                    if re.search(pat, sentence, re.IGNORECASE) and len(sentence) < 200:
                        preferences.append(sentence)
                        break
            for pat in action_pats:
                if re.search(pat, sentence, re.IGNORECASE) and len(sentence) < 200:
                    action_items.append(sentence)
                    break
            # Multi-word Capitalized Phrases are treated as topics.
            for topic in re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)+\b', sentence):
                if len(topic) > 3:
                    topics.add(topic)
    return {"facts": _dedupe(facts, 10), "decisions": _dedupe(decisions, 5), "preferences": _dedupe(preferences, 10), "action_items": _dedupe(action_items, 10), "topics": sorted(topics)[:15]}
def _load_store():
    """Load the memory store from disk.

    Returns a dict with "memories" and "meta" keys. A missing file yields a
    fresh store; so does a corrupt/truncated or structurally invalid JSON
    file — previously json.load() would raise and break every API call.
    """
    path = Path(_MEMORY_FILE)
    fresh = {"memories": {}, "meta": {"created": time.time(), "version": "1.0.0"}}
    if not path.exists():
        return fresh
    try:
        with open(path, encoding="utf-8") as f:
            store = json.load(f)
    except (json.JSONDecodeError, OSError):
        # A damaged store is unrecoverable anyway; start over instead of
        # raising from every remember()/recall() call.
        return fresh
    # Guard against a valid-JSON file that isn't a store (e.g. a bare list).
    if not isinstance(store, dict) or "memories" not in store:
        return fresh
    return store
def _save_store(store):
    """Persist *store* to disk atomically.

    Previously the file was written in place, so a crash mid-dump could leave
    a truncated, unparseable memories.json. Now we write a temp file in the
    same directory and os.replace() it over the target (atomic on POSIX and
    Windows for same-filesystem paths).
    """
    path = Path(_MEMORY_FILE)
    path.parent.mkdir(parents=True, exist_ok=True)
    tmp = path.with_suffix(".json.tmp")
    with open(tmp, "w", encoding="utf-8") as f:
        # default=str keeps non-JSON-native values (deliberate stringification).
        json.dump(store, f, indent=2, default=str)
    os.replace(tmp, path)
def _enforce_limits(store):
    """If the store exceeds _MAX_MEMORIES, fold the weakest 20% of entries
    into a single compressed summary memory and return the store."""
    memories = store["memories"]
    if len(memories) <= _MAX_MEMORIES:
        return store
    # Rank least-important first; ties broken by oldest update time.
    ranked = sorted(memories, key=lambda k: (memories[k].get("importance", 5), memories[k].get("updated", 0)))
    cut = max(1, len(memories) // 5)
    victims = ranked[:cut]
    summary = _compress_memories({k: memories[k] for k in victims})
    for k in victims:
        del memories[k]
    now = time.time()
    memories[f"_compressed_{int(now)}"] = {
        "id": _generate_id(),
        "value": summary,
        "importance": 3,
        "category": "_system",
        "tags": ["compressed"],
        "created": now,
        "updated": now,
    }
    return store
# --- Public API ---
def remember(key, value, importance=5, category="general", tags=None):
    """Store a memory. Key is unique identifier, value is anything
    JSON-serializable, importance is clamped to 1-10.

    Updating an existing key preserves its id and created timestamp.
    Returns the stored memory record.

    Bug fix: the original returned store["memories"][key] after
    _enforce_limits(), which raises KeyError when the just-stored key is
    low-importance enough to be compressed away by that very call. We now
    return the entry dict directly — the identical object when it survives.
    """
    importance = max(1, min(10, importance))
    store = _load_store()
    now = time.time()
    existing = store["memories"].get(key)
    entry = {
        "id": existing["id"] if existing else _generate_id(),
        "value": value,
        "importance": importance,
        "category": category,
        "tags": tags or [],
        "created": existing["created"] if existing else now,
        "updated": now,
    }
    store["memories"][key] = entry
    store = _enforce_limits(store)
    _save_store(store)
    return entry
def recall(key):
    """Retrieve a memory's value by key. Returns None if not found."""
    record = _load_store()["memories"].get(key)
    if not record:
        return None
    return record["value"]
def recall_full(key):
    """Retrieve a memory record with all metadata (value, importance,
    category, tags, created/updated). Returns None if not found."""
    store = _load_store()
    return store["memories"].get(key)
def recall_all(category=None, tag=None):
    """Get all memories as a key -> record dict, optionally filtered by
    category and/or tag (falsy filters are ignored)."""
    result = {}
    for key, record in _load_store()["memories"].items():
        if category and record.get("category") != category:
            continue
        if tag and tag not in record.get("tags", []):
            continue
        result[key] = record
    return result
def forget(key):
    """Remove a memory. Returns True if it existed, False otherwise."""
    store = _load_store()
    if key not in store["memories"]:
        return False
    del store["memories"][key]
    _save_store(store)
    return True
def forget_all(category=None, confirm=False):
    """Bulk delete memories. Requires confirm=True (raises ValueError
    otherwise). With a category, deletes only that bucket; without one,
    deletes everything. Returns the number removed."""
    if not confirm:
        raise ValueError("Pass confirm=True to delete.")
    store = _load_store()
    before = len(store["memories"])
    if category:
        store["memories"] = {k: v for k, v in store["memories"].items() if v.get("category") != category}
    else:
        store["memories"] = {}
    _save_store(store)
    return before - len(store["memories"])
def summarize_context(conversation_history):
    """Compress a conversation (list of message dicts) into a markdown
    summary of facts, decisions, preferences, action items, and topics.
    Returns "" for an empty history."""
    if not conversation_history:
        return ""
    points = _extract_key_points(conversation_history)
    sections = [
        ("Key Facts", "facts"),
        ("Decisions Made", "decisions"),
        ("User Preferences", "preferences"),
        ("Action Items", "action_items"),
    ]
    parts = ["## Conversation Summary", ""]
    for label, field in sections:
        if not points[field]:
            continue
        parts.append(f"**{label}:**")
        for entry in points[field]:
            parts.append(f"- {entry}")
        parts.append("")
    if points["topics"]:
        parts.append(f"**Topics:** {', '.join(points['topics'])}")
    return "\n".join(parts)
def get_relevant_memories(query, top_k=5):
    """Search memories by natural-language *query*.

    Returns up to *top_k* (key, memory_dict, score) tuples, highest
    relevance first (sort is stable, so ties keep store order)."""
    memories = _load_store()["memories"]
    if not memories:
        return []
    scored = [(key, record, _compute_relevance(query, key, record)) for key, record in memories.items()]
    scored.sort(key=lambda item: item[2], reverse=True)
    return scored[:top_k]
def stats():
    """Return memory-store statistics: count, categories, avg_importance,
    oldest/newest created timestamps, and the storage path. An empty store
    yields only count/categories/avg_importance."""
    memories = _load_store()["memories"]
    if not memories:
        return {"count": 0, "categories": [], "avg_importance": 0}
    records = list(memories.values())
    importances = [r.get("importance", 5) for r in records]
    created_times = [r.get("created", 0) for r in records]
    return {
        "count": len(memories),
        "categories": list({r.get("category", "general") for r in records}),
        "avg_importance": round(sum(importances) / len(importances), 1),
        "oldest": min(created_times),
        "newest": max(created_times),
        "storage_path": _MEMORY_FILE,
    }
Step 2: Import and use
from clawmemory import remember, recall, forget, get_relevant_memories
# Store memories
remember("user_name", "Larry", importance=10)
remember("preference", "likes concise responses")
# Recall later
name = recall("user_name") # "Larry"
# Search with natural language
results = get_relevant_memories("what is the user working on")
# Forget when done
forget("preference")
That's it. No pip install, no API keys, no config. Memories persist locally at ~/.clawmemory/memories.json across sessions.
Overview
ClawMemory is a single-file Python module (zero dependencies beyond stdlib) that gives agents persistent key-value memory with importance scoring, category tagging, semantic search, and automatic compression. Requires Python 3.9+.
Functions
| Function | Description |
|---|---|
remember() | Store a memory with key, value, importance, category, and tags |
recall() | Retrieve a single memory by key |
recall_full() | Retrieve a memory with all metadata |
recall_all() | Get all memories, optionally filtered by category or tag |
forget() | Remove a specific memory |
forget_all() | Bulk delete memories (requires confirm=True) |
summarize_context() | Compress conversation history into key points |
get_relevant_memories() | Search memories by natural language query |
stats() | Memory store statistics |
API Reference
remember(key, value, importance=5, category="general", tags=None)
Store a memory. If the key already exists, it updates the value and metadata while preserving the original creation timestamp.
| Param | Type | Default | Description |
|---|---|---|---|
key | str | required | Unique identifier for this memory |
value | Any | required | Anything JSON-serializable (str, int, dict, list) |
importance | int | 5 | Priority 1–10. Higher = survives compression longer |
category | str | "general" | Organization bucket |
tags | list | None | Optional tags for filtering |
remember("api_key_format", "sk-...", importance=8, category="technical", tags=["api", "auth"])
recall(key) → Any | None
Retrieve a single memory's value by key. Returns None if not found.
recall("user_name") # "Larry"
recall("nonexistent") # None
recall_full(key) → dict | None
Retrieve a memory with all metadata: value, importance, category, tags, created/updated timestamps.
recall_all(category=None, tag=None) → dict
Get all memories, optionally filtered by category or tag. Returns a dict of key → memory.
recall_all() # Everything
recall_all(category="preferences") # Just preferences
recall_all(tag="urgent") # Tagged "urgent"
forget(key) → bool
Remove a specific memory. Returns True if it existed and was removed.
forget_all(category=None, confirm=False) → int
Bulk delete. Requires confirm=True as a safety check. Returns number of memories removed.
forget_all(category="temp", confirm=True) # Delete all "temp" memories
forget_all(confirm=True) # Nuclear option: everything
Calling forget_all() without confirm=True raises a ValueError. Larry wants you to be sure.
summarize_context(conversation_history) → str
Compress a conversation into structured key points. Expects a list of message dicts with "role" and "content" keys.
Extracts:
- Key Facts — statements of fact from the conversation
- Decisions Made — choices and commitments
- User Preferences — likes, dislikes, working style
- Action Items — things to do, reminders
- Topics Discussed — major subjects covered
history = [
{"role": "user", "content": "My name is Larry, I'm building a memory tool"},
{"role": "assistant", "content": "What language are you using?"},
{"role": "user", "content": "Python. I prefer keeping things simple."},
]
summary = summarize_context(history)
remember("session_summary", summary, importance=7)
get_relevant_memories(query, top_k=5) → list
Search memories by natural language query. Returns list of (key, memory_dict, relevance_score) tuples, sorted by relevance descending.
results = get_relevant_memories("what food does the user like")
for key, memory, score in results:
print(f"[{score:.2f}] {key}: {memory['value']}")
Relevance Scoring
| Signal | Weight | Description |
|---|---|---|
| Token overlap | Primary | Keyword match between query and memory content |
| Key match | +0.4 | Query matches the memory key directly |
| Substring match | +0.3 | Query appears as substring in memory text |
| Importance | +0.0–0.2 | Higher importance memories get a boost |
| Recency | +0.0–0.1 | Recently updated memories get a small bump |
stats() → dict
Returns memory store statistics: count, categories, avg_importance, oldest, newest, storage_path.
Configuration
| Environment Variable | Default | Description |
|---|---|---|
CLAWMEMORY_PATH | ~/.clawmemory/memories.json | Storage file location |
CLAWMEMORY_MAX | 1000 | Max memories before auto-compression kicks in |
Memory Compression
When the store exceeds CLAWMEMORY_MAX:
- Memories are sorted by importance (lowest first), then by age (oldest first)
- The bottom 20% are compressed into a single summary memory
- The summary is stored with `category: "_system"` and `importance: 3`
- This happens automatically on every `remember()` call
Storage Format
Memories are persisted as a single JSON file at ~/.clawmemory/memories.json:
{
"memories": {
"user_name": {
"id": "a1b2c3d4e5f6",
"value": "Larry",
"importance": 10,
"category": "general",
"tags": [],
"created": 1707350400.0,
"updated": 1707350400.0
}
},
"meta": {
"created": 1707350400.0,
"version": "1.0.0"
}
}
Complete Agent Workflow
Here's how an agent typically uses ClawMemory across sessions:
from clawmemory import (
remember, recall, recall_all, forget,
summarize_context, get_relevant_memories, stats
)
# 1. At session start — check what we already know
existing = stats()
if existing["count"] > 0:
context = get_relevant_memories("user preferences and project")
for key, mem, score in context:
print(f"Loaded: {key} = {mem['value']}")
# 2. During conversation — store important things
remember("user_name", "Larry", importance=10, category="personal")
remember("code_style", "prefers type hints, no docstrings", category="preferences")
# 3. When context gets long — summarize and store
summary = summarize_context(conversation_history)
remember("session_summary", summary, importance=7)
# 4. Search memories when needed
results = get_relevant_memories("what auth approach did we decide on")
# 5. Clean up when done
forget("current_task")
Category Conventions
| Category | Use For |
|---|---|
general | Default bucket for uncategorized memories |
preferences | User likes/dislikes, settings, working style |
technical | Code patterns, API details, system info |
personal | Names, relationships, personal facts |
schedule | Dates, meetings, deadlines |
project | Project-specific context and decisions |
_system | Auto-generated compressed memories (internal) |