How to Create a RAG to Scan My Notes
Fleeting - External reference: https://simonwillison.net/2023/Oct/23/embeddings/
ollama pull llama3.2
ollama pull nomic-embed-text
Made with Claude.ai
import json
import os
from pathlib import Path
import numpy as np
import requests
# Configuration
NOTES_DIR = "notes" # Directory containing your PKMS notes
EMBEDDINGS_FILE = "embeddings.json"
OLLAMA_MODEL = "llama3.2" # Change to your preferred model
EMBED_MODEL = "nomic-embed-text"
def chunk_text(text, chunk_size=500, overlap=50):
"""Simple text chunking"""
words = text.split()
chunks = []
for i in range(0, len(words), chunk_size - overlap):
chunk = " ".join(words[i : i + chunk_size])
chunks.append(chunk)
return chunks
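# With the defaults above, consecutive chunks start 450 words apart, so each
# chunk shares its last 50 words with the start of the next one.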
def get_ollama_embedding(text):
"""Get embedding from Ollama"""
response = requests.post(
"http://mac:11434/api/embeddings",
json={"model": EMBED_MODEL, "prompt": text},
)
return response.json()["embedding"]
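# The /api/embeddings endpoint returns a single vector; with nomic-embed-text
# this is a 768-dimensional list of floats.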
def process_notes():
"""Process all notes and create embeddings"""
embeddings_data = []
    for note_file in Path(NOTES_DIR).glob("**/*.org"):
print(f"Processing: {note_file}")
with open(note_file, "r", encoding="utf-8") as f:
content = f.read()
# Chunk the note
chunks = chunk_text(content)
for i, chunk in enumerate(chunks):
if len(chunk.strip()) < 50: # Skip very short chunks
continue
embedding = get_ollama_embedding(chunk)
embeddings_data.append(
{
"file": str(note_file),
"chunk_id": i,
"content": chunk,
"embedding": embedding,
}
)
# Save embeddings
with open(EMBEDDINGS_FILE, "w") as f:
json.dump(embeddings_data, f)
print(f"Processed {len(embeddings_data)} chunks from notes")
def cosine_similarity(a, b):
"""Calculate cosine similarity between two vectors"""
return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
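# Cosine similarity ranges from -1 to 1; higher means the query and chunk
# embeddings point in more similar directions.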
def search_similar(query, top_k=5):
"""Find most similar chunks to query"""
if not os.path.exists(EMBEDDINGS_FILE):
print("No embeddings found. Run process_notes() first.")
return []
# Get query embedding
query_embedding = get_ollama_embedding(query)
# Load embeddings
with open(EMBEDDINGS_FILE, "r") as f:
embeddings_data = json.load(f)
# Calculate similarities
similarities = []
for item in embeddings_data:
similarity = cosine_similarity(query_embedding, item["embedding"])
similarities.append((similarity, item))
# Sort and return top results
    similarities.sort(key=lambda x: x[0], reverse=True)  # sort by score only; dicts can't be compared on ties
return [item for _, item in similarities[:top_k]]
def rag_query(question):
"""Perform RAG query"""
# Find relevant contexts
contexts = search_similar(question, top_k=3)
if not contexts:
print("No relevant context found.")
return
# Build context string
context_str = "\n\n".join(
[f"From {ctx['file']}:\n{ctx['content']}" for ctx in contexts]
)
# Create prompt
prompt = f"""Based on the following context from my personal notes, please answer the question.
Context:
{context_str}
Question: {question}
Answer based on the context above:"""
    # Query Ollama's /api/generate endpoint
print("Querying Ollama...")
response = requests.post(
"http://mac:11434/api/generate",
json={"model": OLLAMA_MODEL, "prompt": prompt, "stream": False},
)
answer = response.json()["response"]
print(f"\nQuestion: {question}")
print(f"Answer: {answer}")
print("\nSources:")
for ctx in contexts:
print(f"- {ctx['file']}")
if __name__ == "__main__":
import sys
if len(sys.argv) < 2:
print("Usage:")
print(" python rag.py process # Process all notes and create embeddings")
print(" python rag.py query 'your question here'")
sys.exit(1)
command = sys.argv[1]
if command == "process":
process_notes()
elif command == "query" and len(sys.argv) > 2:
rag_query(" ".join(sys.argv[2:]))
else:
print("Invalid command or missing query")
cd ~/perso/perso/roam
python rag.py process
It took about 45 minutes.
cd ~/perso/perso/roam
python rag.py query "What do I think about GTD?"
trying leann
pip install leann
I needed to sed http://localhost:11434 -> my url in the source code.
leann build /home/sam/leann.index --docs . --embedding-mode ollama --force --embedding-model nomic-embed-text:latest --file-types .org
trying llm
pip install llm llm-ollama
llm collections delete roam  # to remove a possible previous embedding collection
llm embed-multi roam -m mxbai-embed-large --files . '**/*.org'
litecli ${XDG_CONFIG_HOME}/io.datasette.llm/embeddings.db
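To query the collection from the command line afterwards, llm can return the nearest stored chunks directly (a sketch; the collection name and question reuse the ones above):
llm similar roam -c "What do I think about GTD?"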
The key idea (from the article linked above) is this: a user asks a question. You search your private documents for content that appears relevant to the question, then paste excerpts of that content into the LLM (respecting its size limit, usually between 3,000 and 6,000 words) along with the original question. The LLM can then answer the question based on the additional content you provided.
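A minimal sketch of that size limit, assuming a budget of roughly 4,000 words and excerpts shaped like the items returned by search_similar() above; build_context is not part of the script, only an illustration of trimming the pasted content before building the prompt:
def build_context(excerpts, word_budget=4000):
    """Join excerpts into one context string, truncated to word_budget words."""
    parts, used = [], 0
    for ex in excerpts:
        words = ex["content"].split()
        if used + len(words) > word_budget:
            words = words[: word_budget - used]  # trim the last excerpt to fit
        if not words:
            break
        parts.append(f"From {ex['file']}:\n" + " ".join(words))
        used += len(words)
    return "\n\n".join(parts)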