How to Create a RAG to Scan My Notes
Fleeting

First, pull the chat model and the embedding model with Ollama (one model per pull):

ollama pull llama3.2
ollama pull nomic-embed-text
Made with Claude.ai
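The script talks to an Ollama server at http://mac:11434; adjust the host (e.g. http://localhost:11434) if yours runs elsewhere. A quick way to confirm the server is up and both models are available, a minimal sketch using Ollama's /api/tags endpoint:

import requests

# Lists the models available on the Ollama host; llama3.2 and
# nomic-embed-text should both show up before running the script below.
resp = requests.get("http://mac:11434/api/tags", timeout=5)
print([m["name"] for m in resp.json()["models"]])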
import json
import os
from pathlib import Path

import numpy as np
import requests

# Configuration
NOTES_DIR = "."  # Directory containing your PKMS notes (scanned recursively for .org files)
EMBEDDINGS_FILE = "embeddings.json"
OLLAMA_URL = "http://mac:11434"  # Ollama server; use http://localhost:11434 if it runs locally
OLLAMA_MODEL = "llama3.2"  # Change to your preferred model
EMBED_MODEL = "nomic-embed-text"

def chunk_text(text, chunk_size=500, overlap=50):
    """Simple text chunking"""
    words = text.split()
    chunks = []
    for i in range(0, len(words), chunk_size - overlap):
        chunk = " ".join(words[i : i + chunk_size])
        chunks.append(chunk)
    return chunks

def get_ollama_embedding(text):
    """Get embedding from Ollama"""
    response = requests.post(
        f"{OLLAMA_URL}/api/embeddings",
        json={"model": EMBED_MODEL, "prompt": text},
    )
    response.raise_for_status()
    return response.json()["embedding"]

def process_notes():
    """Process all notes and create embeddings"""
    embeddings_data = []
    for note_file in Path(NOTES_DIR).glob("**/*.org"):
        print(f"Processing: {note_file}")
        with open(note_file, "r", encoding="utf-8") as f:
            content = f.read()

        # Chunk the note
        chunks = chunk_text(content)
        for i, chunk in enumerate(chunks):
            if len(chunk.strip()) < 50:  # Skip very short chunks
                continue
            embedding = get_ollama_embedding(chunk)
            embeddings_data.append(
                {
                    "file": str(note_file),
                    "chunk_id": i,
                    "content": chunk,
                    "embedding": embedding,
                }
            )

    # Save embeddings
    with open(EMBEDDINGS_FILE, "w") as f:
        json.dump(embeddings_data, f)
    print(f"Processed {len(embeddings_data)} chunks from notes")

def cosine_similarity(a, b):
    """Calculate cosine similarity between two vectors"""
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

def search_similar(query, top_k=5):
    """Find most similar chunks to query"""
    if not os.path.exists(EMBEDDINGS_FILE):
        print("No embeddings found. Run process_notes() first.")
        return []

    # Get query embedding
    query_embedding = get_ollama_embedding(query)

    # Load embeddings
    with open(EMBEDDINGS_FILE, "r") as f:
        embeddings_data = json.load(f)

    # Calculate similarities
    similarities = []
    for item in embeddings_data:
        similarity = cosine_similarity(query_embedding, item["embedding"])
        similarities.append((similarity, item))

    # Sort on the score only; sorting the (score, dict) tuples directly would
    # raise a TypeError on ties, because dicts are not comparable
    similarities.sort(key=lambda pair: pair[0], reverse=True)
    return [item for _, item in similarities[:top_k]]

def rag_query(question):
    """Perform RAG query"""
    # Find relevant contexts
    contexts = search_similar(question, top_k=3)
    if not contexts:
        print("No relevant context found.")
        return

    # Build context string
    context_str = "\n\n".join(
        [f"From {ctx['file']}:\n{ctx['content']}" for ctx in contexts]
    )

    # Create prompt
    prompt = f"""Based on the following context from my personal notes, please answer the question.

Context:
{context_str}

Question: {question}

Answer based on the context above:"""

    # Query Ollama's generate endpoint
    print("Querying Ollama...")
    response = requests.post(
        f"{OLLAMA_URL}/api/generate",
        json={"model": OLLAMA_MODEL, "prompt": prompt, "stream": False},
    )
    answer = response.json()["response"]

    print(f"\nQuestion: {question}")
    print(f"Answer: {answer}")
    print("\nSources:")
    for ctx in contexts:
        print(f"- {ctx['file']}")

if __name__ == "__main__":
    import sys

    if len(sys.argv) < 2:
        print("Usage:")
        print("  python rag.py process                     # Process all notes and create embeddings")
        print("  python rag.py query 'your question here'")
        sys.exit(1)

    command = sys.argv[1]
    if command == "process":
        process_notes()
    elif command == "query" and len(sys.argv) > 2:
        rag_query(" ".join(sys.argv[2:]))
    else:
        print("Invalid command or missing query")
To index the notes, run the script from the notes directory:

cd ~/perso/perso/roam
python rag.py process

It took about 45 minutes.
Then ask a question:

cd ~/perso/perso/roam
python rag.py query "What do I think about GTD?"
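The retrieval step can also be inspected on its own, without generating an answer, to see which chunks a question actually pulls in. A minimal sketch from a Python session, again assuming the file is named rag.py:

from rag import search_similar

for hit in search_similar("What do I think about GTD?", top_k=3):
    print(hit["file"])
    print(hit["content"][:120])
    print()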