
How to Create a RAG to Scan My Notes

Fleeting

ollama pull llama3.2
ollama pull nomic-embed-text
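
To check that the server is up and that both models are present, something like this should work (assuming Ollama listens on its default port on the host mac, as the script below does):

import requests

# /api/tags lists the models available on the Ollama server
models = requests.get("http://mac:11434/api/tags").json()["models"]
print([m["name"] for m in models])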

Made with Claude.ai

import json
import os
from pathlib import Path

import numpy as np
import requests

# Configuration
NOTES_DIR = "."  # Directory containing your PKMS notes (scanned recursively)
EMBEDDINGS_FILE = "embeddings.json"
OLLAMA_MODEL = "llama3.2"  # Change to your preferred model
EMBED_MODEL = "nomic-embed-text"


def chunk_text(text, chunk_size=500, overlap=50):
    """Simple text chunking"""
    words = text.split()
    chunks = []
    for i in range(0, len(words), chunk_size - overlap):
        chunk = " ".join(words[i : i + chunk_size])
        chunks.append(chunk)
    return chunks


def get_ollama_embedding(text):
    """Get embedding from Ollama"""
    response = requests.post(
        "http://mac:11434/api/embeddings",
        json={"model": EMBED_MODEL, "prompt": text},
    )
    return response.json()["embedding"]


def process_notes():
    """Process all notes and create embeddings"""
    embeddings_data = []

    for note_file in Path(NOTES_DIR).glob("**/*.org"):
        print(f"Processing: {note_file}")

        with open(note_file, "r", encoding="utf-8") as f:
            content = f.read()

        # Chunk the note
        chunks = chunk_text(content)

        for i, chunk in enumerate(chunks):
            if len(chunk.strip()) < 50:  # Skip very short chunks
                continue

            embedding = get_ollama_embedding(chunk)
            embeddings_data.append(
                {
                    "file": str(note_file),
                    "chunk_id": i,
                    "content": chunk,
                    "embedding": embedding,
                }
            )

    # Save embeddings
    with open(EMBEDDINGS_FILE, "w") as f:
        json.dump(embeddings_data, f)

    print(f"Processed {len(embeddings_data)} chunks from notes")


def cosine_similarity(a, b):
    """Calculate cosine similarity between two vectors"""
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))


def search_similar(query, top_k=5):
    """Find most similar chunks to query"""
    if not os.path.exists(EMBEDDINGS_FILE):
        print("No embeddings found. Run process_notes() first.")
        return []

    # Get query embedding
    query_embedding = get_ollama_embedding(query)

    # Load embeddings
    with open(EMBEDDINGS_FILE, "r") as f:
        embeddings_data = json.load(f)

    # Calculate similarities
    similarities = []
    for item in embeddings_data:
        similarity = cosine_similarity(query_embedding, item["embedding"])
        similarities.append((similarity, item))

    # Sort by similarity and return top results (sort on the score only:
    # the dicts in the tuples are not comparable)
    similarities.sort(key=lambda x: x[0], reverse=True)
    return [item for _, item in similarities[:top_k]]


def rag_query(question):
    """Perform RAG query"""
    # Find relevant contexts
    contexts = search_similar(question, top_k=3)

    if not contexts:
        print("No relevant context found.")
        return

    # Build context string
    context_str = "\n\n".join(
        [f"From {ctx['file']}:\n{ctx['content']}" for ctx in contexts]
    )

    # Create prompt
    prompt = f"""Based on the following context from my personal notes, please answer the question.

Context:
{context_str}

Question: {question}

Answer based on the context above:"""

    # Query Ollama's generate endpoint
    print("Querying Ollama...")
    response = requests.post(
        "http://mac:11434/api/generate",
        json={"model": OLLAMA_MODEL, "prompt": prompt, "stream": False},
    )

    answer = response.json()["response"]

    print(f"\nQuestion: {question}")
    print(f"Answer: {answer}")
    print("\nSources:")
    for ctx in contexts:
        print(f"- {ctx['file']}")


if __name__ == "__main__":
    import sys

    if len(sys.argv) < 2:
        print("Usage:")
        print("  python rag.py process  # Process all notes and create embeddings")
        print("  python rag.py query 'your question here'")
        sys.exit(1)

    command = sys.argv[1]

    if command == "process":
        process_notes()
    elif command == "query" and len(sys.argv) > 2:
        rag_query(" ".join(sys.argv[2:]))
    else:
        print("Invalid command or missing query")

cd ~/perso/perso/roam
rag.py process

It took about 45 minutes.
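
A quick sanity check on the result, a sketch that just reads the embeddings.json produced above:

import json

with open("embeddings.json") as f:
    data = json.load(f)
print(f"{len(data)} chunks, {len(data[0]['embedding'])} dimensions each")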

cd ~/perso/perso/roam
rag.py query "What do I think about GTD?"
