
How to Create a Rag to Scan My Notes

Fleeting

ollama pull llama3.2
ollama pull nomic-embed-text

Made with Claude.ai

import json
import os
from pathlib import Path

import numpy as np
import requests

# Configuration
NOTES_DIR = "."  # Directory containing your PKMS notes (run the script from the notes root)
EMBEDDINGS_FILE = "embeddings.json"
OLLAMA_MODEL = "llama3.2"  # Change to your preferred model
EMBED_MODEL = "nomic-embed-text"


def chunk_text(text, chunk_size=500, overlap=50):
    """Simple text chunking"""
    words = text.split()
    chunks = []
    for i in range(0, len(words), chunk_size - overlap):
        chunk = " ".join(words[i : i + chunk_size])
        chunks.append(chunk)
    return chunks


def get_ollama_embedding(text):
    """Get embedding from Ollama"""
    response = requests.post(
        "http://mac:11434/api/embeddings",
        json={"model": EMBED_MODEL, "prompt": text},
    )
    return response.json()["embedding"]


def process_notes():
    """Process all notes and create embeddings"""
    embeddings_data = []

    for note_file in Path(NOTES_DIR).glob("**/*.org"):
        print(f"Processing: {note_file}")

        with open(note_file, "r", encoding="utf-8") as f:
            content = f.read()

        # Chunk the note
        chunks = chunk_text(content)

        for i, chunk in enumerate(chunks):
            if len(chunk.strip()) < 50:  # Skip very short chunks
                continue

            embedding = get_ollama_embedding(chunk)
            embeddings_data.append(
                {
                    "file": str(note_file),
                    "chunk_id": i,
                    "content": chunk,
                    "embedding": embedding,
                }
            )

    # Save embeddings
    with open(EMBEDDINGS_FILE, "w") as f:
        json.dump(embeddings_data, f)

    print(f"Processed {len(embeddings_data)} chunks from notes")


def cosine_similarity(a, b):
    """Calculate cosine similarity between two vectors"""
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))


def search_similar(query, top_k=5):
    """Find most similar chunks to query"""
    if not os.path.exists(EMBEDDINGS_FILE):
        print("No embeddings found. Run process_notes() first.")
        return []

    # Get query embedding
    query_embedding = get_ollama_embedding(query)

    # Load embeddings
    with open(EMBEDDINGS_FILE, "r") as f:
        embeddings_data = json.load(f)

    # Calculate similarities
    similarities = []
    for item in embeddings_data:
        similarity = cosine_similarity(query_embedding, item["embedding"])
        similarities.append((similarity, item))

    # Sort and return top results
    similarities.sort(key=lambda x: x[0], reverse=True)
    return [item for _, item in similarities[:top_k]]


def rag_query(question):
    """Perform RAG query"""
    # Find relevant contexts
    contexts = search_similar(question, top_k=3)

    if not contexts:
        print("No relevant context found.")
        return

    # Build context string
    context_str = "\n\n".join(
        [f"From {ctx['file']}:\n{ctx['content']}" for ctx in contexts]
    )

    # Create prompt
    prompt = f"""Based on the following context from my personal notes, please answer the question.

Context:
{context_str}

Question: {question}

Answer based on the context above:"""

    # Query Ollama for the final answer
    print("Querying Ollama...")
    response = requests.post(
        "http://mac:11434/api/generate",
        json={"model": OLLAMA_MODEL, "prompt": prompt, "stream": False},
    )

    answer = response.json()["response"]

    print(f"\nQuestion: {question}")
    print(f"Answer: {answer}")
    print("\nSources:")
    for ctx in contexts:
        print(f"- {ctx['file']}")


if __name__ == "__main__":
    import sys

    if len(sys.argv) < 2:
        print("Usage:")
        print("  python rag.py process  # Process all notes and create embeddings")
        print("  python rag.py query 'your question here'")
        sys.exit(1)

    command = sys.argv[1]

    if command == "process":
        process_notes()
    elif command == "query" and len(sys.argv) > 2:
        rag_query(" ".join(sys.argv[2:]))
    else:
        print("Invalid command or missing query")

cd ~/perso/perso/roam
rag.py process

It took about 45 minutes.

cd ~/perso/perso/roam
rag.py query "What do I think about GTD?"

trying leann

pip install leann

I needed to sed http://localhost:11434 -> my Ollama URL in the source code.

leann build /home/sam/leann.index --docs . --embedding-mode ollama --force --embedding-model nomic-embed-text:latest --file-types .org
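
Assuming the search subcommand takes the same index path as build (I have not double checked the flags), querying would then look something like:

leann search /home/sam/leann.index "What do I think about GTD?"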

trying llm

pip install llm llm-ollama

llm collections delete roam # to remove a possible previous embedding collection
llm embed-multi roam -m mxbai-embed-large --files . '**/*.org'
litecli ${XDG_CONFIG_HOME}/config/io.datasette.llm/embeddings.db
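
The collection can then be queried for similar chunks with llm similar (the question here is just an example):

llm similar roam -c "What do I think about GTD?"

It prints one JSON object per match (id, score, content).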

The key idea is this: a user asks a question. You search your private documents for content that appears relevant to the question, then paste excerpts of that content into the LLM (respecting its size limit, usually between 3,000 and 6,000 words) along with the original question. The LLM can then answer the question based on the additional content you provided.

https://simonwillison.net/2023/Oct/23/embeddings/ ([2025-09-20 Sat])
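
The rag_query above just takes the top 3 chunks and does not check that size limit. A minimal sketch of enforcing it, with trim_contexts as a hypothetical helper that counts words as a rough stand-in for tokens:

def trim_contexts(contexts, max_words=3000):
    """Keep only as many retrieved chunks as fit in a rough word budget."""
    kept, used = [], 0
    for ctx in contexts:
        n = len(ctx["content"].split())
        if used + n > max_words:
            break  # stop before overflowing the model's context budget
        kept.append(ctx)
        used += n
    return kept


# e.g. retrieve more chunks, then trim before building the prompt:
# contexts = trim_contexts(search_similar(question, top_k=10))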
