Hybrid Search

Combine semantic vector search with keyword-based search for better retrieval

Overview

Hybrid search combines the strengths of two retrieval methods:

  • Semantic search (vector/dense): Understands meaning and context
  • Keyword search (BM25/sparse): Matches exact terms and rare words

By combining both, you get the best of both worlds: semantic understanding AND precise term matching.

Why Hybrid Search?

Semantic Search Limitations

Struggles with:

  • Exact matches (product codes, IDs, names)
  • Rare or technical terms
  • Acronyms and abbreviations
  • Numbers and dates

# Semantic search fails:
query = "iPhone 15 Pro"
# May return: "iPhone 14 Pro", "Samsung Galaxy S23"
# Misses exact model number

Keyword Search Limitations

Struggles with:

  • Synonyms and paraphrasing
  • Conceptual similarity
  • Context and meaning

# Keyword search fails:
query = "how to fix broken screen"
# Misses: "screen repair guide", "display replacement tutorial"
# Only matches exact words

Hybrid Solution

# Hybrid search succeeds:
query = "iPhone 15 Pro screen repair"
# Combines:
# - Semantic: "screen repair", "display replacement"
# - Keyword: "iPhone 15 Pro" (exact match)
# Result: Best of both!

Implementation

Basic Hybrid Search

from sentence_transformers import SentenceTransformer
from rank_bm25 import BM25Okapi
import numpy as np

class HybridRetriever:
    def __init__(self, documents):
        self.documents = documents
        
        # Semantic search setup
        self.semantic_model = SentenceTransformer('all-mpnet-base-v2')
        self.doc_embeddings = self.semantic_model.encode(documents)
        
        # Keyword search setup
        tokenized_docs = [doc.lower().split() for doc in documents]
        self.bm25 = BM25Okapi(tokenized_docs)
    
    def hybrid_search(self, query, k=5, alpha=0.5):
        """
        Hybrid search combining semantic and keyword
        
        Args:
            query: Search query
            k: Number of results
            alpha: Weight for semantic (0=keyword only, 1=semantic only)
        """
        # Semantic scores
        query_emb = self.semantic_model.encode(query)
        semantic_scores = np.dot(self.doc_embeddings, query_emb)
        semantic_scores = (semantic_scores - semantic_scores.min()) / \
                         (semantic_scores.max() - semantic_scores.min() + 1e-6)
        
        # Keyword scores (BM25)
        tokenized_query = query.lower().split()
        bm25_scores = self.bm25.get_scores(tokenized_query)
        bm25_scores = (bm25_scores - bm25_scores.min()) / \
                     (bm25_scores.max() - bm25_scores.min() + 1e-6)
        
        # Combine scores
        hybrid_scores = alpha * semantic_scores + (1 - alpha) * bm25_scores
        
        # Get top K
        top_indices = np.argsort(hybrid_scores)[::-1][:k]
        
        return [(self.documents[i], hybrid_scores[i]) for i in top_indices]

# Usage
documents = [
    "iPhone 15 Pro features A17 Pro chip",
    "iPhone 14 Pro has great camera",
    "Samsung Galaxy S23 Ultra review",
    "How to repair iPhone screen",
    "Display replacement guide for smartphones"
]

retriever = HybridRetriever(documents)

results = retriever.hybrid_search(
    query="iPhone 15 Pro screen repair",
    k=3,
    alpha=0.5  # Equal weight
)

for doc, score in results:
    print(f"{score:.3f}: {doc}")

With Vector Database

import lancedb
from rank_bm25 import BM25Okapi
from sentence_transformers import SentenceTransformer

class VectorDBHybridSearch:
    def __init__(self, db_path="./hybrid-db"):
        self.model = SentenceTransformer('all-mpnet-base-v2')
        self.db = lancedb.connect(db_path)
    
    def index_documents(self, documents):
        """Index documents for hybrid search"""
        self.documents = documents
        
        # Create embeddings
        embeddings = self.model.encode(documents)
        
        # Store in vector DB
        data = [
            {
                "id": i,
                "text": doc,
                "vector": emb.tolist(),
            }
            for i, (doc, emb) in enumerate(zip(documents, embeddings))
        ]
        
        self.table = self.db.create_table("docs", data, mode="overwrite")
        
        # Build BM25 index over the same documents
        tokenized_docs = [doc.lower().split() for doc in documents]
        self.bm25 = BM25Okapi(tokenized_docs)
    
    def search(self, query, k=10, alpha=0.5):
        """Hybrid search"""
        # Vector search
        query_emb = self.model.encode(query)
        vector_results = self.table.search(query_emb).limit(k * 2).to_list()
        
        # BM25 search
        tokenized_query = query.lower().split()
        bm25_scores = self.bm25.get_scores(tokenized_query)
        
        # Normalize BM25 scores to 0-1 so they are comparable
        # to the vector similarity scores below
        bm25_scores = (bm25_scores - bm25_scores.min()) / \
                      (bm25_scores.max() - bm25_scores.min() + 1e-6)
        
        # Combine scores per document
        doc_scores = {}
        
        # Add vector scores
        for result in vector_results:
            doc_id = result['id']
            # Convert distance to similarity in (0, 1]
            sim_score = 1 / (1 + result['_distance'])
            doc_scores[doc_id] = alpha * sim_score
        
        # Add BM25 scores
        for doc_id, bm25_score in enumerate(bm25_scores):
            if doc_id in doc_scores:
                doc_scores[doc_id] += (1 - alpha) * bm25_score
            else:
                doc_scores[doc_id] = (1 - alpha) * bm25_score
        
        # Sort and return top K, looking up texts via the stored document list
        sorted_docs = sorted(doc_scores.items(), key=lambda x: x[1], reverse=True)[:k]
        
        return [(self.documents[doc_id], score) for doc_id, score in sorted_docs]
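
Usage follows the same pattern as the basic retriever; a minimal sketch, reusing the documents list from the earlier example:

# Usage
db_search = VectorDBHybridSearch(db_path="./hybrid-db")
db_search.index_documents(documents)

results = db_search.search("iPhone 15 Pro screen repair", k=3, alpha=0.5)
for doc, score in results:
    print(f"{score:.3f}: {doc}")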

Score Normalization

Min-Max Normalization

def normalize_scores(scores):
    """Normalize to 0-1 range"""
    min_score = np.min(scores)
    max_score = np.max(scores)
    return (scores - min_score) / (max_score - min_score + 1e-6)

Z-Score Normalization

def z_score_normalize(scores):
    """Normalize using z-scores"""
    mean = np.mean(scores)
    std = np.std(scores)
    return (scores - mean) / (std + 1e-6)
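
Min-max normalization is simple but sensitive to outliers: one extreme score compresses the rest toward zero. Z-scores preserve the relative spread and behave better when the two methods produce differently shaped score distributions, though they can be negative, so the combined score is no longer bounded to 0-1.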

Reciprocal Rank Fusion (RRF)

def reciprocal_rank_fusion(semantic_ranks, keyword_ranks, k=60):
    """
    Combine rankings using RRF
    
    RRF(d) = Σ 1/(k + rank(d)), summed over each result list
    """
    scores = {}
    
    # Add semantic ranks
    for rank, doc_id in enumerate(semantic_ranks):
        scores[doc_id] = scores.get(doc_id, 0) + 1/(k + rank + 1)
    
    # Add keyword ranks
    for rank, doc_id in enumerate(keyword_ranks):
        scores[doc_id] = scores.get(doc_id, 0) + 1/(k + rank + 1)
    
    # Sort by combined score
    return sorted(scores.items(), key=lambda x: x[1], reverse=True)

# Usage
semantic_results = [0, 2, 5, 7, 9]  # doc IDs
keyword_results = [1, 0, 3, 5, 8]   # doc IDs

fused = reciprocal_rank_fusion(semantic_results, keyword_results)
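
Because RRF combines ranks rather than raw scores, no score normalization is needed. To produce the rank lists from the retriever above, rank documents by each method separately; a minimal sketch using a hypothetical get_rank_lists helper built on the HybridRetriever internals:

def get_rank_lists(retriever, query, k=10):
    """Rank doc IDs best-first by each method separately"""
    query_emb = retriever.semantic_model.encode(query)
    semantic_scores = np.dot(retriever.doc_embeddings, query_emb)
    bm25_scores = retriever.bm25.get_scores(query.lower().split())
    
    # argsort ascending, reversed -> doc IDs ordered best-first
    semantic_ranks = np.argsort(semantic_scores)[::-1][:k].tolist()
    keyword_ranks = np.argsort(bm25_scores)[::-1][:k].tolist()
    return semantic_ranks, keyword_ranks

semantic_ranks, keyword_ranks = get_rank_lists(retriever, "iPhone 15 Pro screen repair")
fused = reciprocal_rank_fusion(semantic_ranks, keyword_ranks)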

Alpha Parameter Tuning

Query-Adaptive Alpha

def adaptive_alpha(query):
    """Adjust alpha based on query characteristics"""
    
    # Has exact terms (product codes, IDs)
    if any(char.isdigit() for char in query):
        return 0.3  # Favor keyword search
    
    # Has quotes (exact phrase)
    if '"' in query:
        return 0.2  # Strong keyword preference
    
    # Long conceptual query
    if len(query.split()) > 8:
        return 0.7  # Favor semantic search
    
    # Default: balanced
    return 0.5

# Usage
query = "iPhone 15 Pro"
alpha = adaptive_alpha(query)  # 0.3 (has numbers)
results = retriever.hybrid_search(query, alpha=alpha)

Domain-Specific Alpha

# E-commerce: Favor keyword (exact product matches)
ECOMMERCE_ALPHA = 0.3

# Documentation: Favor semantic (conceptual search)
DOCS_ALPHA = 0.7

# General search: Balanced
GENERAL_ALPHA = 0.5
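
One way to wire these constants in; a small sketch with a hypothetical domain lookup around the retriever from earlier:

# Pick alpha by domain (hypothetical mapping)
DOMAIN_ALPHAS = {
    "ecommerce": ECOMMERCE_ALPHA,
    "docs": DOCS_ALPHA,
    "general": GENERAL_ALPHA,
}

def search_for_domain(query, domain="general", k=5):
    alpha = DOMAIN_ALPHAS.get(domain, GENERAL_ALPHA)
    return retriever.hybrid_search(query, k=k, alpha=alpha)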

Advanced Techniques

Field-Specific Weighting

class FieldWeightedHybrid:
    """Sketch: assumes a field-aware hybrid_search(query, field=..., alpha=...)
    helper that runs hybrid search over a single document field"""
    
    def search(self, query, k=5):
        """Weight different fields differently"""
        
        # Search in title (higher weight for keywords)
        title_results = self.hybrid_search(
            query,
            field='title',
            alpha=0.3  # Favor exact matches in titles
        )
        
        # Search in content (higher weight for semantic)
        content_results = self.hybrid_search(
            query,
            field='content',
            alpha=0.7  # Favor semantic in content
        )
        
        # Combine with field weights
        combined = {}
        for doc, score in title_results:
            combined[doc] = 0.6 * score  # Title weight: 60%
        
        for doc, score in content_results:
            combined[doc] = combined.get(doc, 0) + 0.4 * score  # Content: 40%
        
        return sorted(combined.items(), key=lambda x: x[1], reverse=True)[:k]

Query Expansion + Hybrid

def expanded_hybrid_search(query, k=5):
    """Expand query then do hybrid search"""
    
    # Expand query with synonyms (expand_query is sketched below)
    expanded_query = expand_query(query)
    # e.g. "phone repair" -> "phone repair fix maintenance"
    
    # Hybrid search on expanded query
    results = retriever.hybrid_search(expanded_query, k=k)
    
    return results
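
expand_query is not defined above; a minimal sketch using a static synonym table (a real system might use WordNet, word embeddings, or an LLM instead):

# Illustrative synonym table; extend for your domain
SYNONYMS = {
    "repair": ["fix", "maintenance"],
    "phone": ["smartphone", "mobile"],
}

def expand_query(query):
    """Append known synonyms to the original query terms"""
    terms = query.lower().split()
    expanded = list(terms)
    for term in terms:
        expanded.extend(SYNONYMS.get(term, []))
    return " ".join(expanded)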

Multi-Stage Hybrid

def multi_stage_hybrid(query, k=5):
    """
    Stage 1: Hybrid retrieval (fast, broad)
    Stage 2: Semantic re-ranking (slow, precise)
    """
    # Stage 1: Get candidates with hybrid
    candidates = retriever.hybrid_search(query, k=k*5, alpha=0.5)
    
    # Stage 2: Re-rank with cross-encoder
    from sentence_transformers import CrossEncoder
    reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
    
    pairs = [[query, doc] for doc, _ in candidates]
    scores = reranker.predict(pairs)
    
    # Sort by reranker scores
    reranked = sorted(zip(candidates, scores), key=lambda x: x[1], reverse=True)
    
    return [doc for (doc, _), score in reranked[:k]]

Evaluation

Compare Approaches

def compare_search_methods(queries, ground_truth):
    """Compare semantic, keyword, and hybrid"""
    
    results = {
        'semantic': [],
        'keyword': [],
        'hybrid': []
    }
    
    for query, relevant_docs in zip(queries, ground_truth):
        # Semantic only
        sem_results = retriever.hybrid_search(query, alpha=1.0)
        results['semantic'].append(calculate_recall(sem_results, relevant_docs))
        
        # Keyword only
        kw_results = retriever.hybrid_search(query, alpha=0.0)
        results['keyword'].append(calculate_recall(kw_results, relevant_docs))
        
        # Hybrid
        hyb_results = retriever.hybrid_search(query, alpha=0.5)
        results['hybrid'].append(calculate_recall(hyb_results, relevant_docs))
    
    # Average performance
    for method, recalls in results.items():
        print(f"{method}: {np.mean(recalls):.2%}")

Use Cases

E-commerce

# Product search: exact SKUs + semantic understanding
query = "wireless headphones noise cancelling"
results = retriever.hybrid_search(query, alpha=0.4)
# Finds: Exact "wireless headphones" + semantically similar "bluetooth earbuds"

Documentation

# Code/docs search: function names + concepts
query = "authenticate user JWT"
results = retriever.hybrid_search(query, alpha=0.6)
# Finds: Exact "JWT" + semantic "authentication", "login"

Legal/Medical

# Precise terminology + context
query = "myocardial infarction treatment"
results = retriever.hybrid_search(query, alpha=0.5)
# Finds: Exact medical term + related concepts

Best Practices

  1. Start with α=0.5 and tune based on domain
  2. Use query-adaptive alpha for different query types
  3. Normalize scores before combining
  4. Monitor both methods separately to identify weaknesses
  5. A/B test to find the optimal alpha for your use case (an offline sweep is sketched below)
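
A simple offline sweep over a labeled query set is one way to do that tuning; a minimal sketch reusing calculate_recall from the Evaluation section:

def tune_alpha(queries, ground_truth, alphas=(0.0, 0.25, 0.5, 0.75, 1.0)):
    """Return the alpha with the best average recall"""
    best_alpha, best_recall = 0.5, -1.0
    for alpha in alphas:
        recalls = [
            calculate_recall(retriever.hybrid_search(q, alpha=alpha), relevant)
            for q, relevant in zip(queries, ground_truth)
        ]
        avg = float(np.mean(recalls))
        if avg > best_recall:
            best_alpha, best_recall = alpha, avg
    return best_alpha, best_recall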
