The Problem
Mika leads product at a 35-person e-commerce platform selling 120,000 SKUs of industrial supplies. Site search drives 65% of purchases, but the current keyword-based autocomplete fails 30% of the time. A customer searching "waterproof adhesive for outdoor metal" gets zero results because the product is listed as "weather-resistant bonding agent — exterior ferrous surfaces." The search team estimates these failed searches cost $340K/month in lost conversions. They need autocomplete that understands intent, not just keywords.
Step 1: Build the Embedding Index
Every product gets an embedding that captures its semantic meaning. These are pre-computed and stored in PostgreSQL with pgvector for efficient similarity search.
// src/services/embedding-index.ts — Product embedding generation and storage
import OpenAI from "openai";
import { pool } from "../db";
const openai = new OpenAI();
// Schema: pgvector extension required
// CREATE EXTENSION IF NOT EXISTS vector;
// ALTER TABLE products ADD COLUMN embedding vector(1536);
// CREATE INDEX idx_products_embedding ON products USING ivfflat (embedding vector_cosine_ops) WITH (lists = 200);
const BATCH_SIZE = 100; // OpenAI embeddings API handles 100 inputs per call
export async function indexAllProducts(): Promise<{ indexed: number; errors: number }> {
let indexed = 0;
let errors = 0;
let offset = 0;
while (true) {
const { rows: products } = await pool.query(
`SELECT id, name, description, category, brand, specifications
FROM products WHERE embedding IS NULL
ORDER BY id LIMIT $1 OFFSET $2`,
[BATCH_SIZE, offset]
);
if (products.length === 0) break;
// Build rich text for each product combining all searchable fields
const inputs = products.map((p) =>
buildEmbeddingText(p.name, p.description, p.category, p.brand, p.specifications)
);
try {
const response = await openai.embeddings.create({
model: "text-embedding-3-small", // 1536 dimensions, $0.02/1M tokens
input: inputs,
});
// Batch update embeddings
const client = await pool.connect();
try {
await client.query("BEGIN");
for (let i = 0; i < products.length; i++) {
const embedding = response.data[i].embedding;
await client.query(
"UPDATE products SET embedding = $1 WHERE id = $2",
[`[${embedding.join(",")}]`, products[i].id]
);
}
await client.query("COMMIT");
indexed += products.length;
} catch (err) {
await client.query("ROLLBACK");
errors += products.length;
} finally {
client.release();
}
} catch (err) {
errors += products.length;
}
offset += BATCH_SIZE;
}
return { indexed, errors };
}
function buildEmbeddingText(
name: string,
description: string,
category: string,
brand: string,
specs: Record<string, string> | null
): string {
// Combine fields with semantic context so the embedding captures all aspects
const parts = [
name,
description,
`Category: ${category}`,
`Brand: ${brand}`,
];
if (specs) {
// Include key specifications as natural language
const specText = Object.entries(specs)
.map(([k, v]) => `${k}: ${v}`)
.join(", ");
parts.push(`Specifications: ${specText}`);
}
return parts.join(". ");
}
// Incremental indexing: called when products are created or updated
export async function indexProduct(productId: string): Promise<void> {
const { rows } = await pool.query(
"SELECT name, description, category, brand, specifications FROM products WHERE id = $1",
[productId]
);
if (rows.length === 0) return;
const p = rows[0];
const text = buildEmbeddingText(p.name, p.description, p.category, p.brand, p.specifications);
const response = await openai.embeddings.create({
model: "text-embedding-3-small",
input: text,
});
await pool.query("UPDATE products SET embedding = $1 WHERE id = $2", [
`[${response.data[0].embedding.join(",")}]`,
productId,
]);
}
Step 2: Build the Hybrid Search Engine
The search combines three strategies: prefix matching for exact keyword hits, trigram similarity for typo tolerance, and semantic vector search for intent matching. Results are ranked by a weighted fusion of all three scores.
// src/services/search-engine.ts — Hybrid search combining keyword + semantic approaches
import OpenAI from "openai";
import { pool } from "../db";
import { Redis } from "ioredis";
const openai = new OpenAI();
const redis = new Redis(process.env.REDIS_URL!);
interface SearchResult {
id: string;
name: string;
category: string;
brand: string;
price: number;
imageUrl: string;
score: number; // combined relevance score
matchType: "keyword" | "semantic" | "hybrid";
}
export async function hybridSearch(
query: string,
limit: number = 10
): Promise<SearchResult[]> {
// Check cache first — autocomplete queries repeat heavily
const cacheKey = `search:${query.toLowerCase().trim()}`;
const cached = await redis.get(cacheKey);
if (cached) return JSON.parse(cached);
// Run keyword and semantic searches in parallel
const [keywordResults, semanticResults] = await Promise.all([
keywordSearch(query, limit * 2),
semanticSearch(query, limit * 2),
]);
// Reciprocal Rank Fusion — combines rankings from multiple search strategies
const scores = new Map<string, { score: number; data: any; types: Set<string> }>();
const RRF_K = 60; // smoothing constant (standard value from literature)
keywordResults.forEach((result, rank) => {
const existing = scores.get(result.id) || { score: 0, data: result, types: new Set() };
existing.score += 1 / (RRF_K + rank + 1);
existing.types.add("keyword");
scores.set(result.id, existing);
});
semanticResults.forEach((result, rank) => {
const existing = scores.get(result.id) || { score: 0, data: result, types: new Set() };
existing.score += 1 / (RRF_K + rank + 1);
existing.types.add("semantic");
scores.set(result.id, existing);
});
const merged = [...scores.values()]
.sort((a, b) => b.score - a.score)
.slice(0, limit)
.map(({ score, data, types }) => ({
...data,
score: Math.round(score * 1000) / 1000,
matchType: types.size > 1 ? "hybrid" : types.has("keyword") ? "keyword" : "semantic",
}));
// Cache for 5 minutes — invalidated on product updates
await redis.setex(cacheKey, 300, JSON.stringify(merged));
return merged;
}
async function keywordSearch(query: string, limit: number): Promise<any[]> {
// Combine prefix match (for autocomplete speed) with trigram similarity (for typos)
const { rows } = await pool.query(
`SELECT id, name, category, brand, price, image_url,
ts_rank(search_vector, plainto_tsquery('english', $1)) as text_rank,
similarity(name, $1) as trigram_score
FROM products
WHERE search_vector @@ plainto_tsquery('english', $1)
OR similarity(name, $1) > 0.15
OR name ILIKE $2
ORDER BY
(ts_rank(search_vector, plainto_tsquery('english', $1)) * 0.6 + similarity(name, $1) * 0.4) DESC
LIMIT $3`,
[query, `%${query}%`, limit]
);
return rows;
}
async function semanticSearch(query: string, limit: number): Promise<any[]> {
// Generate embedding for the search query
const response = await openai.embeddings.create({
model: "text-embedding-3-small",
input: query,
});
const queryEmbedding = response.data[0].embedding;
// Cosine similarity search using pgvector
const { rows } = await pool.query(
`SELECT id, name, category, brand, price, image_url,
1 - (embedding <=> $1::vector) as similarity
FROM products
WHERE embedding IS NOT NULL
ORDER BY embedding <=> $1::vector
LIMIT $2`,
[`[${queryEmbedding.join(",")}]`, limit]
);
return rows;
}
Step 3: Build the Autocomplete API with Debounce-Friendly Response Times
The API needs to return results in under 100ms for a smooth autocomplete experience. Popular queries are pre-cached, and the embedding call is the bottleneck managed through aggressive caching.
// src/routes/search.ts — Autocomplete API optimized for sub-100ms responses
import { Hono } from "hono";
import { Redis } from "ioredis";
import { hybridSearch } from "../services/search-engine";
import { pool } from "../db";
const redis = new Redis(process.env.REDIS_URL!);
const app = new Hono();
// Fast autocomplete endpoint — called on every keystroke (after 2 chars)
app.get("/search/autocomplete", async (c) => {
const query = c.req.query("q")?.trim();
if (!query || query.length < 2) {
return c.json({ suggestions: [] });
}
const start = Date.now();
// For very short queries (2-3 chars), use fast prefix matching only
if (query.length <= 3) {
const suggestions = await prefixSuggestions(query);
return c.json({
suggestions,
latencyMs: Date.now() - start,
strategy: "prefix",
});
}
// For longer queries, use hybrid search
const results = await hybridSearch(query, 8);
// Also fetch category suggestions for broad queries
const categories = await categorySuggestions(query);
// Log query for analytics (async, don't block response)
logSearchQuery(query, results.length).catch(() => {});
return c.json({
suggestions: results.map((r) => ({
id: r.id,
name: r.name,
category: r.category,
brand: r.brand,
price: r.price,
imageUrl: r.imageUrl,
matchType: r.matchType,
})),
categories,
latencyMs: Date.now() - start,
strategy: results[0]?.matchType || "none",
});
});
// Trending and popular searches — pre-computed, fast
app.get("/search/trending", async (c) => {
const cached = await redis.get("search:trending");
if (cached) return c.json(JSON.parse(cached));
const { rows } = await pool.query(
`SELECT query, COUNT(*) as count
FROM search_logs
WHERE searched_at > NOW() - INTERVAL '24 hours'
AND results_count > 0
GROUP BY query
ORDER BY count DESC
LIMIT 10`
);
const trending = rows.map((r) => r.query);
await redis.setex("search:trending", 600, JSON.stringify({ trending }));
return c.json({ trending });
});
async function prefixSuggestions(query: string): Promise<any[]> {
const { rows } = await pool.query(
`SELECT DISTINCT id, name, category, brand, price, image_url
FROM products
WHERE name ILIKE $1
ORDER BY popularity_score DESC
LIMIT 8`,
[`${query}%`]
);
return rows;
}
async function categorySuggestions(query: string): Promise<string[]> {
const { rows } = await pool.query(
`SELECT DISTINCT category, COUNT(*) as count
FROM products
WHERE search_vector @@ plainto_tsquery('english', $1)
GROUP BY category
ORDER BY count DESC
LIMIT 4`,
[query]
);
return rows.map((r) => r.category);
}
async function logSearchQuery(query: string, resultsCount: number) {
await pool.query(
"INSERT INTO search_logs (query, results_count, searched_at) VALUES ($1, $2, NOW())",
[query, resultsCount]
);
}
export default app;
Step 4: Build the React Autocomplete Component
The frontend debounces keystrokes, shows results in a dropdown with category grouping, and highlights why each result matched — giving users confidence that the search understands their intent.
// src/components/SearchAutocomplete.tsx — Debounced autocomplete with semantic highlights
import { useState, useRef, useCallback, useEffect } from "react";
interface Suggestion {
id: string;
name: string;
category: string;
brand: string;
price: number;
imageUrl: string;
matchType: "keyword" | "semantic" | "hybrid";
}
export function SearchAutocomplete({ onSelect }: { onSelect: (id: string) => void }) {
const [query, setQuery] = useState("");
const [suggestions, setSuggestions] = useState<Suggestion[]>([]);
const [categories, setCategories] = useState<string[]>([]);
const [isOpen, setIsOpen] = useState(false);
const [activeIndex, setActiveIndex] = useState(-1);
const debounceRef = useRef<NodeJS.Timeout>();
const search = useCallback(async (q: string) => {
if (q.length < 2) {
setSuggestions([]);
setIsOpen(false);
return;
}
try {
const res = await fetch(`/api/search/autocomplete?q=${encodeURIComponent(q)}`);
const data = await res.json();
setSuggestions(data.suggestions);
setCategories(data.categories || []);
setIsOpen(data.suggestions.length > 0);
} catch {
// fail silently — autocomplete is non-critical UI
}
}, []);
const handleInput = (value: string) => {
setQuery(value);
setActiveIndex(-1);
// Debounce: 150ms for fast typers, fires immediately after pause
clearTimeout(debounceRef.current);
debounceRef.current = setTimeout(() => search(value), 150);
};
const handleKeyDown = (e: React.KeyboardEvent) => {
if (e.key === "ArrowDown") {
e.preventDefault();
setActiveIndex((i) => Math.min(i + 1, suggestions.length - 1));
} else if (e.key === "ArrowUp") {
e.preventDefault();
setActiveIndex((i) => Math.max(i - 1, -1));
} else if (e.key === "Enter" && activeIndex >= 0) {
onSelect(suggestions[activeIndex].id);
setIsOpen(false);
} else if (e.key === "Escape") {
setIsOpen(false);
}
};
return (
<div className="relative w-full max-w-2xl">
<input
type="text"
value={query}
onChange={(e) => handleInput(e.target.value)}
onKeyDown={handleKeyDown}
onFocus={() => suggestions.length > 0 && setIsOpen(true)}
placeholder="Search 120,000+ products..."
className="w-full px-4 py-3 border rounded-lg text-lg focus:ring-2 focus:ring-blue-500"
aria-autocomplete="list"
role="combobox"
aria-expanded={isOpen}
/>
{isOpen && (
<div className="absolute z-50 w-full mt-1 bg-white border rounded-lg shadow-xl max-h-96 overflow-y-auto">
{/* Category suggestions */}
{categories.length > 0 && (
<div className="px-4 py-2 border-b bg-gray-50">
<span className="text-xs text-gray-500 uppercase">Categories</span>
<div className="flex gap-2 mt-1">
{categories.map((cat) => (
<button
key={cat}
className="px-2 py-1 text-sm bg-blue-100 text-blue-700 rounded hover:bg-blue-200"
onClick={() => handleInput(`${query} in ${cat}`)}
>
{cat}
</button>
))}
</div>
</div>
)}
{/* Product suggestions */}
{suggestions.map((item, index) => (
<button
key={item.id}
className={`w-full flex items-center gap-3 px-4 py-3 text-left hover:bg-gray-50 ${
index === activeIndex ? "bg-blue-50" : ""
}`}
onClick={() => {
onSelect(item.id);
setIsOpen(false);
}}
role="option"
aria-selected={index === activeIndex}
>
<img src={item.imageUrl} alt="" className="w-10 h-10 object-cover rounded" />
<div className="flex-1">
<div className="font-medium">{item.name}</div>
<div className="text-sm text-gray-500">
{item.brand} · {item.category}
</div>
</div>
<div className="text-right">
<div className="font-semibold">${item.price.toFixed(2)}</div>
{item.matchType === "semantic" && (
<span className="text-xs text-purple-600 bg-purple-50 px-1.5 py-0.5 rounded">
✨ Smart match
</span>
)}
</div>
</button>
))}
</div>
)}
</div>
);
}
Results
After two weeks with semantic autocomplete in production:
- Failed search rate dropped from 30% to 4% — semantic matching finds relevant products even when customers use completely different terminology than product listings
- Search-to-purchase conversion increased by 23% — users find what they need faster; average time from first keystroke to add-to-cart dropped from 45s to 18s
- Revenue impact: +$78K/month from recovered failed searches converting to purchases
- Autocomplete latency: 85ms average (p99: 140ms) — prefix queries return in 20ms, semantic queries cached after first hit
- Embedding index cost: $14/month — 120K products indexed with text-embedding-3-small; incremental updates on product changes cost ~$0.50/day
- "Smart match" badge builds user trust — customers see the system is working harder to find relevant results, not just doing dumb keyword matching