Skip to content

Instantly share code, notes, and snippets.

@redknightlois
Created March 20, 2026 17:24
Show Gist options
  • Select an option

  • Save redknightlois/229a55739c5f04de61250b4bda500205 to your computer and use it in GitHub Desktop.

Select an option

Save redknightlois/229a55739c5f04de61250b4bda500205 to your computer and use it in GitHub Desktop.
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;
// Benchmark targets: the pre-PR (BASE) server listens on port 18081, the PR build on port 18080.
const string baseServer = "http://127.0.0.1:18081";
const string prServer = "http://127.0.0.1:18080";

// One HttpClient shared by every request in the script. The timeout is raised to 30 minutes
// so long-running bulk inserts and index queries are not cut off by the 100-second default.
var http = new HttpClient();
http.Timeout = TimeSpan.FromMinutes(30);
// ── Populate ─────────────────────────────────────────────────────────────────
// Makes sure database `dbName` on `serverUrl` contains `totalDocs` Products documents and a
// non-stale "Products/ByCategoryPrice" index that uses the requested search engine.
//
//   serverUrl - base URL of the server, e.g. "http://127.0.0.1:18080"
//   label     - short tag ("BASE"/"PR") used only in console output
//   dbName    - database to create and fill
//   totalDocs - number of documents to insert
//   engine    - value written to "Indexing.Static.SearchEngineType" ("Corax" or "Lucene")
//
// Returns early when the database already reports exactly `totalDocs` documents and the index
// is present and not stale. Throws HttpRequestException when a bulk insert batch is rejected.
async Task Populate(string serverUrl, string label, string dbName, int totalDocs, string engine)
{
    const int batchSize = 10_000;
    const string indexName = "Products/ByCategoryPrice";
    const string jsonMediaType = "application/json";

    // Numbers embedded in JSON must use '.' as the decimal separator no matter which culture
    // the benchmark machine runs under; the default double formatting is culture-sensitive.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // The index counts as ready when the stats payload mentions it and nothing is flagged stale.
    bool IndexReady(string statsJson) =>
        !statsJson.Contains("\"IsStale\":true") && statsJson.Contains(indexName);

    // Check if already populated
    try
    {
        var existing = await http.GetStringAsync($"{serverUrl}/databases/{dbName}/stats");
        if (existing.Contains($"\"CountOfDocuments\":{totalDocs}"))
        {
            var idxStats = await http.GetStringAsync($"{serverUrl}/databases/{dbName}/indexes/stats");
            if (IndexReady(idxStats))
            {
                Console.WriteLine($" {label}/{dbName}: {totalDocs:N0} docs + index ready, skipping.");
                return;
            }
        }
    }
    catch (HttpRequestException)
    {
        // The stats request failed (e.g. the database does not exist yet): fall through and populate.
    }

    Console.WriteLine($" {label}/{dbName}: populating {totalDocs:N0} docs ({engine})...");
    var categories = new[] { "Electronics", "Books", "Clothing", "Furniture", "Toys" };
    var rng = new Random(42); // fixed seed so every server receives identical data

    // Create database. A failure is reported but not fatal: the database may be left over
    // from an earlier, interrupted run.
    var createBody = $@"{{""DatabaseName"":""{dbName}"",""Settings"":{{""Indexing.Static.SearchEngineType"":""{engine}""}}}}";
    using (var createResp = await http.PutAsync($"{serverUrl}/admin/databases",
        new StringContent(createBody, Encoding.UTF8, jsonMediaType)))
    {
        if (!createResp.IsSuccessStatusCode)
            Console.WriteLine($" warning: create database '{dbName}' returned HTTP {(int)createResp.StatusCode}");
    }

    // Create index
    var indexDef = $@"{{""Indexes"":[{{""Name"":""Products/ByCategoryPrice"",""Maps"":[""from p in docs.Products select new {{ p.Category, p.Price, p.Stock }}""],""Configuration"":{{""Indexing.Static.SearchEngineType"":""{engine}""}},""Fields"":{{}}}}]}}";
    using (var indexResp = await http.PutAsync($"{serverUrl}/databases/{dbName}/admin/indexes",
        new StringContent(indexDef, Encoding.UTF8, jsonMediaType)))
    {
        if (!indexResp.IsSuccessStatusCode)
            Console.WriteLine($" warning: create index '{indexName}' returned HTTP {(int)indexResp.StatusCode}");
    }

    // Bulk insert
    var sw = Stopwatch.StartNew();
    int total = 0;
    int batch = 0;
    while (total < totalDocs)
    {
        // The final batch is smaller when totalDocs is not a multiple of batchSize.
        int count = Math.Min(batchSize, totalDocs - total);
        var sb = new StringBuilder(count * 200);
        sb.Append("{\"Commands\":[");
        for (int i = 0; i < count; i++)
        {
            int docNum = total + i;
            var cat = categories[docNum % categories.Length];
            var price = Math.Round(rng.NextDouble() * 2000, 2).ToString(invariant);
            var stock = rng.Next(0, 500);
            if (i > 0) sb.Append(',');
            sb.Append($@"{{""Id"":""products/{docNum}"",""Type"":""PUT"",""Document"":{{""Category"":""{cat}"",""Price"":{price},""Stock"":{stock},""@metadata"":{{""@collection"":""Products""}}}}}}");
        }
        sb.Append("]}");

        using (var resp = await http.PostAsync($"{serverUrl}/databases/{dbName}/bulk_docs",
            new StringContent(sb.ToString(), Encoding.UTF8, jsonMediaType)))
        {
            resp.EnsureSuccessStatusCode();
        }

        total += count;
        batch++;
        if (batch % 30 == 0)
            Console.WriteLine($" {total:N0} / {totalDocs:N0} ({sw.Elapsed.TotalSeconds:F1}s)");
    }
    Console.WriteLine($" inserted {total:N0} docs in {sw.Elapsed.TotalSeconds:F1}s");

    // Poll every two seconds until the index has caught up with the inserted documents.
    Console.Write($" waiting for indexing...");
    while (true)
    {
        var stats = await http.GetStringAsync($"{serverUrl}/databases/{dbName}/indexes/stats");
        if (IndexReady(stats)) break;
        await Task.Delay(2000);
        Console.Write(".");
    }
    Console.WriteLine(" done");
}
// ── Benchmark helpers ────────────────────────────────────────────────────────
// Runs `rql` repeatedly against `dbName` on `serverUrl` and returns the median request
// latency in milliseconds.
//
//   warmupCalls    - untimed requests issued first (default 3)
//   measureSeconds - length of the timed window (default 30); at least one timed request
//                    is always made
//
// Each timed sample covers sending the request and reading the full response body.
// Throws HttpRequestException as soon as any request returns a non-success status.
async Task<double> RunRql(string serverUrl, string dbName, string rql, int warmupCalls = 3, double measureSeconds = 30)
{
    var bodyBytes = Encoding.UTF8.GetBytes(JsonSerializer.Serialize(new { Query = rql }));
    var url = $"{serverUrl}/databases/{dbName}/queries";
    var measureFor = TimeSpan.FromSeconds(measureSeconds);

    // A fresh content object is created per request; the serialized bytes are shared.
    ByteArrayContent MakeContent()
    {
        var c = new ByteArrayContent(bodyBytes);
        c.Headers.ContentType = new System.Net.Http.Headers.MediaTypeHeaderValue("application/json") { CharSet = "utf-8" };
        return c;
    }

    // Warmup — calls to stabilize server-side caches (Voron pages, field cache, JIT)
    for (int w = 0; w < warmupCalls; w++)
    {
        using var wr = await http.PostAsync(url, MakeContent());
        wr.EnsureSuccessStatusCode();
        await wr.Content.ReadAsStringAsync(); // drain response fully
    }

    var times = new List<double>();
    var total = Stopwatch.StartNew();
    do
    {
        var sw = Stopwatch.StartNew();
        // Dispose every response so thousands of iterations do not accumulate undisposed messages.
        using var resp = await http.PostAsync(url, MakeContent());
        // Drain response body so the connection is clean before next iteration
        await resp.Content.ReadAsStringAsync();
        sw.Stop();
        resp.EnsureSuccessStatusCode();
        times.Add(sw.Elapsed.TotalMilliseconds);
    } while (total.Elapsed < measureFor);

    times.Sort();
    return times[times.Count / 2]; // median (upper middle element for an even sample count)
}
// Builds the argument list for an RQL facet() call that splits the price interval [0, 2000)
// into `count` equally wide ranges, e.g. for count = 2: "Price < 1000, Price >= 1000".
// The first range has no lower bound and the last range has no upper bound; with a single
// range only the "Price < 2000" clause is produced.
//
// Bounds are rounded to two decimals and always formatted with the invariant culture,
// so the generated RQL is identical on machines whose culture uses ',' as decimal separator.
//
// Throws ArgumentOutOfRangeException when `count` is less than 1.
string BuildPriceRanges(int count)
{
    if (count < 1)
        throw new ArgumentOutOfRangeException(nameof(count), count, "At least one range is required.");

    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var step = 2000.0 / count;
    var parts = new List<string>(count);
    for (int i = 0; i < count; i++)
    {
        var lo = Math.Round(i * step, 2).ToString(invariant);
        var hi = Math.Round((i + 1) * step, 2).ToString(invariant);
        if (i == 0) parts.Add($"Price < {hi}");
        else if (i == count - 1) parts.Add($"Price >= {lo}");
        else parts.Add($"Price >= {lo} and Price < {hi}");
    }
    return string.Join(", ", parts);
}
// ── Verify both servers are up ───────────────────────────────────────────────
// Probe the /build/version endpoint of both servers before doing any work.
// If either one cannot be reached the script stops here and reports a non-zero exit code,
// so wrappers (CI jobs, shell scripts) can tell an aborted run from a completed one.
foreach (var (url, label) in new[] { (baseServer, "BASE"), (prServer, "PR") })
{
    Console.Write($"Checking {label} ({url})... ");
    try
    {
        var ver = await http.GetStringAsync($"{url}/build/version");
        Console.WriteLine($"OK {ver}");
    }
    catch (Exception ex)
    {
        Console.WriteLine($"FAILED: {ex.Message}");
        // A plain `return` would end the process with exit code 0; flag the failure explicitly.
        Environment.ExitCode = 1;
        return;
    }
}
Console.WriteLine();
// ── Size configurations ──────────────────────────────────────────────────────
// Document counts to benchmark. For each size: populate the databases, wait for the indexes,
// then run every faceted-query scenario against Corax (BASE), Corax (PR) and Lucene (BASE).
var sizes = new[] { 3_000_000 };
foreach (var totalDocs in sizes)
{
    // Use the M/K shorthand only for exact multiples, so two different sizes can never
    // map to the same database name (a truncating division would turn 1,500,000 into "1M").
    var sizeSuffix = totalDocs switch
    {
        >= 1_000_000 when totalDocs % 1_000_000 == 0 => $"{totalDocs / 1_000_000}M",
        >= 1_000 when totalDocs % 1_000 == 0 => $"{totalDocs / 1_000}K",
        _ => $"{totalDocs}"
    };
    var coraxDb = $"Bench{sizeSuffix}_Corax";
    var luceneDb = $"Bench{sizeSuffix}_Lucene";

    // ── Populate ─────────────────────────────────────────────────────────
    Console.WriteLine($"── Populating {sizeSuffix} databases ──");
    await Populate(baseServer, "BASE", coraxDb, totalDocs, "Corax");
    await Populate(prServer, "PR", coraxDb, totalDocs, "Corax");
    // Lucene on BASE server — makes it clear PR only touched Corax
    await Populate(baseServer, "BASE", luceneDb, totalDocs, "Lucene");
    Console.WriteLine();

    // ── Wait for all indexes to be non-stale ─────────────────────────────
    Console.Write("Waiting for all indexes to be non-stale...");
    foreach (var (url, label, dbN) in new[]
    {
        (baseServer, "BASE", coraxDb),
        (prServer, "PR", coraxDb),
        (baseServer, "BASE", luceneDb),
    })
    {
        while (true)
        {
            try
            {
                var stats = await http.GetStringAsync($"{url}/databases/{dbN}/indexes/stats");
                if (!stats.Contains("\"IsStale\":true")) break;
            }
            catch (HttpRequestException ex)
            {
                // Stop waiting for this database, but say so instead of silently
                // reporting it as ready.
                Console.WriteLine();
                Console.WriteLine($" warning: could not read index stats for {label}/{dbN}: {ex.Message}");
                break;
            }
            Console.Write(".");
            await Task.Delay(2000);
        }
    }
    Console.WriteLine(" all ready");
    Console.WriteLine();

    Console.WriteLine(new string('=', 110));
    Console.WriteLine($"FACETED QUERY BENCHMARKS — {sizeSuffix} docs — PR #22425 (RavenDB-26098)");
    Console.WriteLine($" Corax BASE (pre-PR): {baseServer} Corax PR: {prServer} Lucene BASE: {baseServer}");
    Console.WriteLine(new string('=', 110));
    Console.WriteLine();

    // ── Helper to bench a single query across all 3 ──────────────────────
    // Measures `rql` on Corax BASE, Corax PR and Lucene BASE (in that order) and prints one
    // table row; "Gain" is the BASE median divided by the PR median.
    async Task BenchRow(string name, string rql)
    {
        var baseCorax = await RunRql(baseServer, coraxDb, rql);
        var prCorax = await RunRql(prServer, coraxDb, rql);
        var lucene = await RunRql(baseServer, luceneDb, rql);
        var speedup = baseCorax / prCorax;
        Console.WriteLine($" {name,-50} {baseCorax,9:F1}ms {prCorax,9:F1}ms {speedup,6:F1}x {lucene,9:F1}ms");
    }

    // Prints the column titles and the separator line that precede each group of rows.
    void Header()
    {
        Console.WriteLine($" {"Query",-50} {"Corax OLD",11} {"Corax NEW",11} {"Gain",6} {"Lucene",11}");
        Console.WriteLine($" {new string('-', 50)} {new string('-', 11)} {new string('-', 11)} {new string('-', 6)} {new string('-', 11)}");
    }

    // ─── S1: Range facets + WHERE (count only) ──────────────────────────
    Console.WriteLine("── S1: Range facets + WHERE (count only → indexed path) ──");
    Console.WriteLine(" Old Corax: scanning O(N×M) seeks New Corax: indexed HashSet intersection");
    Header();
    foreach (var n in new[] { 5, 10, 20, 50, 100 })
    {
        var rql = $"from index 'Products/ByCategoryPrice' where Category = 'Electronics' select facet({BuildPriceRanges(n)})";
        await BenchRow($"Range×{n}, WHERE Category='Electronics'", rql);
    }
    Console.WriteLine();

    // ─── S2: Range facets + wide WHERE ──────────────────────────────────
    Console.WriteLine("── S2: Range facets + wide WHERE (all docs match) ──");
    Header();
    foreach (var n in new[] { 10, 50, 100 })
    {
        var rql = $"from index 'Products/ByCategoryPrice' where Price > 0 select facet({BuildPriceRanges(n)})";
        await BenchRow($"Range×{n}, WHERE Price>0 (~{sizeSuffix})", rql);
    }
    Console.WriteLine();

    // ─── S3: Term facets + WHERE ────────────────────────────────────────
    Console.WriteLine("── S3: Term facets + WHERE (count only → indexed path) ──");
    Console.WriteLine(" Old Corax: scanning per-doc New Corax: indexed HashSet per term");
    Header();
    await BenchRow("Term(Category), WHERE Category='Electronics'",
        "from index 'Products/ByCategoryPrice' where Category = 'Electronics' select facet(Category)");
    await BenchRow($"Term(Category), WHERE Price>0 (~{sizeSuffix})",
        "from index 'Products/ByCategoryPrice' where Price > 0 select facet(Category)");
    Console.WriteLine();

    // ─── S4: Range + WHERE + aggregations (scanning path) ───────────────
    Console.WriteLine("── S4: Range + WHERE + aggregations (scanning path, read-once fix) ──");
    Console.WriteLine(" Old Corax: O(N×M) seeks/doc New Corax: O(N) seeks, cached field value");
    Header();
    foreach (var n in new[] { 5, 10, 20, 50 })
    {
        var rql = $"from index 'Products/ByCategoryPrice' where Category = 'Electronics' select facet({BuildPriceRanges(n)}, sum(Stock), avg(Stock), min(Stock), max(Stock))";
        await BenchRow($"Range×{n} + agg, WHERE Category='Electronics'", rql);
    }
    Console.WriteLine();

    // ─── S5: Term + WHERE + aggregations ────────────────────────────────
    Console.WriteLine("── S5: Term + WHERE + aggregations (scanning path) ──");
    Header();
    await BenchRow("Term(Category) + agg, WHERE Price>100",
        "from index 'Products/ByCategoryPrice' where Price > 100 select facet(Category, sum(Stock), avg(Stock), min(Stock), max(Stock))");
    Console.WriteLine();

    // ─── S6: Baseline — no WHERE ────────────────────────────────────────
    Console.WriteLine("── S6: Baseline — no WHERE (fast path, no change expected) ──");
    Header();
    await BenchRow("Range×50, no WHERE",
        $"from index 'Products/ByCategoryPrice' select facet({BuildPriceRanges(50)})");
    await BenchRow("Term(Category), no WHERE",
        "from index 'Products/ByCategoryPrice' select facet(Category)");
    Console.WriteLine();
}
Console.WriteLine(new string('=', 110));
Console.WriteLine("DONE");
Console.WriteLine(new string('=', 110));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment