Skip to content

Instantly share code, notes, and snippets.

@feniljain
Last active May 12, 2025 10:26
Show Gist options
  • Select an option

  • Save feniljain/9b6f118b87cb5a048782b8fbe8381b25 to your computer and use it in GitHub Desktop.

Select an option

Save feniljain/9b6f118b87cb5a048782b8fbe8381b25 to your computer and use it in GitHub Desktop.
Simple benchmarking setup and scripts
#!/bin/bash
# Runs the Java benchmark while two recorders collect statistics:
#   * ~/measure/target/release/measure, started in the background (its
#     stats.json is collected from ~/measure at the end)
#   * pidstat, attached to the benchmark's "ForkedMain" JVM
# All outputs are written to ~/results.
#
# keep this off as we are relying on some cmds to fail
# for finding pids
# set -e
. ~/engine/envs/executor.env

# With `set -e` off a failed cd would go unnoticed and the following command
# would run from the wrong directory, so check each cd explicitly.
cd ~/measure || exit 1
./target/release/measure &
RUST_RECORDER_PID=$!

echo "Starting java benchmark"
cd ~/engine/e6-executor/ || { kill "$RUST_RECORDER_PID"; exit 1; }
../cmd-3 &> ~/results/java-bench-out &

echo "trying to find pid"
# Poll once per second until jps lists the forked benchmark JVM.
while true; do
    # Keep only the first match so PID is always a single process id, even if
    # jps lists more than one ForkedMain line.
    PID=$(jps | awk '/ForkedMain/ {print $1; exit}')
    if [ -n "$PID" ]; then
        break
    fi
    sleep 1
done

echo "Starting pidstat on $PID"
pidstat -p "$PID" -o JSON -Hurd 1 &> ~/results/java-stats.json &
RECORDER_PID=$!

echo "waiting on recorder $RECORDER_PID"
# NOTE(review): this relies on pidstat exiting once the monitored JVM is gone.
wait "$RECORDER_PID"

kill "$RUST_RECORDER_PID"
# kill only delivers the signal; wait for the recorder to exit so it cannot
# recreate stats.json after the file has been moved.
wait "$RUST_RECORDER_PID"
mv ~/measure/stats.json ~/results/rs-java-stats.json
echo "Done running java benchmark"
# ===============================
# Runs the Rust (cargo bench) benchmark while two recorders collect statistics:
# the background ~/measure recorder and pidstat attached to the bench binary.
# All outputs are written to ~/results.

# Errors are not fatal in this script, so check each cd explicitly; otherwise
# the following command would run from the wrong directory.
cd ~/measure || exit 1
./target/release/measure &
RUST_RECORDER_PID=$!

echo "Starting rust benchmark"
cd ~/datafusion-experiments/ || { kill "$RUST_RECORDER_PID"; exit 1; }
cargo bench --bench count_distinct_agg &> ~/results/rust-bench-out &

echo "trying to find pid"
# Poll once per second until the bench binary shows up in the process list.
# pgrep exits non-zero while nothing matches, which keeps the loop going.
# -o selects only the oldest match so PID is always a single process id.
until PID=$(pgrep -o -f "target/release/deps/count_distinct_agg"); do
    sleep 1
done

echo "Starting pidstat on $PID"
pidstat -p "$PID" -o JSON -Hurd 1 &> ~/results/rust-stats.json &
RECORDER_PID=$!

echo "waiting on recorder $RECORDER_PID"
# NOTE(review): this relies on pidstat exiting once the monitored process is gone.
wait "$RECORDER_PID"

kill "$RUST_RECORDER_PID"
# kill only delivers the signal; wait for the recorder to exit so it cannot
# recreate stats.json after the file has been moved.
wait "$RUST_RECORDER_PID"
mv ~/measure/stats.json ~/results/rs-rust-stats.json
echo "Done running rust benchmark"
use sysinfo::System;
use serde::Serialize;
use std::{fs::OpenOptions, io::Write, thread, time};
/// One usage sample, serialized as a single JSON object per line of `stats.json`.
#[derive(Serialize)]
struct UsageStats {
/// Global CPU usage as reported by `System::global_cpu_usage()`.
cpu_usage: f32,
/// Used memory as reported by `System::used_memory()`.
/// NOTE(review): presumably bytes — confirm against the sysinfo version in use.
used_memory: u64,
/// UTC time at which the sample was taken, formatted as RFC 3339.
timestamp: String,
}
fn main() {
let mut sys = System::new_all();
loop {
sys.refresh_cpu_all();
sys.refresh_memory();
let stats = UsageStats {
cpu_usage: sys.global_cpu_usage(),
used_memory: sys.used_memory(),
timestamp: chrono::Utc::now().to_rfc3339(),
};
let json = serde_json::to_string(&stats).unwrap();
let mut file = OpenOptions::new()
.create(true)
.append(true)
.open("stats.json")
.unwrap();
writeln!(file, "{}", json).unwrap();
thread::sleep(time::Duration::from_secs(1));
}
}
# Provisions a fresh machine for the benchmarks: system packages, JAVA_HOME,
# minimal tmux/vim configs, the docker daemon, and the Rust toolchain.
sudo yum install tmux htop maven git java-21-amazon-corretto-devel docker gcc -y

# Add the export only if it is not already present, so re-running this setup
# does not keep appending duplicate lines to ~/.bashrc.
grep -qxF 'export JAVA_HOME=/etc/alternatives/java-21-amazon-corretto' ~/.bashrc 2>/dev/null ||
    echo "export JAVA_HOME=/etc/alternatives/java-21-amazon-corretto" >> ~/.bashrc
source ~/.bashrc

tmux kill-server

# -f makes curl fail on an HTTP error instead of saving the server's error page
# as the config file; -sS hides the progress meter but still prints errors.
curl -fsSL https://raw.githubusercontent.com/feniljain/dotfiles/refs/heads/main/tmux/.tmux.conf.minimal -o ~/.tmux.conf
curl -fsSL https://raw.githubusercontent.com/feniljain/dotfiles/refs/heads/main/vim/minimal-vimrc -o ~/.vimrc

sudo systemctl start docker

curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
# Make cargo/rustc available in the current shell without opening a new one.
. "$HOME/.cargo/env"
# Post-process the pidstat captures for both benchmarks. For each of them:
#   1. append the closing brackets that pidstat leaves out, so the file
#      parses as complete JSON
#   2. append the mean of the "usr" CPU values, then the mean of the RSS
#      values, to <name>-measures
for bench in rust java; do
    stats_file="${bench}-stats.json"
    measures_file="${bench}-measures"

    # pidstat output does not write complete json for some reason
    echo "]}]}}" >> "$stats_file"

    # getting insights from pidstat output
    jq '[.sysstat.hosts[0].statistics[]."task-cpu-load"[].usr] | add/length' < "$stats_file" >> "$measures_file"
    jq '[.sysstat.hosts[0].statistics[]."task-memory"[].RSS] | add/length' < "$stats_file" >> "$measures_file"
done
use rand::distr::{Alphabetic, SampleString};
use rand::prelude::*;
use std::{
error::Error,
fs::{create_dir_all, File},
io::Write,
};
use tokio::task::JoinSet;
#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
create_dir_all("/tmp/vecs")?;
create_dir_all("/tmp/vecs/build")?;
create_dir_all("/tmp/vecs/probe")?;
let mut join_set = JoinSet::new();
// build chunk
join_set.spawn(async move {
let file_name = format!("/tmp/vecs/build/int");
let mut f = File::options()
.create(true)
.append(true)
.open(file_name)
.unwrap();
let mut s = String::from("");
let mut v = Vec::new();
for _ in 0..1_000_000 {
let r = rand::random::<i32>();
v.push(r);
s += &format!("{}\n", r);
}
writeln!(&mut f, "{}", s).unwrap();
let mut rng = rand::rng();
let file_name = format!("/tmp/vecs/probe/int");
let mut f = File::options()
.create(true)
.append(true)
.open(file_name)
.unwrap();
let v1 = v.choose_multiple(&mut rng, 8192).collect::<Vec<&i32>>();
let mut s = String::new();
for ele in v1 {
s += &format!("{}\n", ele);
}
writeln!(&mut f, "{}", s).unwrap();
});
join_set.spawn(async move {
let file_name = format!("/tmp/vecs/build/long");
let mut f = File::options()
.create(true)
.append(true)
.open(file_name)
.unwrap();
let mut s = String::from("");
for _ in 0..1_000_000 {
s += &format!("{}\n", rand::random::<i64>());
}
writeln!(&mut f, "{}", s).unwrap();
});
join_set.spawn(async move {
let file_name = format!("/tmp/vecs/build/double");
let mut f = File::options()
.create(true)
.append(true)
.open(file_name)
.unwrap();
let mut s = String::from("");
for _ in 0..1_000_000 {
s += &format!("{}\n", rand::random::<f64>());
}
writeln!(&mut f, "{}", s).unwrap();
});
join_set.spawn(async move {
let file_name = format!("/tmp/vecs/build/string");
let mut f = File::options()
.create(true)
.append(true)
.open(file_name)
.unwrap();
let mut s = String::from("");
for _ in 0..1_000_000 {
s += &format!("{}\n", Alphabetic.sample_string(&mut rand::rng(), 20));
}
writeln!(&mut f, "{}", s).unwrap();
});
// probe chunk
join_set.spawn(async move {
let file_name = format!("/tmp/vecs/probe/long");
let mut f = File::options()
.create(true)
.append(true)
.open(file_name)
.unwrap();
let mut s = String::from("");
for _ in 0..8192 {
s += &format!("{}\n", rand::random::<i64>());
}
writeln!(&mut f, "{}", s).unwrap();
});
join_set.spawn(async move {
let file_name = format!("/tmp/vecs/probe/float");
let mut f = File::options()
.create(true)
.append(true)
.open(file_name)
.unwrap();
let mut s = String::from("");
for _ in 0..8192 {
s += &format!("{}\n", rand::random::<f32>());
}
writeln!(&mut f, "{}", s).unwrap();
});
join_set.spawn(async move {
let file_name = format!("/tmp/vecs/probe/boolean");
let mut f = File::options()
.create(true)
.append(true)
.open(file_name)
.unwrap();
let mut s = String::from("");
for _ in 0..8192 {
s += &format!("{}\n", rand::random::<bool>());
}
writeln!(&mut f, "{}", s).unwrap();
});
join_set.join_all().await;
Ok(())
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment