n1ckfg · March 14, 2026 14:22
diff --git a/estimate_tokens.sh b/estimate_tokens.sh
 #!/bin/bash
 #
 # Estimate how many LLM tokens the text files under a directory add up to.
 #
 # Usage: estimate_tokens.sh [DIR]
 #   DIR  directory to scan (default: current directory)
 #
 # Prints the total size of all files that file(1) describes as text, and that
 # size divided by 4 as a rough token estimate.

 # Default directory is current directory, or accept as first argument
 DIR="${1:-.}"

 #######################################
 # Print the combined size of every text file below a directory.
 # Directories that usually hold dependencies, build output or caches are
 # skipped, as are files named *.egg-info*, *.xcodeproj* or *.xcworkspace*.
 # Arguments: $1 - directory to scan
 # Outputs:   byte count on stdout (may be empty if the pipeline cannot run)
 #######################################
 count_text_bytes() (
   # The body is a subshell, so the cd below never leaks to the caller.
   local root=$1 start=$1 name
   local -a prune_expr=()
   local -a excluded_dirs=(
     .git .svn .hg .idea .vscode .vs __pycache__
     venv .venv env .env node_modules
     dist build Build target bin Binaries obj out vendor
     Library Temp DerivedData Pods .gradle packages
     DerivedDataCache Intermediate Saved
     .dart_tool .next .nuxt .svelte-kit coverage checkpoints
   )

   for name in "${excluded_dirs[@]}"; do
     prune_expr+=(-o -name "$name")
   done
   prune_expr=("${prune_expr[@]:1}")   # drop the leading -o

   # Scan from inside the directory so the exclusion names are only ever
   # compared with entries below it; a project that itself lives under e.g.
   # /work/build/ must not be excluded wholesale.
   if [[ -d "$root" ]]; then
     # Best effort, like the rest of the scan: an unreadable directory
     # simply produces no output.
     CDPATH='' cd -- "$root" 2>/dev/null || exit
     start=.
   fi

   # Runs once per batch of paths handed over by find. "file -b" prints one
   # description line per argument, in argument order, without the file name,
   # so a name containing "text", ':' or a newline cannot confuse the match.
   # shellcheck disable=SC2016  # expansions belong to the inner bash
   local classify='
     idx=0
     file -b -- "$@" | while IFS= read -r desc; do
       idx=$((idx + 1))
       case $desc in
         *[Tt][Ee][Xx][Tt]*) printf "%s\0" "${!idx}" ;;
       esac
     done
   '

   # -prune stops find from descending into excluded trees at all.
   # stderr is discarded on purpose: unreadable entries are skipped silently.
   find "$start" \
     -type d \( "${prune_expr[@]}" \) -prune \
     -o -type f \
     ! -name '*.egg-info*' ! -name '*.xcodeproj*' ! -name '*.xcworkspace*' \
     -exec bash -c "$classify" bash {} + 2>/dev/null \
     | xargs -0 cat 2>/dev/null \
     | wc -c \
     | tr -d ' '   # some wc implementations pad the number with spaces
 )

 printf '%s\n' "Scanning directory: $DIR"
 printf '%s\n' "This may take a moment depending on the project size..."

 # wc -c counts bytes, so multi-byte characters count more than once.
 CHAR_COUNT=$(count_text_bytes "$DIR")
 CHAR_COUNT=${CHAR_COUNT:-0}

 # 1 token is approximately 4 characters for English text/code
 TOKEN_ESTIMATE=$((CHAR_COUNT / 4))

 # Format numbers with commas for readability if the system supports it
 if LC_NUMERIC=en_US.UTF-8 printf "%'d" 1 >/dev/null 2>&1; then
   LC_NUMERIC=en_US.UTF-8 printf "Total Characters: %'d\n" "$CHAR_COUNT"
   LC_NUMERIC=en_US.UTF-8 printf "Estimated Tokens: ~%'d\n" "$TOKEN_ESTIMATE"
 else
   printf '%s\n' "Total Characters: $CHAR_COUNT"
   printf '%s\n' "Estimated Tokens: ~$TOKEN_ESTIMATE"
 fi
	#!/bin/bash
	#
	# Estimate how many LLM tokens the text files under a directory add up to.
	#
	# Usage: estimate_tokens.sh [DIR]
	#   DIR  directory to scan (default: current directory)
	#
	# Prints the total size of all files that file(1) describes as text, and that
	# size divided by 4 as a rough token estimate.

	# Default directory is current directory, or accept as first argument
	DIR="${1:-.}"

	#######################################
	# Print the combined size of every text file below a directory.
	# Directories that usually hold dependencies, build output or caches are
	# skipped, as are files named *.egg-info*, *.xcodeproj* or *.xcworkspace*.
	# Arguments: $1 - directory to scan
	# Outputs:   byte count on stdout (may be empty if the pipeline cannot run)
	#######################################
	count_text_bytes() (
	  # The body is a subshell, so the cd below never leaks to the caller.
	  local root=$1 start=$1 name
	  local -a prune_expr=()
	  local -a excluded_dirs=(
	    .git .svn .hg .idea .vscode .vs __pycache__
	    venv .venv env .env node_modules
	    dist build Build target bin Binaries obj out vendor
	    Library Temp DerivedData Pods .gradle packages
	    DerivedDataCache Intermediate Saved
	    .dart_tool .next .nuxt .svelte-kit coverage checkpoints
	  )

	  for name in "${excluded_dirs[@]}"; do
	    prune_expr+=(-o -name "$name")
	  done
	  prune_expr=("${prune_expr[@]:1}")   # drop the leading -o

	  # Scan from inside the directory so the exclusion names are only ever
	  # compared with entries below it; a project that itself lives under e.g.
	  # /work/build/ must not be excluded wholesale.
	  if [[ -d "$root" ]]; then
	    # Best effort, like the rest of the scan: an unreadable directory
	    # simply produces no output.
	    CDPATH='' cd -- "$root" 2>/dev/null || exit
	    start=.
	  fi

	  # Runs once per batch of paths handed over by find. "file -b" prints one
	  # description line per argument, in argument order, without the file name,
	  # so a name containing "text", ':' or a newline cannot confuse the match.
	  # shellcheck disable=SC2016  # expansions belong to the inner bash
	  local classify='
	    idx=0
	    file -b -- "$@" | while IFS= read -r desc; do
	      idx=$((idx + 1))
	      case $desc in
	        *[Tt][Ee][Xx][Tt]*) printf "%s\0" "${!idx}" ;;
	      esac
	    done
	  '

	  # -prune stops find from descending into excluded trees at all.
	  # stderr is discarded on purpose: unreadable entries are skipped silently.
	  find "$start" \
	    -type d \( "${prune_expr[@]}" \) -prune \
	    -o -type f \
	    ! -name '*.egg-info*' ! -name '*.xcodeproj*' ! -name '*.xcworkspace*' \
	    -exec bash -c "$classify" bash {} + 2>/dev/null \
	    | xargs -0 cat 2>/dev/null \
	    | wc -c \
	    | tr -d ' '   # some wc implementations pad the number with spaces
	)

	printf '%s\n' "Scanning directory: $DIR"
	printf '%s\n' "This may take a moment depending on the project size..."

	# wc -c counts bytes, so multi-byte characters count more than once.
	CHAR_COUNT=$(count_text_bytes "$DIR")
	CHAR_COUNT=${CHAR_COUNT:-0}

	# 1 token is approximately 4 characters for English text/code
	TOKEN_ESTIMATE=$((CHAR_COUNT / 4))

	# Format numbers with commas for readability if the system supports it
	if LC_NUMERIC=en_US.UTF-8 printf "%'d" 1 >/dev/null 2>&1; then
	  LC_NUMERIC=en_US.UTF-8 printf "Total Characters: %'d\n" "$CHAR_COUNT"
	  LC_NUMERIC=en_US.UTF-8 printf "Estimated Tokens: ~%'d\n" "$TOKEN_ESTIMATE"
	else
	  printf '%s\n' "Total Characters: $CHAR_COUNT"
	  printf '%s\n' "Estimated Tokens: ~$TOKEN_ESTIMATE"
	fi
No results found