Created
March 14, 2026 14:22
-
-
Save n1ckfg/1326ef9b68808760e8fe643713142710 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Estimate how many LLM tokens the text files under a directory amount to.
#
# Usage: <script> [DIR]        (DIR defaults to the current directory)
#
# Walks DIR, skips well-known VCS / IDE / dependency / build-output
# directories, keeps only the files that file(1) does not classify as
# binary, and prints their combined size plus a rough token estimate
# (size / 4). Sizes come from `wc -c`, i.e. they are byte counts.

# Directory names whose whole subtree is skipped, wherever they appear
# below the scanned directory.
EXCLUDED_DIRS=(
  .git .svn .hg .idea .vscode .vs
  __pycache__ venv .venv env .env
  node_modules dist build Build target bin Binaries obj out vendor
  Library Temp DerivedData Pods .gradle packages
  DerivedDataCache Intermediate Saved
  .dart_tool .next .nuxt .svelte-kit coverage checkpoints
)
readonly EXCLUDED_DIRS

# Name globs that are skipped both as plain files and as directories
# (e.g. Foo.xcodeproj/ is a directory, so its contents must go too).
EXCLUDED_NAME_GLOBS=('*.egg-info*' '*.xcodeproj*' '*.xcworkspace*')
readonly EXCLUDED_NAME_GLOBS

# Upper bound on how many paths are handed to a single file(1) invocation.
readonly FILE_BATCH_SIZE=200

#######################################
# List candidate files below the current working directory.
# Globals:   EXCLUDED_DIRS, EXCLUDED_NAME_GLOBS (read)
# Arguments: none
# Outputs:   NUL-delimited paths (relative, starting with "./") on stdout
#######################################
list_candidate_files() {
  local -a skip=()
  local entry

  for entry in "${EXCLUDED_DIRS[@]}"; do
    skip+=(-o -type d -path "*/${entry}")
  done
  for entry in "${EXCLUDED_NAME_GLOBS[@]}"; do
    skip+=(-o -name "${entry}")
  done

  # "${skip[@]:1}" drops the leading -o. -prune keeps find from descending
  # into excluded trees at all instead of filtering their files one by one.
  # Traversal errors (e.g. unreadable directories) are deliberately ignored.
  find . \( "${skip[@]:1}" \) -prune -o -type f -print0 2>/dev/null
}

#######################################
# Filter a list of paths down to the ones file(1) considers text.
# Arguments: the paths to classify
# Outputs:   NUL-delimited subset of the arguments on stdout
#######################################
print_text_paths() {
  local -a paths=("$@")
  local idx=0 encoding

  # With -b, file(1) omits the path and prints one result line per argument
  # in argument order, so results are matched to paths by position. This
  # avoids parsing "path: description" lines, which breaks on paths that
  # contain ':' or newlines, or that merely contain the word "text".
  while IFS= read -r encoding; do
    (( idx < ${#paths[@]} )) || break
    if [[ "$encoding" != binary ]]; then
      printf '%s\0' "${paths[idx]}"
    fi
    idx=$((idx + 1))
  done < <(file -b --mime-encoding -- "${paths[@]}" 2>/dev/null)
}

#######################################
# Sum the sizes of all non-excluded text files below a directory.
# Globals:   FILE_BATCH_SIZE (read)
# Arguments: $1 - directory to scan
# Outputs:   the total byte count on stdout
# Returns:   non-zero if the directory cannot be entered
#######################################
count_text_chars() {
  local root=$1
  local path
  local -a batch=()

  (
    # Scan from inside the directory so that exclusion patterns only see
    # paths relative to it; otherwise scanning e.g. /home/me/build/project
    # would exclude everything because of the "build" component.
    CDPATH= cd -- "$root" 2>/dev/null || exit 1

    {
      while IFS= read -r -d '' path; do
        batch+=("$path")
        if (( ${#batch[@]} >= FILE_BATCH_SIZE )); then
          print_text_paths "${batch[@]}"
          batch=()
        fi
      done < <(list_candidate_files)

      if (( ${#batch[@]} > 0 )); then
        print_text_paths "${batch[@]}"
      fi
    } |
      # Files that cannot be read are skipped on purpose (best effort).
      xargs -0 cat 2>/dev/null |
      wc -c |
      tr -d ' '
  )
}

#######################################
# Entry point: scan the directory and print the report.
# Arguments: $1 - directory to scan (optional, defaults to ".")
# Outputs:   progress messages and the report on stdout, errors on stderr
# Returns:   0 on success, 1 if the directory or file(1) is unavailable
#######################################
main() {
  local dir="${1:-.}"
  local char_count token_estimate

  if [[ ! -d "$dir" ]]; then
    printf 'Error: not a directory: %s\n' "$dir" >&2
    return 1
  fi
  if ! command -v file >/dev/null 2>&1; then
    printf 'Error: the file(1) utility is required but was not found.\n' >&2
    return 1
  fi

  printf '%s\n' "Scanning directory: $dir"
  printf '%s\n' "This may take a moment depending on the project size..."

  char_count=$(count_text_chars "$dir")
  if [[ -z "$char_count" ]]; then
    char_count=0
  fi

  # 1 token is approximately 4 characters for English text/code
  token_estimate=$((char_count / 4))

  # Format numbers with commas for readability if the system supports it
  if LC_NUMERIC=en_US.UTF-8 printf "%'d" 1 >/dev/null 2>&1; then
    LC_NUMERIC=en_US.UTF-8 printf "Total Characters: %'d\n" "$char_count"
    LC_NUMERIC=en_US.UTF-8 printf "Estimated Tokens: ~%'d\n" "$token_estimate"
  else
    printf '%s\n' "Total Characters: $char_count"
    printf '%s\n' "Estimated Tokens: ~$token_estimate"
  fi
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment