Last active
February 10, 2025 01:52
-
-
Save byronwai/ef5e8beab0d448c912ea40298d2d5952 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import os
import re
import time

import requests
# GitLab instance details.
# Both values may be supplied through the environment so the admin token does
# not have to be written into (and committed with) this script; the literals
# below are only fallbacks and keep the original edit-in-place workflow working.
# A trailing "/" is stripped so f"{GITLAB_URL}/api/v4/..." never contains "//".
GITLAB_URL = os.environ.get("GITLAB_URL", "https://gitlab.example.com").rstrip("/")  # Replace with your GitLab instance URL
PRIVATE_TOKEN = os.environ.get("GITLAB_PRIVATE_TOKEN", "<your_access_token>")  # Replace with your GitLab admin token
OUTPUT_CSV = "GitLab_AWS_Keys_Report.csv"

# Secret patterns (regex) - Add more patterns as needed.
# Keys are the human-readable labels written to the "Data Type" CSV column;
# values are regular expressions applied with re.findall, so they must not
# contain capturing groups (findall would then return the groups, not the match).
SECRET_PATTERNS = {
    "AWS Access Key ID": r"AKIA[0-9A-Z]{16}",
    # Base64 of a 20-byte "AKIA..." key: "QUtJQ" prefix, 22 more characters, one "=".
    "AWS Access Base64 Key ID": r"QUtJQ[a-zA-Z0-9+/]{22}[=]?",
    "GitHub Personal Access Token": r"ghp_[0-9a-zA-Z]{36}",
    "Slack API Token": r"xox[baprs]-[0-9a-zA-Z]{10,48}",
    # The literal "-" is placed last in the class so it cannot be read as a range.
    "Google API Key": r"AIza[0-9A-Za-z_-]{35}",
}

# GitLab API rate limit settings
RATE_LIMIT_SLEEP = 1  # Sleep 1 second between requests to prevent rate limiting
| # --------------------------- | |
| # 1️⃣ Get All Projects | |
| # --------------------------- | |
def get_all_projects(timeout=30, max_retries=5):
    """Fetch every project the configured token can list.

    Requests ``/api/v4/projects`` page by page (100 entries per page) and stops
    at the first empty page.

    Args:
        timeout: Seconds to wait for each HTTP response. Without a timeout
            ``requests`` can block indefinitely on a stalled connection.
        max_retries: Number of consecutive HTTP 429 responses tolerated for a
            single page before giving up.

    Returns:
        A list of the project objects decoded from the API responses. If a
        request fails, the error is printed and the projects collected so far
        are returned.
    """
    projects = []
    page = 1
    retries = 0
    url = f"{GITLAB_URL}/api/v4/projects"
    headers = {"PRIVATE-TOKEN": PRIVATE_TOKEN}
    while True:
        try:
            response = requests.get(
                url,
                headers=headers,
                params={"page": page, "per_page": 100},
                timeout=timeout,
            )
        except requests.RequestException as exc:
            # Same best-effort policy as a non-200 answer: report and stop paging.
            print(f"❌ Error fetching projects: {exc}")
            break
        if response.status_code == 429 and retries < max_retries:
            # Rate limited: back off and retry the same page, a bounded number of times.
            retries += 1
            print("⚠️ Hit rate limit (429 Too Many Requests), sleeping before retrying...")
            time.sleep(10)
            continue
        if response.status_code != 200:
            print(f"❌ Error fetching projects: {response.text}")
            break
        retries = 0
        data = response.json()
        if not data:
            break  # An empty page marks the end of the listing
        projects.extend(data)
        page += 1
        time.sleep(RATE_LIMIT_SLEEP)  # Rate limiting
    return projects
| # --------------------------- | |
| # 2️⃣ Get All Branches in a Project | |
| # --------------------------- | |
def get_all_branches(project_id, timeout=30, max_retries=5):
    """Fetch the names of all branches of one project.

    Requests ``/api/v4/projects/<project_id>/repository/branches`` page by page
    (100 entries per page) and stops at the first empty page.

    Args:
        project_id: Identifier of the project, interpolated into the URL path.
        timeout: Seconds to wait for each HTTP response. Without a timeout
            ``requests`` can block indefinitely on a stalled connection.
        max_retries: Number of consecutive HTTP 429 responses tolerated for a
            single page before giving up.

    Returns:
        A list with the ``name`` field of every branch returned. If a request
        fails, the error is printed and the names collected so far are returned.
    """
    branches = []
    page = 1
    retries = 0
    url = f"{GITLAB_URL}/api/v4/projects/{project_id}/repository/branches"
    headers = {"PRIVATE-TOKEN": PRIVATE_TOKEN}
    while True:
        try:
            response = requests.get(
                url,
                headers=headers,
                params={"page": page, "per_page": 100},
                timeout=timeout,
            )
        except requests.RequestException as exc:
            # Same best-effort policy as a non-200 answer: report and stop paging.
            print(f"❌ Error fetching branches for project {project_id}: {exc}")
            break
        if response.status_code == 429 and retries < max_retries:
            # Rate limited: back off and retry the same page, a bounded number of times.
            retries += 1
            print("⚠️ Hit rate limit (429 Too Many Requests), sleeping before retrying...")
            time.sleep(10)
            continue
        if response.status_code != 200:
            print(f"❌ Error fetching branches for project {project_id}: {response.text}")
            break
        retries = 0
        data = response.json()
        if not data:
            break  # An empty page marks the end of the listing
        branches.extend(branch["name"] for branch in data)
        page += 1
        time.sleep(RATE_LIMIT_SLEEP)  # Rate limiting
    return branches
| # --------------------------- | |
| # 3️⃣ Search for Secrets in a Project's Branch | |
| # --------------------------- | |
def search_secrets_in_branch(project_id, project_name, project_url, branch,
                             search_term="AKIA", timeout=30, max_retries=5):
    """Search one branch of a project for secrets via GitLab's Search API.

    The project search endpoint is queried with ``scope=blobs`` for
    ``search_term``; the text in each result's ``data`` field is then checked
    against every regex in ``SECRET_PATTERNS``. Only text returned by the
    search is inspected, so patterns unrelated to ``search_term`` are found
    only when they appear in a result that also matched the term.

    Args:
        project_id: Identifier of the project, interpolated into the URL path.
        project_name: Project name copied into each finding.
        project_url: Project URL copied into each finding.
        branch: Branch name sent as the ``ref`` search parameter.
        search_term: Text sent as the ``search`` parameter. Defaults to
            ``"AKIA"``, the value that was previously hard-coded.
        timeout: Seconds to wait for each HTTP response.
        max_retries: Number of consecutive HTTP 429 responses tolerated for a
            single page before giving up (the retry used to be unbounded).

    Returns:
        A list of findings, each a list
        ``[project_id, project_name, project_url, branch, file_path, data_type, secret]``.
        If a request fails, the error is printed and the findings collected so
        far are returned.
    """
    findings = []
    page = 1
    retries = 0
    url = f"{GITLAB_URL}/api/v4/projects/{project_id}/search"
    headers = {"PRIVATE-TOKEN": PRIVATE_TOKEN}
    while True:
        params = {
            "scope": "blobs",       # Search repository files
            "search": search_term,  # This triggers the search; actual regex filtering happens after
            "ref": branch,          # Search in specific branch
            "page": page,
            "per_page": 20,
        }
        try:
            response = requests.get(url, headers=headers, params=params, timeout=timeout)
        except requests.RequestException as exc:
            # Same best-effort policy as a non-200 answer: report and stop paging.
            print(f"❌ Error searching project {project_id} on branch {branch}: {exc}")
            break

        if response.status_code == 429 and retries < max_retries:
            retries += 1
            print("⚠️ Hit rate limit (429 Too Many Requests), sleeping before retrying...")
            time.sleep(10)  # Backoff strategy
            continue  # Retry the same page

        if response.status_code != 200:
            print(f"❌ Error searching project {project_id} on branch {branch}: {response.text}")
            break

        retries = 0
        results = response.json()
        if not results:
            break  # No more search results

        for item in results:
            # Prefer "path"; fall back to "filename", then to the placeholder.
            file_path = item.get("path") or item.get("filename") or "Unknown File"
            # A present-but-null "data" field must not reach re.findall.
            content = item.get("data") or ""

            # Check all regex patterns
            for data_type, pattern in SECRET_PATTERNS.items():
                for secret in re.findall(pattern, content):
                    finding = [project_id, project_name, project_url, branch, file_path, data_type, secret]
                    findings.append(finding)
                    print(f"🚨 FOUND SECRET: {finding}")

        page += 1  # Move to the next search page
        time.sleep(RATE_LIMIT_SLEEP)  # Rate limiting
    return findings
| # --------------------------- | |
| # 4️⃣ Main Execution | |
| # --------------------------- | |
def main():
    """Scan every project and branch for secrets and write a CSV report.

    Lists all projects, lists each project's branches, runs
    ``search_secrets_in_branch`` on every branch, and writes the combined
    findings to ``OUTPUT_CSV``. The file is always written (header row only
    when nothing was found).
    """
    print("🔍 Scanning GitLab projects for secrets...")
    results = []

    for project in get_all_projects():
        project_id = project["id"]
        project_name = project["name_with_namespace"]
        project_url = project["web_url"]  # Get project URL
        print(f"\n🔄 Searching project: {project_name} ({project_id})")

        for branch in get_all_branches(project_id):
            print(f" 🔄 Checking branch: {branch}")
            matches = search_secrets_in_branch(project_id, project_name, project_url, branch)
            if matches:
                print(f" 🚨 Secrets Found in {project_name} ({project_id}) - Branch: {branch}")
                results.extend(matches)

    # Save results as CSV with project URL, branch, and "Data Type" column.
    # The encoding is explicit so non-ASCII project names or file paths do not
    # fail on platforms whose default text encoding cannot represent them.
    with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["Project ID", "Project Name", "Project URL", "Branch", "File Path", "Data Type", "Secret Found"])
        writer.writerows(results)

    if results:
        print(f"\n✅ Scan complete! Results saved in `{OUTPUT_CSV}`")
    else:
        print("\n✅ No secrets found in scanned projects.")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment