# Source: GitHub Gist byronwai/ef5e8beab0d448c912ea40298d2d5952
# (author @byronwai, last active February 10, 2025).
import requests
import time
import re
import csv
# --- Connection settings and report location --------------------------------
GITLAB_URL = "https://gitlab.example.com"  # Replace with your GitLab instance URL
PRIVATE_TOKEN = "<your_access_token>"  # Replace with your GitLab admin token
OUTPUT_CSV = "GitLab_AWS_Keys_Report.csv"  # File the findings are written to

# --- Secret detectors --------------------------------------------------------
# Maps a human-readable label (written to the report's "Data Type" column) to
# the regular expression that recognizes that kind of secret.
# Add more patterns as needed.
SECRET_PATTERNS = {
    "AWS Access Key ID": r"AKIA[0-9A-Z]{16}",
    # "QUtJQ" is how the Base64 encoding of text starting with "AKIA" begins.
    "AWS Access Base64 Key ID": r"QUtJQ[a-zA-Z0-9+/]{22}[=]?",
    "GitHub Personal Access Token": r"ghp_[0-9a-zA-Z]{36}",
    "Slack API Token": r"xox[baprs]-[0-9a-zA-Z]{10,48}",
    # The "-" before "_" in the character class is a literal hyphen.
    "Google API Key": r"AIza[0-9A-Za-z-_]{35}",
}

# --- Rate limiting -----------------------------------------------------------
# Seconds to pause between consecutive API requests to avoid being throttled.
RATE_LIMIT_SLEEP = 1
# ---------------------------
# 1️⃣ Get All Projects
# ---------------------------
def get_all_projects(timeout=30):
    """Fetch every project returned by the GitLab projects API.

    Requests ``/api/v4/projects`` page by page (100 projects per page) and
    stops at the first empty page.

    Args:
        timeout: Seconds to wait for each HTTP request before giving up.
            Without a timeout ``requests`` may block indefinitely on a
            stalled connection.

    Returns:
        A list of project dicts exactly as returned by the API. If a request
        fails (network error or a status other than 200/429) an error is
        printed and the projects collected so far are returned.
    """
    projects = []
    headers = {"PRIVATE-TOKEN": PRIVATE_TOKEN}
    page = 1
    while True:
        url = f"{GITLAB_URL}/api/v4/projects?page={page}&per_page=100"
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
        except requests.RequestException as exc:
            print(f"❌ Error fetching projects: {exc}")
            break

        # A rate-limit response is temporary: wait and retry the same page
        # instead of silently truncating the project list.
        if response.status_code == 429:
            print("⚠️ Hit rate limit (429 Too Many Requests), sleeping before retrying...")
            time.sleep(10)
            continue

        if response.status_code != 200:
            print(f"❌ Error fetching projects: {response.text}")
            break

        data = response.json()
        if not data:
            break  # An empty page marks the end of the listing.

        projects.extend(data)
        page += 1
        time.sleep(RATE_LIMIT_SLEEP)  # Rate limiting
    return projects
# ---------------------------
# 2️⃣ Get All Branches in a Project
# ---------------------------
def get_all_branches(project_id, timeout=30):
    """Fetch the names of all branches of one project.

    Requests ``/api/v4/projects/<id>/repository/branches`` page by page
    (100 branches per page) and stops at the first empty page.

    Args:
        project_id: Identifier of the project, interpolated into the URL.
        timeout: Seconds to wait for each HTTP request before giving up.
            Without a timeout ``requests`` may block indefinitely on a
            stalled connection.

    Returns:
        A list of branch name strings. If a request fails (network error or
        a status other than 200/429) an error is printed and the names
        collected so far are returned.
    """
    branches = []
    headers = {"PRIVATE-TOKEN": PRIVATE_TOKEN}
    page = 1
    while True:
        url = (
            f"{GITLAB_URL}/api/v4/projects/{project_id}"
            f"/repository/branches?page={page}&per_page=100"
        )
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
        except requests.RequestException as exc:
            print(f"❌ Error fetching branches for project {project_id}: {exc}")
            break

        # A rate-limit response is temporary: wait and retry the same page
        # so that branches are not silently skipped.
        if response.status_code == 429:
            print("⚠️ Hit rate limit (429 Too Many Requests), sleeping before retrying...")
            time.sleep(10)
            continue

        if response.status_code != 200:
            print(f"❌ Error fetching branches for project {project_id}: {response.text}")
            break

        data = response.json()
        if not data:
            break  # An empty page marks the end of the listing.

        branches.extend(branch["name"] for branch in data)
        page += 1
        time.sleep(RATE_LIMIT_SLEEP)  # Rate limiting
    return branches
# ---------------------------
# 3️⃣ Search for Secrets in a Project's Branch
# ---------------------------
def search_secrets_in_branch(project_id, project_name, project_url, branch,
                             search_term="AKIA", timeout=30):
    """Search one branch of a project for secrets via GitLab's Search API.

    Queries ``/api/v4/projects/<id>/search`` with scope ``blobs`` for
    ``search_term`` on the given branch, then runs every regex in
    ``SECRET_PATTERNS`` over the ``data`` field of each returned item.

    NOTE(review): only items returned for ``search_term`` are inspected, so
    with the default term the non-AWS patterns can match only when they
    appear in the same returned snippet as "AKIA". Call again with another
    term (e.g. a different token prefix) to widen coverage.

    Args:
        project_id: Identifier of the project, interpolated into the URL.
        project_name: Project name, copied into each finding.
        project_url: Project web URL, copied into each finding.
        branch: Branch name passed as the ``ref`` search parameter.
        search_term: Text sent as the ``search`` parameter; the regex
            filtering happens afterwards on the returned content.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of findings, each a list
        ``[project_id, project_name, project_url, branch, file_path,
        data_type, secret]``. If a request fails (network error or a status
        other than 200/429) an error is printed and the findings collected
        so far are returned.
    """
    findings = []
    url = f"{GITLAB_URL}/api/v4/projects/{project_id}/search"
    headers = {"PRIVATE-TOKEN": PRIVATE_TOKEN}
    # Compile once; the same patterns are applied to every returned item.
    compiled_patterns = [
        (data_type, re.compile(pattern))
        for data_type, pattern in SECRET_PATTERNS.items()
    ]

    page = 1
    while True:
        params = {
            "scope": "blobs",  # Search repository files
            "search": search_term,  # Triggers the search; regex filtering happens after
            "ref": branch,  # Search in specific branch
            "page": page,
            "per_page": 20,
        }
        try:
            response = requests.get(url, headers=headers, params=params, timeout=timeout)
        except requests.RequestException as exc:
            print(f"❌ Error searching project {project_id} on branch {branch}: {exc}")
            break

        if response.status_code == 429:
            print("⚠️ Hit rate limit (429 Too Many Requests), sleeping before retrying...")
            time.sleep(10)  # Backoff strategy
            continue  # Retry the same page

        if response.status_code != 200:
            print(f"❌ Error searching project {project_id} on branch {branch}: {response.text}")
            break

        results = response.json()
        if not results:
            break  # No more search results

        for item in results:
            file_path = item.get("filename", "Unknown File")
            # Guard against a null "data" field, which re.findall cannot handle.
            content = item.get("data") or ""
            for data_type, regex in compiled_patterns:
                for secret in regex.findall(content):
                    finding = [project_id, project_name, project_url, branch,
                               file_path, data_type, secret]
                    findings.append(finding)
                    # NOTE: this prints the matched secret in clear text.
                    print(f"🚨 FOUND SECRET: {finding}")

        page += 1  # Move to the next search page
        time.sleep(RATE_LIMIT_SLEEP)  # Rate limiting
    return findings
# ---------------------------
# 4️⃣ Main Execution
# ---------------------------
def main():
    """Scan every project and branch for secrets and write a CSV report.

    Lists all projects, lists each project's branches, searches each branch
    for secrets, and finally writes all findings to ``OUTPUT_CSV`` with a
    header row. The file is written even when there are no findings.

    NOTE: the report contains the matched secrets in clear text; treat the
    output file as sensitive.
    """
    print("🔍 Scanning GitLab projects for secrets...")
    results = []
    for project in get_all_projects():
        project_id = project["id"]
        project_name = project["name_with_namespace"]
        project_url = project["web_url"]  # Get project URL
        print(f"\n🔄 Searching project: {project_name} ({project_id})")

        for branch in get_all_branches(project_id):
            print(f" 🔄 Checking branch: {branch}")
            matches = search_secrets_in_branch(project_id, project_name, project_url, branch)
            if matches:
                print(f" 🚨 Secrets Found in {project_name} ({project_id}) - Branch: {branch}")
                results.extend(matches)

    # Save results as CSV with project URL, branch, and "Data Type" column.
    # An explicit UTF-8 encoding keeps non-ASCII project names and file paths
    # from raising UnicodeEncodeError on platforms whose default encoding is
    # not UTF-8 (which would lose the whole scan at the very last step).
    with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["Project ID", "Project Name", "Project URL", "Branch",
                         "File Path", "Data Type", "Secret Found"])
        writer.writerows(results)

    if results:
        print(f"\n✅ Scan complete! Results saved in `{OUTPUT_CSV}`")
    else:
        print("\n✅ No secrets found in scanned projects.")
# Run the scan only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
# End of gist (GitHub page footer omitted).