Skip to content

Instantly share code, notes, and snippets.

@tkellen
Created April 3, 2025 06:42
Show Gist options
  • Select an option

  • Save tkellen/40d552366246245da73d51b0350b6e17 to your computer and use it in GitHub Desktop.

Select an option

Save tkellen/40d552366246245da73d51b0350b6e17 to your computer and use it in GitHub Desktop.

Revisions

  1. tkellen created this gist Apr 3, 2025.
    193 changes: 193 additions & 0 deletions inception.sh
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,193 @@
    #!/bin/bash
    # Monorepo change detection for CI (works on bash 3.x and newer). Prints a JSON
    # array describing the workflows that should run; nothing here is tied to one
    # particular CI product.

    # Directory holding this script, with symlinks resolved.
    SCRIPT_PATH="$(dirname "$0")"
    SCRIPT_PATH="$(cd "$SCRIPT_PATH"; pwd -P)"

    # Repository root: honour a caller-supplied REPO_PATH, otherwise use the
    # parent of the script directory.
    if [[ -z "$REPO_PATH" ]]; then
      REPO_PATH="$(cd "${SCRIPT_PATH}/../" && pwd -P)"
    fi

    # errexit:  stop at the first failing command
    # pipefail: a pipeline fails when any stage fails, not only the last one
    # noglob:   never expand glob strings (bash 3.x expands some but not others)
    set -o errexit -o pipefail -o noglob

    # Print the banner on stderr, keeping it out of the JSON this script writes to stdout.
    cat <<EOF >&2
    ██╗███╗░░██╗░█████╗░███████╗██████╗░████████╗██╗░█████╗░███╗░░██╗
    ██║████╗░██║██╔══██╗██╔════╝██╔══██╗╚══██╔══╝██║██╔══██╗████╗░██║
    ██║██╔██╗██║██║░░╚═╝█████╗░░██████╔╝░░░██║░░░██║██║░░██║██╔██╗██║
    ██║██║╚████║██║░░██╗██╔══╝░░██╔═══╝░░░░██║░░░██║██║░░██║██║╚████║
    ██║██║░╚███║╚█████╔╝███████╗██║░░░░░░░░██║░░░██║╚█████╔╝██║░╚███║
    ╚═╝╚═╝░░╚══╝░╚════╝░╚══════╝╚═╝░░░░░░░░╚═╝░░░╚═╝░╚════╝░╚═╝░░╚══╝
    EOF

    # Write one colourised "[label]: message" line to stderr.
    #   $1 - label shown in blue inside square brackets
    #   $2 - message shown in white; backslash escapes such as \n are expanded
    function debug {
      local label="$1"
      local message="$2"
      local blue="\033[34m" white="\033[37m" reset="\033[0m"
      # %b expands backslash escapes in the argument, matching `echo -e`.
      printf '%b\n' "${blue}[${label}]:${reset} ${white}${message}${reset}" 1>&2
    }

    # Expand one trigger pattern into the files it matches inside a workspace.
    #
    # Arguments:
    #   $1 - workspace directory to search
    #   $2 - pattern relative to the workspace. Recognised shapes:
    #          **/*<rest>       files at any depth whose whole path matches the pattern
    #          <dir>/**/*<rest> files at any depth below <dir> whose name matches *<rest>
    #          <dir>/<glob>     files at exactly that depth; a `*` never crosses a `/`
    #          <glob>           files directly inside the workspace
    # Outputs: one `readlink -f` resolved path per line on stdout.
    # Returns: always 0. A failing `find` (for example a missing directory) simply
    #          produces no stdout instead of aborting the script under `set -e`.
    function search {
      local WORKSPACE_PATH="$1"
      local PATTERN="$2"
      # The subshell keeps the helper variables below out of the caller's scope.
      (
        if [[ "$PATTERN" == "**/*"* ]]; then
          find "$WORKSPACE_PATH" -type f -path "$PATTERN" -exec readlink -f {} \;
        elif [[ "$PATTERN" == *"**/*"* ]]; then
          DIR="${PATTERN%/**/*}"
          MATCH="${PATTERN##*/}"
          find "$WORKSPACE_PATH/$DIR" -type f -name "$MATCH" -exec readlink -f {} \;
        elif [[ "$PATTERN" == *"/"* ]]; then
          # find's -path lets `*` match `/`, which would turn `pkg/*.go` into a
          # recursive match. Pinning the depth to the number of path components in
          # the pattern keeps every `*` inside its own component, and also makes
          # literal nested paths such as `pkg/main.go` match.
          NON_SLASH='[^/]'
          SLASHES="${PATTERN//$NON_SLASH/}"
          DEPTH=$(( ${#SLASHES} + 1 ))
          find "$WORKSPACE_PATH" -mindepth "$DEPTH" -maxdepth "$DEPTH" -type f \
            -path "$WORKSPACE_PATH/$PATTERN" -exec readlink -f {} \;
        else
          find "$WORKSPACE_PATH" -maxdepth 1 -type f -name "$PATTERN" -exec readlink -f {} \;
        fi
      ) || true
    }


    # Work out which workspace workflows are affected by the commits being built and
    # print them on stdout as a JSON array (`[]` when nothing matched). Human
    # readable diagnostics go to stderr through `debug`.
    #
    # Environment:
    #   WORKSPACE_ROOT  (required) directory searched for manifest files
    #   BASE_BRANCH     branch used as comparison base for new or force-pushed
    #                   branches (default: main)
    #   BEFORE_COMMIT   SHA the branch pointed at before the push (default: all zeros)
    #   CURRENT_COMMIT  SHA being built (default: HEAD)
    #   MANIFEST_FILE   file name that marks a workspace (default: Inceptionfile)
    #   REPO_PATH       repository root; the function changes directory to it
    # Uses git, yq and jq plus the `search` and `debug` functions of this script.
    function inception {
      : "${WORKSPACE_ROOT?must be defined}"
      BASE_BRANCH=${BASE_BRANCH:-main}
      BEFORE_COMMIT=${BEFORE_COMMIT:-0000000000000000000000000000000000000000}
      CURRENT_COMMIT=${CURRENT_COMMIT:-HEAD}
      MANIFEST_FILE=${MANIFEST_FILE:-Inceptionfile}
      # Enter the repository before the first git call so every git and find
      # command below looks at the same tree regardless of the caller's directory.
      cd "$REPO_PATH"
      # The first step in making a CI system capable of handling multiple projects from a single
      # repository is to introduce logic to detect which projects were affected by a given commit.
      # This implies knowing which files have changed from one commit to the next, as well as which
      # files a given project is "watching". This sounds simple. In practice it can be rather complex.
      #
      # Most CI systems provide access to the current and previous SHA of the commit that triggered
      # a given CI run. In the most basic case, which we assume first, the branch being pushed to
      # already exists and we are simply adding commits to it. The previous SHA is the previous
      # commit to the current branch.
      COMMIT_TYPE="standard-push"
      BASE_COMMIT="$BEFORE_COMMIT"
      if [[ "$BEFORE_COMMIT" = "0000000000000000000000000000000000000000" ]]; then
        # In the case of pushing a new branch, the SHA of the previous commit is open to
        # interpretation. Is it the latest commit from main? What happens if you make a branch of a
        # branch? What happens if you delete the branch that you branched from before you push the
        # new branch? For the purposes of this script, we assume a new branch has no previous commit
        # (sometimes represented in CI systems as all zeros) and that we want to compare the new
        # branch to the `main` branch by default to determine the files changed.
        COMMIT_TYPE="new-branch"
        BASE_COMMIT=$(git merge-base "$CURRENT_COMMIT" "origin/$BASE_BRANCH")
      elif ! git merge-base --is-ancestor "$BEFORE_COMMIT" "$CURRENT_COMMIT" 2>/dev/null; then
        # If the previous commit no longer exists (or is not an ancestor), we assume history has
        # been rewritten and the commit is a force push. We treat this case the same as a new
        # branch and consider the set of changed files to be everything different than the latest
        # commit on main.
        COMMIT_TYPE="force-push"
        BASE_COMMIT=$(git merge-base "$CURRENT_COMMIT" "origin/$BASE_BRANCH")
      fi
      # A workspace path is a folder that contains a manifest file that defines workflows and which
      # files should trigger them. Paths are split on whitespace, so workspace directories must not
      # contain spaces.
      WORKSPACE_PATHS=($(find "$WORKSPACE_ROOT" -type f -name "$MANIFEST_FILE" -exec dirname {} \;))
      WORKSPACE_FILES=()
      # Iterate all workspaces resolving paths to each watched file so we can check to see if they
      # have changed.
      for WORKSPACE_PATH in "${WORKSPACE_PATHS[@]}"; do
        # Read the manifest file for metadata.
        NAME=$(yq ".workspace.name" "${WORKSPACE_PATH}/${MANIFEST_FILE}")
        WORKFLOWS=($(yq '.workspace.workflows | keys[]' "${WORKSPACE_PATH}/${MANIFEST_FILE}"))
        # A workspace can trigger arbitrary workflows. Find which workflows exist in this workspace.
        for WORKFLOW in "${WORKFLOWS[@]}"; do
          # Each workflow defines which files changing should trigger it. Find each of these for
          # this workflow.
          TRIGGERS=($(yq ".workspace.workflows.$WORKFLOW.paths[]" "${WORKSPACE_PATH}/${MANIFEST_FILE}"))
          # Grab which CI runner should be used early, this is sometimes needed statically (e.g.
          # github actions). The value is kept as raw JSON and embedded verbatim below.
          RUNS_ON=$(yq -I=0 -o json ".workspace.workflows.$WORKFLOW.runs-on" "${WORKSPACE_PATH}/${MANIFEST_FILE}")
          for TRIGGER in "${TRIGGERS[@]}"; do
            # Expand trigger patterns (e.g. src/**/*) to a list of files that match.
            FILES=($(search "$WORKSPACE_PATH" "$TRIGGER" | sed "s|^$REPO_PATH/||"))
            # Annotate each file with details about the workspace and workflow it belongs to.
            for FILE in "${FILES[@]}"; do
              [[ -n "$FILE" ]] && WORKSPACE_FILES+=("$FILE|${WORKSPACE_PATH}|$NAME|$WORKFLOW|$RUNS_ON")
            done
          done
        done
      done
      # Populate identically indexed WATCHED_FILES and WORKFLOW_CONTEXTS (no maps in bash 3.x, boo).
      WATCHED_FILES=()
      WORKFLOW_CONTEXTS=()
      for ITEM in "${WORKSPACE_FILES[@]}"; do
        IFS="|" read -r FILE BASE_PATH NAME WORKFLOW RUNS_ON <<< "$ITEM"
        CONTEXT="{\"name\":\"$NAME\",\"workflow\":\"$WORKFLOW\",\"base-path\":\"${BASE_PATH}\",\"manifest\":\"${BASE_PATH}/${MANIFEST_FILE}\",\"runs-on\":$RUNS_ON}"
        WATCHED_FILES+=("$FILE")
        WORKFLOW_CONTEXTS+=("$CONTEXT")
      done
      # Ask git to tell us which files have actually changed.
      CHANGED_FILES=($(git diff --name-only "$BASE_COMMIT" "$CURRENT_COMMIT"))
      RUN=()
      # Check all changed files to determine which workflows to trigger.
      for CHANGED in "${CHANGED_FILES[@]}"; do
        for i in "${!WATCHED_FILES[@]}"; do
          FILE="${WATCHED_FILES[$i]}"
          CONTEXT="${WORKFLOW_CONTEXTS[$i]}"
          # If any of the watched files match those which have changed, record the context for it
          # in the list of things to run. Only match once per workflow to make this "fast".
          # NOTE(review): this is a prefix match, so a change to `a.go.bak` also triggers a
          # workflow watching `a.go` - confirm whether an exact match was intended.
          if [[ "$CHANGED" == "$FILE"* && ! " ${RUN[*]} " =~ " $CONTEXT " ]]; then
            RUN+=("$CONTEXT")
          fi
        done
      done
      # Output useful debugging info on stderr for humans running output from this script.
      for var in BASE_BRANCH BEFORE_COMMIT CURRENT_COMMIT REPO_PATH WORKSPACE_ROOT MANIFEST_FILE COMMIT_TYPE BASE_COMMIT; do
        debug "$var" "${!var}"
      done
      debug "CHANGED_FILES" "$(printf "\n%s" "${CHANGED_FILES[@]}" | sed 's/^/ /')"
      # Output a JSON array of affected workspaces and workflows for consumption by a CI system.
      if [[ ${#RUN[@]} -ne 0 ]]; then
        # jq reads a stream of JSON values, so feed it one context per line instead of splitting
        # on spaces (which would tear apart any context that contains a space).
        debug "RUN" "\n$(printf '%s\n' "${RUN[@]}" | jq -r '.name+" ("+(.workflow)+")"' | sed 's/^/ /')"
        # Join with commas inside a subshell so IFS is untouched here, and pass the result as
        # data: a `%` or backslash in a context must not be read as a printf directive.
        printf '[%s]' "$(IFS=,; printf '%s' "${RUN[*]}")"
      else
        printf '[]'
      fi
    }

    # Small self-test for the pattern matching done by `search`.
    #
    # Builds a throwaway mock workspace at "$REPO_PATH/test", runs every pattern
    # case through `validate`, and removes the mock workspace again. Results are
    # printed on stdout.
    # Returns: 1 without touching anything when "$REPO_PATH/test" already exists,
    #          because the suite deletes that path when it finishes.
    function testsuite {
      cd "$REPO_PATH"
      if [[ -e test ]]; then
        printf 'refusing to run: %s already exists and the test suite would delete it\n' "$REPO_PATH/test" >&2
        return 1
      fi
      # Remove the fixture even when a command below aborts the script under `set -e`.
      trap 'rm -rf -- "${REPO_PATH:?}/test"' EXIT
      mkdir -p test/pkg/fixtures
      touch test/.gitignore test/go.mod test/go.sum test/main.go test/README.md
      touch test/pkg/main.go test/pkg/main_test.go test/pkg/fixtures/test.json
      printf "Testing glob function on mock workspace:\n\n"
      # `tree` is only used for display; fall back to a plain listing without it.
      if command -v tree >/dev/null 2>&1; then
        tree -na --noreport test
      else
        find test | LC_ALL=C sort
      fi
      printf "\n"
      validate "none" "none" ""
      validate "single" "go.mod" "test/go.mod"
      validate "star" "*" "test/go.mod test/go.sum test/README.md test/.gitignore test/main.go"
      validate "double-star" "**" "test/go.mod test/go.sum test/README.md test/.gitignore test/main.go"
      validate "star-dot-star" "*.*" "test/go.mod test/go.sum test/README.md test/.gitignore test/main.go"
      validate "non-recursive-scoped" "*.go" "test/main.go"
      validate "recursive-all" "**/*" "test/go.mod test/go.sum test/README.md test/.gitignore test/main.go test/pkg/fixtures/test.json test/pkg/main.go test/pkg/main_test.go"
      validate "recursive-scoped" "**/*.go" "test/main.go test/pkg/main.go test/pkg/main_test.go"
      validate "prefixed-recursive-all" "pkg/**/*" "test/pkg/fixtures/test.json test/pkg/main.go test/pkg/main_test.go"
      validate "prefixed-non-recursive-scoped" "pkg/*.go" "test/pkg/main.go test/pkg/main_test.go"
      validate "prefixed-recursive-scoped" "pkg/**/*.go" "test/pkg/main.go test/pkg/main_test.go"
      rm -rf -- "${REPO_PATH:?}/test"
      trap - EXIT
    }

    # Run `search` for one pattern against the mock workspace "test" and report
    # whether it found exactly the expected files.
    #
    # Arguments:
    #   $1 - test title
    #   $2 - pattern handed to `search`
    #   $3 - space separated expected paths, relative to REPO_PATH
    # Outputs: "test <title> (<pattern>): OK", or FAIL followed by both file
    #          lists, on stdout.
    function validate {
      local TITLE="$1"
      local PATTERN="$2"
      local EXPECTED="$3"
      local ACTUAL
      # Title and pattern are data, not a format string (patterns may hold % or \).
      printf 'test %s (%s): ' "$TITLE" "$PATTERN"
      # find returns entries in directory order, which differs between file
      # systems, so both lists are sorted before they are compared.
      EXPECTED="$(echo "$EXPECTED" | tr ' ' '\n' | LC_ALL=C sort)"
      ACTUAL="$(search "test" "$PATTERN" | sed "s|^$REPO_PATH/||" | LC_ALL=C sort)"
      if [[ "$EXPECTED" == "$ACTUAL" ]]; then
        printf 'OK\n'
      else
        printf 'FAIL\nExpected:\n%s\nActual:\n%s\n' "$EXPECTED" "$ACTUAL"
      fi
    }

    # Entry point: TEST=true runs the pattern self-test, anything else runs detection.
    case "$TEST" in
      true) testsuite ;;
      *) inception ;;
    esac