Skip to content

Instantly share code, notes, and snippets.

@Akiira
Created July 12, 2021 19:37
Show Gist options
  • Select an option

  • Save Akiira/026399029bb2dd459acc47bdd3e5bcb9 to your computer and use it in GitHub Desktop.

Select an option

Save Akiira/026399029bb2dd459acc47bdd3e5bcb9 to your computer and use it in GitHub Desktop.
Non-breaking spaces (U+00A0) and zero-width spaces (U+200B) typically end up in a file when text is copied and pasted from Windows to Mac/Linux and the source encoding and formatting metadata are retained. Use `sed` to strip the offending characters.

Depending on your platform, run it in one of the following ways (the in-place `-i` form shown first is GNU sed syntax; BSD/macOS sed expects `-i ''`):

Direct reference to file:

sed -e $'s/\xC2\xA0/ /g' -e $'s/\xE2\x80\x8b//g' -i /path/to/your/file

Pipe file contents to sed

cat /path/to/your/file | sed -e $'s/\xC2\xA0/ /g' -e $'s/\xE2\x80\x8b//g'

Or copy and paste the following into a bash script and call it with the file as a parameter:

#!/bin/bash

# Shell options for the whole script:
#   nounset  - expanding an unset variable is an error
#   errexit  - stop at the first command that fails
#   xtrace   - print each command before it is executed
#   pipefail - a pipeline fails if any of its stages fails
set -o nounset -o errexit -o xtrace -o pipefail

# Print all arguments, joined into one line, on standard error.
# Arguments: any number of words making up the message.
# Outputs:   the message followed by a newline, on stderr.
echo_err() {
    local message="$*"
    printf '%s\n' "${message}" 1>&2
}

# Print the command-line help text on stdout.
# Arguments: none.
# Outputs:   usage synopsis and invocation examples.
echo_usage() {
    # Quoted delimiter: the help text is literal, nothing in it is expanded.
    # The bash example deliberately omits `-c`: with `bash -c CMD ARG`, ARG
    # becomes $0 of CMD and the script would receive no file argument.
    cat << 'EOF'
Usage:
    rm-unicode-nb-spaces FILE_PATH

Example:
    Run with bash:
    /bin/bash ./rm-unicode-nb-spaces /home/jessenich/windows-file.txt

    Run as executable in current terminal:
    chmod +x ./rm-unicode-nb-spaces
    ./rm-unicode-nb-spaces /home/jessenich/windows-file.txt
EOF
}

# Copy FILE to FILE.bak (preserving mode and timestamps) before it is modified.
# Arguments: $1 - path of the file to back up.
# Outputs:   progress messages on stdout, error messages on stderr.
# Exits:     1 if the file is missing or empty; cp's own exit status if the
#            copy fails. Returns normally only when the backup was written.
mk_backup() {
    local file="$1"
    local cp_status=0

    # -s is true only for an existing file of non-zero length.
    if [ ! -s "${file}" ]; then
        echo_err "File ${file} not found or empty"
        exit 1
    fi

    echo "Found ${file}."
    echo "Backing up ${file} to ${file}.bak..."

    # Record cp's exit status (not its stdout). The `|| cp_status=$?` form
    # also keeps `set -e` from aborting before the error can be reported.
    cp -i -p -- "${file}" "${file}.bak" || cp_status=$?

    if [ "${cp_status}" -ne 0 ]; then
        echo_err "Error occurred creating file backup."
        echo_err "Exiting with error code received from 'cp'."
        exit "${cp_status}"
    fi

    echo "Backed up original file contents to ${file}.bak"
}

# Rewrite FILE with every UTF-8 non-breaking space (bytes C2 A0) replaced by
# a regular space and every zero-width space (bytes E2 80 8B) removed, and
# echo the cleaned contents to stdout for review.
# Arguments: $1 - path of the file to clean (modified in place).
# Outputs:   the cleaned file contents on stdout.
# Returns:   0 on success; non-zero if the temp file, sed, or the write-back
#            fails. FILE is left untouched unless sed succeeded.
replace_unicode_nb_spaces() {
    local file="$1"
    local tmp_file
    local rc=0

    tmp_file=$(mktemp) || return 1

    # Filter into a temp file instead of using `sed -i`: the -i flag differs
    # between GNU and BSD sed, and an in-place edit prints nothing to show.
    # LC_ALL=C makes sed match raw bytes, so stray invalid UTF-8 elsewhere in
    # the file cannot make it fail. Input is redirected so a path starting
    # with '-' is never parsed as an option.
    if ! LC_ALL=C sed -e $'s/\xC2\xA0/ /g' -e $'s/\xE2\x80\x8b//g' \
        < "${file}" > "${tmp_file}"; then
        rm -f -- "${tmp_file}"
        return 1
    fi

    # Tee the cleaned bytes to the terminal and back over the original file.
    # Writing through the existing path keeps its permissions and ownership.
    tee -- "${file}" < "${tmp_file}" || rc=$?
    rm -f -- "${tmp_file}"
    return "${rc}"
}

# Entry point: back up the file named by the first argument, clean it, then
# offer to delete the backup.

# Without this check a missing argument only trips `set -u` on "$1" with an
# "unbound variable" error; show the usage text instead.
if [ "$#" -lt 1 ]; then
    echo_usage >&2
    exit 2
fi

mk_backup "$1"
replace_unicode_nb_spaces "$1"

echo ""
echo "Review the above printout. If correct, would you like to delete the backup file? [y/N]"

# EOF on stdin makes `read` fail; treat it as the default answer "no"
# rather than letting `set -e` abort the script.
DELETE_BACKUP=""
IFS= read -r DELETE_BACKUP || true

# Pattern match on the answer: a quoted "[Yy]" inside `[ … = … ]` is compared
# as a literal five-character string and never matches "y".
case "${DELETE_BACKUP}" in
    [Yy] | [Yy][Ee][Ss])
        rm -f -- "${1}.bak"
        echo "Removed backup file."
        ;;
esac

exit 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment