Skip to content

Instantly share code, notes, and snippets.

@billyeh
Last active December 30, 2021 00:26
Show Gist options
  • Select an option

  • Save billyeh/7c1c8f8f9c6e3f30f281a578faab6a69 to your computer and use it in GitHub Desktop.

Select an option

Save billyeh/7c1c8f8f9c6e3f30f281a578faab6a69 to your computer and use it in GitHub Desktop.

Revisions

  1. billyeh revised this gist Dec 30, 2021. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions outline_formatter.gs
    Original file line number Diff line number Diff line change
    @@ -110,8 +110,8 @@ function romanNumeral(num) {
    /**
     * Cleans the outline text before formatting: reads the body text, removes
     * every newline, deletes running page-header strings of the form
     * "<digits> <TITLE> Message <Word> (continuation)", and writes the result
     * back to the body.
     * NOTE(review): this span looks like a rendered diff hunk that lists both the
     * pre-revision and post-revision replace lines together - confirm.
     * @param {Body} body - Object exposing getText() and setText(text).
     */
    function preformat(body) {
    let bodyText = body.getText();
    // Drop all line breaks so the text becomes a single run.
    bodyText = bodyText.replace(/\n/g, '');
    // These two patterns require exactly one space between the header parts.
    bodyText = bodyText.replace(/\d+ CRYSTALLIZATION STUDY OUTLINES Message [A-Z][a-z]+ \(continuation\)/g, '');
    bodyText = bodyText.replace(/\d+ JEREMIAH AND LAMENTATIONS Message [A-Z][a-z]+ \(continuation\)/g, '');
    // These two patterns accept any whitespace run after the page number (and,
    // for the CRYSTALLIZATION header only, before "Message").
    bodyText = bodyText.replace(/\d+\s+CRYSTALLIZATION STUDY OUTLINES\s+Message [A-Z][a-z]+ \(continuation\)/g, '');
    bodyText = bodyText.replace(/\d+\s+JEREMIAH AND LAMENTATIONS Message [A-Z][a-z]+ \(continuation\)/g, '');
    body.setText(bodyText);
    }

  2. billyeh revised this gist Jul 6, 2020. 1 changed file with 6 additions and 0 deletions.
    6 changes: 6 additions & 0 deletions outline_formatter.gs
    Original file line number Diff line number Diff line change
    @@ -1,3 +1,9 @@
    /**
    * Convenient script for formatting Google Docs converted from outline PDF files.
    * Simply copy/paste the outline text, and the script cleans up the whitespace
    * and correctly formats the document according to the Roman numerals found
    * in the text.
    /**
    * Calculates all the next Roman numeral strings we expect.
    * @return {Array} List of outline point strings to search the
  3. billyeh created this gist Jul 6, 2020.
    185 changes: 185 additions & 0 deletions outline_formatter.gs
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,185 @@
    /**
     * Builds the search patterns for every outline marker that may come next.
     *
     * One candidate is produced per existing indentation level (the marker that
     * follows the current one at that level) plus one for the next deeper level
     * (its first marker). Levels for which nextOutlinePointString yields a falsy
     * value are skipped.
     *
     * @param {Array} indices - Current point index at each indentation level.
     * @return {Array} Pattern strings made of a space, the marker, an escaped
     *     period and a space. For example, [' II\\. ', ' A\\. '] for [0].
     */
    function nextOutlinePoints(indices) {
      const depth = indices.length;
      return Array.from({ length: depth + 1 }, (_, level) => {
        // The extra level past the end starts from -1 so that its first marker
        // (index 0) is requested.
        const current = level < depth ? indices[level] : -1;
        return nextOutlinePointString(level, current + 1);
      })
        .filter((marker) => marker)
        .map((marker) => ' ' + marker + '\\. ');
    }

    /**
     * Renders the marker text of an outline point for a given nesting depth.
     *
     * Depth 0 uses romanNumeral, depth 1 an upper-cased latinAlphabet result,
     * depth 2 the decimal string of index + 1, and depth 3 a lower-cased
     * latinAlphabet result.
     *
     * @param {number} indentation - Current indentation level.
     * @param {number} index - Which point at this indentation level to represent.
     * @return {String} The marker text, or undefined for any other indentation.
     */
    function nextOutlinePointString(indentation, index) {
      // Renderers are thunks so only the selected style is evaluated.
      const renderers = new Map([
        [0, () => romanNumeral(index)],
        [1, () => latinAlphabet(index).toUpperCase()],
        [2, () => (index + 1).toString()],
        [3, () => latinAlphabet(index).toLowerCase()],
      ]);
      const render = renderers.get(indentation);
      return render ? render() : undefined;
    }

    /**
     * Converts a zero-based index into its lowercase letter label by starting
     * at 'a' and advancing with nextLatinAlphabet once per index step.
     * @param {number} index - Number of steps to advance from 'a'.
     * @return {String} The resulting letter label ('a' when index is 0).
     */
    function latinAlphabet(index) {
      let label = 'a';
      let stepsLeft = index;
      while (stepsLeft > 0) {
        label = nextLatinAlphabet(label);
        stepsLeft -= 1;
      }
      return label;
    }

    /**
     * Returns the label that follows `key` in Latin-letter sequence, keeping
     * the casing of each character and growing on overflow (e.g. z -> aa,
     * az -> ba).
     * Adapted from https://stackoverflow.com/a/31540111.
     * @param {String} key - Current letter label.
     * @return {String} The next letter label.
     */
    function nextLatinAlphabet(key) {
      const isFinalLetter = (ch) => ch === 'Z' || ch === 'z';
      // 'Z' -> 'A' and 'z' -> 'a': the first letter sits 25 code units lower.
      const wrapToFirst = (ch) => String.fromCharCode(ch.charCodeAt() - 25);

      // A lone Z/z overflows into a two-letter label: AA or aa.
      if (isFinalLetter(key)) {
        return wrapToFirst(key).repeat(2);
      }

      const head = key.slice(0, -1);
      const tail = key.slice(-1);

      // A longer label ending in Z/z carries into the leading part and resets
      // the last position to A/a.
      if (isFinalLetter(tail)) {
        return nextLatinAlphabet(head) + wrapToFirst(tail);
      }

      // Otherwise only the last character advances.
      return head + String.fromCharCode(tail.charCodeAt() + 1);
    }

    /**
     * Converts a zero-based index into an upper-case Roman numeral, so index 0
     * yields 'I' and index 3 yields 'IV'.
     * Adapted from https://stackoverflow.com/a/41358305.
     * @param {number} num - Zero-based index; one is added before conversion.
     * @return {String} The Roman numeral for num + 1.
     */
    function romanNumeral(num) {
      // Largest value first so each symbol consumes as much as it can.
      const SYMBOLS = [
        ['M', 1000],
        ['CM', 900],
        ['D', 500],
        ['CD', 400],
        ['C', 100],
        ['XC', 90],
        ['L', 50],
        ['XL', 40],
        ['X', 10],
        ['IX', 9],
        ['V', 5],
        ['IV', 4],
        ['I', 1],
      ];
      let remaining = num + 1;
      let numeral = '';
      for (const [symbol, value] of SYMBOLS) {
        const count = Math.floor(remaining / value);
        remaining -= count * value;
        numeral += symbol.repeat(count);
      }
      return numeral;
    }

    /**
     * Cleans the outline before formatting it.
     *
     * Reads the body text, removes every newline, strips running page headers
     * of the form "<page number> <TITLE> Message <Word> (continuation)", and
     * writes the cleaned text back to the body.
     *
     * The header patterns accept any run of whitespace after the page number
     * and before "Message", so headers separated by several spaces or tabs are
     * removed as well as single-spaced ones.
     *
     * @param {Body} body - A document body element to clean; must provide
     *     getText() and setText(text).
     */
    function preformat(body) {
      const PAGE_HEADER_PATTERNS = [
        /\d+\s+CRYSTALLIZATION STUDY OUTLINES\s+Message [A-Z][a-z]+ \(continuation\)/g,
        /\d+\s+JEREMIAH AND LAMENTATIONS\s+Message [A-Z][a-z]+ \(continuation\)/g,
      ];

      // Newlines are dropped first so the header patterns see one unbroken run.
      let bodyText = body.getText().replace(/\n/g, '');
      for (const pattern of PAGE_HEADER_PATTERNS) {
        bodyText = bodyText.replace(pattern, '');
      }
      body.setText(bodyText);
    }

    /**
     * Splits the body text into outline points, consuming the body as it goes.
     *
     * On each pass the earliest match among the candidate markers from
     * nextOutlinePoints decides where the current point ends. The text before
     * it (minus everything up to and including its first space) is recorded,
     * and the body is replaced by the text after the match's leading character.
     * The loop ends when the body text is empty.
     *
     * @param {Body} body - A document body element to format; must provide
     *     getText(), setText(text) and findText(pattern).
     * @return {Array} A list of outline point text with the indentation level
     *     of the following point. For example,
     *     [
     *       {text: "First point", indentation: 1},
     *       {text: "Second point", indentation: 1},
     *       ...
     *     ]
     */
    function extractPoints(body) {
      const points = [];
      let position = [0];

      while (body.getText().length > 0) {
        let cutAt = Infinity;
        let level = 0;

        // Pick the candidate marker that occurs earliest in the remaining text;
        // its position in the candidate list is its indentation level.
        nextOutlinePoints(position).forEach((pattern, candidateLevel) => {
          const match = body.findText(pattern);
          if (!match) {
            return;
          }
          const offset = match.getStartOffset();
          if (offset < cutAt) {
            cutAt = offset;
            level = candidateLevel;
          }
        });

        // With no match cutAt stays Infinity, so the whole remaining text is
        // taken and the body is emptied.
        const remaining = body.getText();
        const chunk = remaining.slice(0, cutAt);
        points.push({
          text: chunk.slice(chunk.indexOf(' ') + 1),
          indentation: level,
        });
        body.setText(remaining.slice(cutAt + 1));

        // Open a new deeper level, or advance the matched level and discard
        // any levels below it.
        if (level >= position.length) {
          position = [...position, 0];
        } else {
          position = [...position.slice(0, level), position[level] + 1];
        }
      }

      return points;
    }

    /**
     * Appends one list item per outline point (see extractPoints) to the body.
     *
     * Each point's `indentation` is the nesting level of the point that follows
     * it, so an item is nested using the value carried over from the previous
     * point; the first item is placed at level 0.
     *
     * @param {Body} body - A document body element; must provide
     *     appendListItem(text).
     * @param {Array} points - A list of outline points, each with `text` and
     *     `indentation`.
     */
    function createOutline(body, points) {
      let nestingLevel = 0;
      for (const { text, indentation } of points) {
        body.appendListItem(text).setNestingLevel(nestingLevel);
        nestingLevel = indentation;
      }
    }

    /**
     * Entry point: cleans the active document's body with preformat, pulls the
     * outline points out with extractPoints, and appends them back as list
     * items with createOutline.
     */
    function main() {
      const body = DocumentApp.getActiveDocument().getBody();
      preformat(body);
      createOutline(body, extractPoints(body));
    }