/**
 * Convenient script for formatting Google Docs converted from outline PDF files.
 * Simply copy/paste the outline text, and the script cleans up the whitespace
 * and correctly formats the document according to the Roman numerals found
 * in the text.
 */

/**
 * Calculates all the next outline point patterns we expect.
 *
 * For every indentation level already present in `indices` the pattern for
 * the next sibling at that level is produced; one extra pattern is produced
 * for the first child one level deeper. Levels that have no label style
 * (see nextOutlinePointString) are skipped.
 *
 * @param {Array<number>} indices - Zero-based index of the current point at
 *     each indentation level, outermost level first.
 * @return {Array} List of outline point patterns to search the document for.
 *     Each one is a space, the label, an escaped period and a space, e.g.
 *     for [0]: [' II\\. ', ' A\\. '].
 */
function nextOutlinePoints(indices) {
  const patterns = [];
  for (let indentation = 0; indentation <= indices.length; indentation++) {
    // One level past the current depth means "first child": start from -1
    // so that the `+ 1` below yields index 0.
    const index = indentation === indices.length ? -1 : indices[indentation];
    const pointString = nextOutlinePointString(indentation, index + 1);
    if (pointString) {
      // The period is escaped because the pattern is used as a regular
      // expression by the caller.
      patterns.push(` ${pointString}\\. `);
    }
  }
  return patterns;
}

/**
 * Gets the string representation of an outline point given its indentation and index.
 * @param {number} indentation - Current indentation level.
 * @param {number} index - Which point (zero-based) at this indentation level to represent.
 * @return {String|undefined} The string representation of this outline point,
 *     or undefined when the indentation level has no label style.
 */
function nextOutlinePointString(indentation, index) {
  switch (indentation) {
    case 0:
      // Capital Roman numerals.
      return romanNumeral(index);
    case 1:
      // Capital Latin letters.
      return latinAlphabet(index).toUpperCase();
    case 2:
      // Arabic numbers.
      return (index + 1).toString();
    case 3:
      // Lowercase Latin letters.
      return latinAlphabet(index).toLowerCase();
    default:
      // Deeper levels are not supported.
      return undefined;
  }
}

/**
 * Calculates the Latin letter representation of a number index.
 * @param {number} index - Zero-based index (0 -> 'a', 1 -> 'b', ...).
 * @return {String} The lowercase letter string for this index.
 */
function latinAlphabet(index) {
  let currentLetter = 'a';
  for (let i = 0; i < index; i++) {
    currentLetter = nextLatinAlphabet(currentLetter);
  }
  return currentLetter;
}

/**
 * Gets the next letter in the Latin alphabet, handling upper case and
 * wrapping (e.g. z -> aa).
 * Cribbed from https://stackoverflow.com/a/31540111.
 * @param {String} key - Current letter string.
 * @return {String} The letter string that follows `key`.
 */
function nextLatinAlphabet(key) {
  if (key === 'Z' || key === 'z') {
    // 'Z' - 25 is 'A' and 'z' - 25 is 'a', so this yields AA or aa.
    const first = String.fromCharCode(key.charCodeAt(0) - 25);
    return first + first;
  }

  const lastChar = key.slice(-1);
  const sub = key.slice(0, -1);
  if (lastChar === 'Z' || lastChar === 'z') {
    // If a string of length > 1 ends in Z/z, increment the string
    // (excluding the last Z/z) recursively, and append A/a (depending on
    // casing) to it.
    return nextLatinAlphabet(sub) + String.fromCharCode(lastChar.charCodeAt(0) - 25);
  }

  // (take till last char) append with (increment last char)
  return sub + String.fromCharCode(lastChar.charCodeAt(0) + 1);
}

/**
 * Calculates the Roman numeral representation of a number index.
 * Cribbed from https://stackoverflow.com/a/41358305.
 * @param {number} num - Zero-based index (0 -> 'I', 1 -> 'II', ...).
 * @return {String} The capital Roman numeral for `num + 1`.
 */
function romanNumeral(num) {
  // Largest value first, so the greedy subtraction below works.
  const ROMAN = [
    ['M', 1000],
    ['CM', 900],
    ['D', 500],
    ['CD', 400],
    ['C', 100],
    ['XC', 90],
    ['L', 50],
    ['XL', 40],
    ['X', 10],
    ['IX', 9],
    ['V', 5],
    ['IV', 4],
    ['I', 1],
  ];

  let remaining = num + 1;
  const parts = [];
  for (const [symbol, value] of ROMAN) {
    const count = Math.floor(remaining / value);
    remaining -= count * value;
    parts.push(symbol.repeat(count));
  }
  return parts.join('');
}

/**
 * Cleans the outline before formatting it: removes all newlines and the
 * "(continuation)" page headers, then writes the result back to the body.
 * @param {Body} body - A document body element to clean.
 */
function preformat(body) {
  let bodyText = body.getText();
  bodyText = bodyText.replace(/\n/g, '');
  bodyText = bodyText.replace(/\d+\s+CRYSTALLIZATION STUDY OUTLINES\s+Message [A-Z][a-z]+ \(continuation\)/g, '');
  bodyText = bodyText.replace(/\d+\s+JEREMIAH AND LAMENTATIONS Message [A-Z][a-z]+ \(continuation\)/g, '');
  body.setText(bodyText);
}

/**
 * Extracts outline text in the document into outline formatting.
 *
 * The body text is consumed from the front: on every pass the earliest
 * upcoming outline label is located, the text before it is recorded, and the
 * body is truncated to start at that label. The body is empty afterwards.
 *
 * @param {Body} body - A document body element to format.
 * @return {Array} A list of outline point text (with everything up to and
 *     including the first space, i.e. the leading label, stripped) together
 *     with the indentation level of the following point. For example,
 *     [
 *       {text: "First point", indentation: 1},
 *       {text: "Second point", indentation: 1},
 *       ...
 *     ]
 */
function extractPoints(body) {
  let outlinePoint = [0];
  const texts = [];

  let remaining = body.getText();
  while (remaining.length > 0) {
    const candidates = nextOutlinePoints(outlinePoint);

    // Pick the candidate label that occurs earliest. When none is found,
    // nextIndex stays Infinity and the rest of the text becomes the last point.
    let nextIndex = Infinity;
    let indentation = 0;
    for (let level = 0; level < candidates.length; level++) {
      const range = body.findText(candidates[level]);
      if (range && range.getStartOffset() < nextIndex) {
        nextIndex = range.getStartOffset();
        indentation = level;
      }
    }

    const chunk = remaining.slice(0, nextIndex);
    texts.push({
      // Drop the leading label ("I.", "A.", ...) up to the first space.
      text: chunk.slice(chunk.indexOf(' ') + 1),
      indentation: indentation,
    });

    // Skip the space that precedes the label that was found.
    body.setText(remaining.slice(nextIndex + 1));

    if (indentation >= outlinePoint.length) {
      // Descending one level: start counting its points from zero.
      outlinePoint.push(0);
    } else {
      outlinePoint[indentation]++;
    }
    // Forget counters of levels deeper than the one just matched.
    outlinePoint = outlinePoint.slice(0, indentation + 1);

    remaining = body.getText();
  }
  return texts;
}

/**
 * Creates ListItems with the correct indentation, given outline points
 * (see extractPoints).
 * @param {Body} body - A document body element.
 * @param {Array} points - A list of outline points.
 */
function createOutline(body, points) {
  // Each point stores the indentation of the point that follows it, so the
  // first item sits at level 0 and every later item uses its predecessor's value.
  let currentIndentation = 0;
  for (const point of points) {
    const listItem = body.appendListItem(point.text);
    listItem.setNestingLevel(currentIndentation);
    currentIndentation = point.indentation;
  }
}

/**
 * Entry point: cleans the active document's body, extracts the outline
 * points from it and appends them back as nested list items.
 */
function main() {
  const document = DocumentApp.getActiveDocument();
  const body = document.getBody();
  preformat(body);
  const points = extractPoints(body);
  createOutline(body, points);
}