type SubseqAsyncResult = { sequence: string; matches: Array<{ string: string, score: number, positions: number[] }>; }; type SubseqStringTableEntry = { index: number, score: number, positions: number[] }; type SubseqStringTable = { [key: string]: null | SubseqStringTableEntry; }; // Table of char codes 0-255, where codes of all alphabetic characters // are mapped to those of their corresponding lowercase characters with // diacritics removed. E.g.: {`A`, `a`, `À`, ...} → `a` // Special case: AE ligature {`Æ`, `æ`} → `e` const charCodesGeneralized: number[] = (() => { const a = []; for (let i = 0; i < 256; i++) a.push(i); for (let i = 65; i < 91; i++) a[i] = i + 32; for (let i = 224; i < 230; i++) a[i] = 97; a[230] = 101; a[231] = 99; for (let i = 232; i < 236; i++) a[i] = 101; for (let i = 236; i < 240; i++) a[i] = 105; a[241] = 110; for (let i = 242; i < 247; i++) a[i] = 111; a[248] = 111; for (let i = 249; i < 253; i++) a[i] = 117; a[253] = 121; a[255] = 121; for (let i = 192; i < 215; i++) a[i] = a[i + 32]; for (let i = 216; i < 224; i++) a[i] = a[i + 32]; return a; })(); // Returns the sum of positions within `str` of each char in `sub` sequence. // Ranges from `0` if `str.startsWith(sub)` to `a*b - b*(b+1)/2` if // `str.endsWith(sub)`. Returns `alt` if `sub` is not a subsequence of `str`. function subseq (str: string, sub: string, alt = -1): number { const a = str.length; const b = sub.length; if (b < a) { let score = 0; iter_sub: for (let i = 0, j = 0; i < b; i++) { for (const code = sub.charCodeAt(i); j < a; j++) { if (code === charCodesGeneralized[str.charCodeAt(j)]) { score += j - i; continue iter_sub; } } return alt; } return score; } else if (b > a) return alt; else return sub === str ? 0 : alt; } // Given a set of `strings`, yield a subset, such that the concatenated sequence of // strings received from `source` is a valid subsequence of each string in the subset. async function* subseqAsync (strings: string[], source: Iterable) { // Table of data associated with each string in `strings`. const stringTable: SubseqStringTable = {}; // Subset of `strings` still valid under the subsequence received so far from `source`. // Initializes to an ordered set of all unique `strings`. let validStrings: string[] = []; strings.forEach(s => { if (!(s in stringTable)) { stringTable[s] = {index: 0, score: 0, positions: []}; validStrings.push(s); } }); // For sorting the set of valid strings prior to each `yield`. const compareByScore = (a: string, b: string): number => { const entryA = stringTable[a] as SubseqStringTableEntry; const entryB = stringTable[b] as SubseqStringTableEntry; return entryA.score - entryB.score; } let sequence = ''; for await (const part of source) { if (part === '') continue; const concat = sequence + part; const survivors: string[] = []; for (const string of validStrings) { const entry = stringTable[string]; if (entry == null) throw Error; let {index, score} = entry; if (concat.length < string.length) { iterating_part: for (let i = 0; i < part.length; i++) { for (const code = part.charCodeAt(i); index < string.length; index++) { if (code === charCodesGeneralized[string.charCodeAt(index)]) { entry.positions.push(index); score += index - i - sequence.length; continue iterating_part; } } stringTable[string] = null; break; } entry.index = index; entry.score = score; survivors.push(string); } else if (concat.length > string.length) { stringTable[string] = null; } else if (string === concat) { if (entry.score !== 0) throw Error; survivors.push(string); } else { stringTable[string] = null; } } sequence = concat; yield survivors .sort(compareByScore) .reduce((result, string) => { const entry = stringTable[string]; if (entry == null) throw Error; const {score, positions} = entry; result.matches.push({string, score, positions}); return result; }, {sequence, matches: []}); validStrings = survivors; } }