Created
April 22, 2024 01:31
-
-
Save robxx/456a16799923e88ae8aab6f3fa37337b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| # | |
| # diastxt2.py | |
| # translated by chatgpt 3.5 | |
| # | |
| ############################################################################ | |
| # | |
| # diastxt2.py by Ron Starr | |
| # | |
| # | |
| # Performs diastic reading of a text following the method outlined in | |
| # Jackson Mac Low, _The Virginia Woolf Poems_. Inspired by | |
| # Charles O. Hartman's original DIASTEXT. | |
| # | |
| # Program does no error checking--you're on your own. | |
| # | |
| # | |
| # Command-line options: | |
| # | |
| # -l <number> Max words per line of output (default 6) | |
| # -o <number> Max words of output (default 25) | |
| # | |
| # Program reads from standard input. Input has the following structure: | |
| # | |
| # 1st line - key phrase (Mac Low calls it the "title phrase") | |
| # 2nd and succeeding lines - the text to be read | |
| # | |
| # All output is to standard output. | |
| # | |
| # | |
| # Revision History | |
| # 06/30/99 First version | |
| # 07/01/99 Added max words per line of output | |
| # 07/14/99 Added command-line options | |
| # 08/28/99 Bug fix - leading blanks in key treated as word | |
| # | |
| ############################################################################ | |
import sys
import re
import getopt

# Default values for the command-line options.
MAXWORDS = 25    # -o: maximum number of words of output
MAXPERLINE = 6   # -l: maximum number of words per line of output

# Punctuation that is blanked out of the key phrase before it is split
# into key words.
_KEY_PUNCTUATION = re.compile(r'[,:.?\"\'-]')
# Leading non-word characters are ignored when testing a text word.
_LEADING_NONWORD = re.compile(r'^\W+')
# A text word that ends in a non-word character forces a line break.
_TRAILING_NONWORD = re.compile(r'\W$')


def build_patterns(keyline):
    """Return the compiled search patterns derived from the key phrase.

    The key phrase is stripped, its punctuation (``, : . ? " ' -``) is
    replaced by blanks, and it is split into key words.  For the letter at
    0-based position ``j`` of each key word, one case-insensitive pattern
    ``\\w{j}<letter>`` is produced; matched at the start of a text word it
    requires that word to have the same letter in the same position.

    Letters are passed through ``re.escape`` so that characters with a
    special meaning in regular expressions (``+``, ``*``, ``(`` ...) are
    matched literally instead of corrupting the pattern.
    """
    keywords = _KEY_PUNCTUATION.sub(' ', keyline.strip()).split()
    return [
        re.compile(r"\w{" + str(position) + "}" + re.escape(letter),
                   re.IGNORECASE)
        for keyword in keywords
        for position, letter in enumerate(keyword)
    ]


def diastic_reading(keyline, text, max_words=MAXWORDS,
                    max_per_line=MAXPERLINE):
    """Perform a diastic reading of ``text`` driven by ``keyline``.

    Parameters:
        keyline: the key ("title") phrase.
        text: the text to be read; it is split on whitespace into words.
        max_words: number of reading steps, one pattern per step.  A step
            whose pattern matches no word emits a "pattern not found"
            notice instead of a word.
        max_per_line: a line break is emitted once this many words have
            been written since the last break.

    Returns:
        The complete output as one string.  Every selected word is followed
        by a single blank; the string always ends with a newline.  If the
        key phrase yields no patterns or the text contains no words, the
        result is just that final newline.
    """
    patterns = build_patterns(keyline)
    textwords = text.split()
    if not patterns or not textwords:
        # Nothing to search for, or nothing to search in.
        return "\n"

    pieces = []
    total = len(textwords)
    numpat = numword = 0
    wordsout = 0
    for _ in range(max_words):
        pattern = patterns[numpat]
        # Scan the text circularly, starting at the current position and
        # visiting every word at most once.
        for offset in range(total):
            index = (numword + offset) % total
            word = textwords[index]
            testtext = _LEADING_NONWORD.sub('', word)
            if not pattern.match(testtext):
                continue
            pieces.append(word + ' ')
            if _TRAILING_NONWORD.search(testtext):
                # Trailing punctuation ends the current output line.
                pieces.append('\n')
                wordsout = 0
            else:
                wordsout += 1
                if wordsout >= max_per_line:
                    pieces.append('\n')
                    wordsout = 0
            # The next search resumes just after the word that was used.
            numword = (index + 1) % total
            break
        else:
            # No word matched; the position in the text stays where it was.
            pieces.append(
                f"[pattern not found: {pattern.pattern}, skipping]\n")
        numpat = (numpat + 1) % len(patterns)
    pieces.append('\n')
    return ''.join(pieces)


def main(argv=None):
    """Run the program: parse options, read standard input, write the result.

    ``argv`` defaults to ``sys.argv[1:]``.  Recognised options are
    ``-l <number>`` (max words per line) and ``-o <number>`` (max words of
    output).  The first line of standard input is the key phrase; all
    remaining lines are the text to be read.
    """
    if argv is None:
        argv = sys.argv[1:]
    max_words = MAXWORDS
    max_per_line = MAXPERLINE
    opts, _args = getopt.getopt(argv, "l:o:")
    for opt, arg in opts:
        if opt == "-l":
            max_per_line = int(arg)
        elif opt == "-o":
            max_words = int(arg)

    keyline = sys.stdin.readline()
    text = sys.stdin.read()
    sys.stdout.write(diastic_reading(keyline, text, max_words, max_per_line))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment