Skip to content

Instantly share code, notes, and snippets.

@robxx
Created April 22, 2024 01:31
Show Gist options
  • Select an option

  • Save robxx/456a16799923e88ae8aab6f3fa37337b to your computer and use it in GitHub Desktop.

Select an option

Save robxx/456a16799923e88ae8aab6f3fa37337b to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
#
# diastxt2.py
# translated by chatgpt 3.5
#
############################################################################
#
# diastxt2.py by Ron Starr
#
#
# Performs a diastic reading of a text following the method outlined in
# Jackson Mac Low, _The Virginia Woolf Poems_. Inspired by
# Charles O. Hartman's original DIASTEXT.
#
# Program does no error checking--you're on your own.
#
#
# Command-line options:
#
# -l <number> Max words per line of output (default 6)
# -o <number> Max words of output (default 25)
#
# Program reads from standard input. Input has the following structure:
#
# 1st line - key phrase (Mac Low calls it the "title phrase")
# 2nd and succeeding lines - the text to be read
#
# All output is to standard output.
#
#
# Revision History
# 06/30/99 First version
# 07/01/99 Added max words per line of output
# 07/14/99 Added command-line options
# 08/28/99 Bug fix - leading blanks in key treated as word
#
############################################################################
import sys
import re
import getopt
# Default values for the command-line options.
MAXWORDS = 25     # -o: maximum number of output attempts (words)
MAXPERLINE = 6    # -l: maximum number of words per output line

# Punctuation in the key phrase that is treated as a word separator.
_KEY_PUNCTUATION = re.compile(r'[,:.?\"\'-]')
# Leading non-word characters are ignored when testing a text word.
_LEADING_NONWORD = re.compile(r'^\W+')
# A word that ends in a non-word character (punctuation) ends the output line.
_TRAILING_NONWORD = re.compile(r'\W\Z')


def parse_options(argv):
    """Parse the command-line options.

    Args:
        argv: Argument list without the program name (e.g. ``sys.argv[1:]``).

    Returns:
        A ``(max_words, max_per_line)`` tuple of ints; options that are not
        given fall back to ``MAXWORDS`` and ``MAXPERLINE``.

    Raises:
        getopt.GetoptError: On an unknown option or a missing option value.
        ValueError: If an option value is not an integer.
    """
    max_words, max_per_line = MAXWORDS, MAXPERLINE
    opts, _ = getopt.getopt(argv, "l:o:")
    for opt, arg in opts:
        if opt == "-l":
            max_per_line = int(arg)
        elif opt == "-o":
            max_words = int(arg)
    return max_words, max_per_line


def build_patterns(keyline):
    """Build one regex source string per letter of the key phrase.

    The key phrase is split into words after replacing the punctuation in
    ``_KEY_PUNCTUATION`` with spaces.  The letter at zero-based position
    ``j`` of a key word yields a pattern that matches ``j`` word characters
    followed by that letter.

    Args:
        keyline: The raw key phrase line.

    Returns:
        A list of regex source strings, in key-phrase order.  The list is
        empty when the key phrase contains no words.
    """
    keywords = _KEY_PUNCTUATION.sub(' ', keyline).split()
    patterns = []
    for keyword in keywords:
        for position, letter in enumerate(keyword):
            # Escape the letter: characters such as "(" or "+" survive the
            # punctuation filter and must be matched literally.
            patterns.append(r"\w{" + str(position) + "}" + re.escape(letter))
    return patterns


def diastic_reading(patterns, textwords, max_words=MAXWORDS,
                    max_per_line=MAXPERLINE):
    """Perform the diastic reading and return the formatted output.

    The patterns are cycled through in order.  For each one, the text is
    scanned forward (wrapping around at the end) from the word after the
    previous match until a word matches, case-insensitively and ignoring
    leading non-word characters.  Each matched word is emitted followed by
    a space.  A line break is emitted after a word that ends in a non-word
    character, or once ``max_per_line`` words are on the current line.

    Args:
        patterns: Regex source strings, e.g. from ``build_patterns``.
        textwords: The text to read, as a list of whitespace-free words.
        max_words: Number of patterns to try; a pattern that matches no
            word at all still counts as one try.
        max_per_line: Maximum number of words per output line.

    Returns:
        The complete output text, always ending in a newline.  With no
        patterns or no text words the result is just ``"\\n"``.
    """
    if not patterns or not textwords:
        # Nothing to read; avoids indexing / modulo on an empty list.
        return "\n"

    # Compile once instead of on every comparison inside the loops.
    compiled = [re.compile(source, re.IGNORECASE) for source in patterns]
    total = len(textwords)
    pieces = []
    numpat = numword = 0
    wordsout = 0

    for _ in range(max_words):
        matcher = compiled[numpat]
        # Visit every word exactly once, starting at the current position.
        for offset in range(total):
            index = (numword + offset) % total
            word = textwords[index]
            candidate = _LEADING_NONWORD.sub('', word)
            if not matcher.match(candidate):
                continue
            pieces.append(word + ' ')
            if _TRAILING_NONWORD.search(candidate):
                # Punctuation at the end of the word ends the line.
                pieces.append('\n')
                wordsout = 0
            else:
                wordsout += 1
                if wordsout >= max_per_line:
                    pieces.append('\n')
                    wordsout = 0
            numword = (index + 1) % total
            break
        else:
            # No word in the whole text matched; the text position is
            # left where it was and the next pattern is tried.
            pieces.append(
                f"[pattern not found: {patterns[numpat]}, skipping]\n")
        numpat = (numpat + 1) % len(compiled)

    pieces.append('\n')
    return ''.join(pieces)


def main(argv=None):
    """Read the key phrase and text from standard input and print the reading.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``.
    """
    max_words, max_per_line = parse_options(
        sys.argv[1:] if argv is None else argv)
    # First line is the key phrase, everything after it is the text.
    keyline = sys.stdin.readline()
    # Splitting on any whitespace run makes separate newline handling
    # unnecessary.
    textwords = sys.stdin.read().split()
    sys.stdout.write(
        diastic_reading(build_patterns(keyline), textwords,
                        max_words, max_per_line))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment