
@wwbrannon
wwbrannon / find-duplicate-paragraphs.py
Last active May 6, 2025 19:01
A push-button script to identify duplicative or similar paragraphs across multiple LaTeX chapter files
#!/usr/bin/env python3
"""
find-duplicate-paragraphs.py
A push-button script to identify duplicative or similar paragraphs across multiple LaTeX chapter files.
Usage:
python find_duplicate_paragraphs.py -i path_to_tex_files -o output_report.csv -t 0.8
python find_duplicate_paragraphs.py -i path_to_tex_files -o output_report.csv -t 0.8 -m all-MiniLM-L6-v2
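The `-m all-MiniLM-L6-v2` flag suggests the gist compares sentence-transformer embeddings; as a rough stdlib stand-in for the same idea, paragraph similarity can be sketched with `difflib` (this is an illustrative sketch, not the gist's implementation):

```python
import difflib

def similar_pairs(paragraphs, threshold=0.8):
    """Return (i, j, score) for paragraph pairs whose similarity meets threshold."""
    pairs = []
    for i in range(len(paragraphs)):
        for j in range(i + 1, len(paragraphs)):
            score = difflib.SequenceMatcher(None, paragraphs[i], paragraphs[j]).ratio()
            if score >= threshold:
                pairs.append((i, j, score))
    return pairs

paras = [
    "We evaluate the model on held-out data.",
    "We evaluate the model on held-out test data.",
    "Unrelated paragraph about figure formatting.",
]
print(similar_pairs(paras, 0.8))  # the first two paragraphs exceed the threshold
```

An embedding-based version would replace the `SequenceMatcher` ratio with cosine similarity between paragraph vectors, which is more robust to paraphrase.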
wwbrannon / uplift-metrics.py
Last active July 29, 2025 01:49
Uplift, gain, Qini curve computations
import numpy as np
import pandas as pd
from scipy.integrate import simpson
def interp_nan(data):
    assert data.ndim == 1
    data = data.copy()
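The preview cuts off before the function body; a plausible self-contained implementation of what the name and signature suggest (linear interpolation across NaN runs), not necessarily the gist's exact code:

```python
import numpy as np

def interp_nan(data):
    """Replace NaNs in a 1-D array with linearly interpolated values."""
    assert data.ndim == 1
    data = data.copy()
    nans = np.isnan(data)
    # np.interp fills the NaN positions from the surrounding finite samples
    data[nans] = np.interp(np.flatnonzero(nans), np.flatnonzero(~nans), data[~nans])
    return data

print(interp_nan(np.array([1.0, np.nan, 3.0, np.nan, 5.0])))
```

A step like this is useful before Qini/uplift curve integration, since `scipy.integrate.simpson` cannot handle NaN samples.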
wwbrannon / setup.sh
Last active April 6, 2024 04:25
Cloud miniconda setup
#!/bin/bash
set -Eeuo pipefail
set -x
PYTHON_VERSION=3.10
CUDA_VERSION=12.1.0
PYTORCH_VERSION=2.2.2
ENV_NAME='dl'
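These pinned versions are presumably consumed by a `conda create` call later in the script; a minimal sketch of how such an invocation might be assembled (package and channel names are assumptions, not the gist's actual command):

```shell
PYTHON_VERSION=3.10
PYTORCH_VERSION=2.2.2
ENV_NAME='dl'

# Build the conda invocation from the pinned versions above
CMD="conda create -y -n ${ENV_NAME} python=${PYTHON_VERSION} pytorch=${PYTORCH_VERSION}"
echo "$CMD"
```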
wwbrannon / create-matlaber-env.sh
Last active November 17, 2022 18:56
Set up matlaber environment
#!/bin/bash
set -xe
export PYTHON_VERSION=3.9
export CUDA_VERSION=11.3
export JUPYTER_PORT="$(id -u)"
##
## Make SSL certs
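The "Make SSL certs" section is truncated in the preview; a typical self-signed certificate one-liner for a local Jupyter server looks like this (file names and subject are assumptions):

```shell
# Generate a self-signed cert and key valid for one year (no passphrase)
openssl req -x509 -newkey rsa:2048 -nodes -days 365 \
    -subj "/CN=localhost" \
    -keyout jupyter.key -out jupyter.crt
```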
wwbrannon / wordcount.tex
Created January 6, 2022 20:28
Word count via texcount, not relying on overleaf's default settings
%%%%%% Put this part in the preamble
%TC:ignore
\usepackage{verbatim}
\newcommand{\detailtexcount}[1]{%
\immediate\write18{texcount -merge -sum -q #1.tex output.bbl > #1.wcdetail }%
\verbatiminput{#1.wcdetail}%
}
%TC:endignore
%%%%%% Put this part where you want the count to appear
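The preview cuts off here; given the macro defined above, the body part is presumably a call like the following, where `main` is a placeholder for your root `.tex` file name:

```latex
\detailtexcount{main}
```

Note that `\write18` requires compiling with shell escape enabled (e.g. `pdflatex -shell-escape`).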

Markdown and reStructuredText

GitHub supports several lightweight markup languages for documentation; the most popular (generally, not just on GitHub) are Markdown and reStructuredText. Markdown is usually considered easier to use and is often preferred when the goal is simply to generate HTML. reStructuredText, on the other hand, is more extensible and powerful, with native support (not just embedded HTML) for tables, as well as features like automatic generation of tables of contents.
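To illustrate the native-table point, reStructuredText's "simple table" syntax needs no HTML or extensions (core Markdown has no table syntax at all; pipe tables are a GitHub extension):

```rst
=========  ==============
Format     Native tables?
=========  ==============
Markdown   no (extension)
reST       yes
=========  ==============
```

reStructuredText also generates a table of contents automatically via the `.. contents::` directive.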

wwbrannon / json2csv
Last active June 30, 2021 06:43
Convert JSON(L) to CSV
#!/usr/bin/env python3
# Convert a jsonl file (one json document per line, each json doc consisting of
# a dictionary with the same keys) to csv.
import csv
import json
import argparse
import contextlib
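The preview shows only the imports; a self-contained sketch of the conversion the comment describes, with field names taken from the first record (an illustrative sketch, not the gist's exact code):

```python
import csv
import io
import json

def jsonl_to_csv(jsonl_text):
    """Convert JSONL (one dict per line, shared keys) to CSV text."""
    records = [json.loads(line) for line in jsonl_text.splitlines() if line.strip()]
    out = io.StringIO()
    writer = csv.DictWriter(out, fieldnames=list(records[0].keys()))
    writer.writeheader()
    writer.writerows(records)
    return out.getvalue()

print(jsonl_to_csv('{"a": 1, "b": 2}\n{"a": 3, "b": 4}\n'))
```

For large files the same logic would stream line by line rather than build the full record list in memory.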
wwbrannon / stream.py
Last active June 30, 2021 21:32
Extract certain fields from a wikipedia articles dump to csv/json
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# based on a script from Jeff Heaton: https://github.com/jeffheaton/article-code/blob/master/python/wikipedia/wiki-basic-stream.py
import csv
import json
import logging
import argparse
import xml.etree.ElementTree as etree
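The preview shows only the imports; a minimal sketch of the streaming-extraction pattern with `iterparse`, shown over a toy document (real MediaWiki dumps carry an XML namespace on every tag, which the gist presumably handles):

```python
import io
import xml.etree.ElementTree as etree

def stream_pages(fileobj):
    """Yield (title, text) pairs from <page> elements, freeing memory as we go."""
    for event, elem in etree.iterparse(fileobj, events=("end",)):
        if elem.tag == "page":
            title = elem.findtext("title")
            text = elem.findtext("revision/text")
            yield title, text
            elem.clear()  # drop the parsed subtree so memory stays bounded

toy = b"""<mediawiki>
  <page><title>A</title><revision><text>alpha</text></revision></page>
  <page><title>B</title><revision><text>beta</text></revision></page>
</mediawiki>"""
print(list(stream_pages(io.BytesIO(toy))))
```

The `elem.clear()` call is what makes this viable on multi-gigabyte dumps: without it, `iterparse` still builds the whole tree.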
wwbrannon / train_val_test_split.py
Created May 7, 2021 05:05
Split a set of numpy arrays into training, validation and test sets
from sklearn.model_selection import train_test_split
from utils import nnotnone, coalesce, grouper
def train_val_test_split(*arrays, train_size=None, val_size=None,
                         test_size=None, simplify=True, **kwargs):
    '''
    Split a set of numpy arrays into training, validation and test sets.
    Wraps around sklearn's train_test_split. Arrays are shuffled by default
    before being split, as is the default in the sklearn function, unless
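The preview cuts off mid-docstring; the gist itself wraps sklearn's `train_test_split`, but the three-way-split idea can be sketched in pure Python (an illustrative sketch, not the gist's code):

```python
import random

def train_val_test_split(data, train_size=0.7, val_size=0.15, seed=0):
    """Shuffle, then slice into train/val/test; test gets the remainder."""
    idx = list(range(len(data)))
    random.Random(seed).shuffle(idx)
    n_train = int(train_size * len(data))
    n_val = int(val_size * len(data))
    train = [data[i] for i in idx[:n_train]]
    val = [data[i] for i in idx[n_train:n_train + n_val]]
    test = [data[i] for i in idx[n_train + n_val:]]
    return train, val, test

train, val, test = train_val_test_split(list(range(100)))
print(len(train), len(val), len(test))  # 70 15 15
```

The sklearn-based version achieves the same thing with two chained `train_test_split` calls, which also buys stratification and array-aware shuffling for free.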
wwbrannon / grouper.py
Created May 7, 2021 03:21
Iterate over a python iterable in chunks
def grouper(it, n=None):
    assert n is None or n > 0

    if n is None:
        yield [x for x in it]
    else:
        ret = []

        for obj in it:
            if len(ret) == n:
                yield ret
                ret = []
            ret.append(obj)

        if ret:  # final, possibly short, chunk
            yield ret
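The preview cuts off mid-loop; an equivalent chunking helper can be written with `itertools.islice` (a sketch of the same behavior, not necessarily the gist's exact code):

```python
from itertools import islice

def grouper(it, n=None):
    """Yield lists of up to n items from it; n=None yields everything at once."""
    it = iter(it)
    if n is None:
        yield list(it)
        return
    while True:
        chunk = list(islice(it, n))
        if not chunk:
            return
        yield chunk

print(list(grouper(range(5), 2)))  # [[0, 1], [2, 3], [4]]
```

Unlike the `zip`-based recipes, this version emits a short final chunk instead of padding or dropping leftover items.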