greg-randall · June 3, 2025 20:28
diff --git a/gistfile1.txt b/gistfile1.txt
 # Sample input for the matcher. Each formal first name is followed by a short
 # form with the same last name; every such pair (anthony/tony, maxwell/max,
 # zachary/zack, patrick/pat, theodore/ted) has an entry in the nicknames table.
 names = [
    "Anthony Smith",
    "Tony Smith", 
    "Maxwell Jones",
    "Max Jones",
    "Zachary Williams",
    "Zack Williams",
    "Patrick Moore",
    "Pat Moore",
    "Theodore Johnson",
    "Ted Johnson"
 ]


 from thefuzz import fuzz
 from itertools import combinations

 # Title words that normalize_name() strips from a name before comparison.
 # Entries are lowercase because the name is lowercased before they are removed.
 TITLES_TO_REMOVE = [
    'honorable',
    'the',
    'judge',
    'hon',
    'dr',
    'professor',
 ]

 # Nickname table used by are_names_similar(). Each key is a short form and its
 # list holds the first names it may stand for. A match is only made between a
 # key and one of its own listed variants (in either order); two variants of
 # the same key are not matched to each other through this table.
 # All entries are lowercase, matching the lowercased names they are compared to.
 nicknames = {
    # B names
    'bill': ['william', 'will', 'billy', 'willie'],
    'bob': ['robert', 'rob', 'bobby', 'robbie', 'bert'],
    'ben': ['benjamin', 'benji', 'benny'],
    'bert': ['herbert', 'albert', 'bertram', 'roberto'],
    
    # C names
    'chuck': ['charles', 'charlie', 'chas'],
    'chris': ['christopher', 'christian'],
    
    # D names
    'dave': ['david', 'davey', 'davie'],
    'dan': ['daniel', 'danny', 'dannie'],
    'dick': ['richard', 'rich', 'ricky', 'ricardo'],
    
    # E names
    'ed': ['edward', 'eddie', 'eduardo', 'edwin'],
    'eli': ['elijah', 'elias'],
    
    # F names
    'fred': ['frederick', 'freddie', 'fredrick'],
    'frank': ['francis', 'francisco', 'franklin'],
    
    # G names
    'greg': ['gregory', 'gregg', 'greggory'],
    'gabe': ['gabriel', 'gaby'],
    'gus': ['augustus', 'gustav', 'augusto'],
    
    # H names
    'hal': ['harold', 'harry'],
    'hank': ['henry', 'heinrich'],
    
    # J names
    'jim': ['james', 'jimmy', 'jamie'],
    'joe': ['joseph', 'joey', 'jose'],
    'jack': ['john', 'jonathan', 'johnny'],
    'jerry': ['jerome', 'gerald', 'geraldo'],
    'jeff': ['jeffrey', 'geoffrey'],
    
    # K names
    'ken': ['kenneth', 'kenny'],
    'kit': ['christopher', 'christian'],
    
    # L names
    'larry': ['lawrence', 'laurence', 'lorenzo'],
    'len': ['leonard', 'leonardo'],
    
    # M names
    'mike': ['michael', 'mickey', 'mick'],
    'matt': ['matthew', 'mathew', 'mateo'],
    'max': ['maxwell', 'maximilian', 'maximillian'],
    
    # N names
    'nick': ['nicholas', 'nicolas', 'nico'],
    'nat': ['nathan', 'nathaniel', 'nataniel'],
    
    # P names
    'pat': ['patrick', 'patricia'],
    'pete': ['peter', 'pedro'],
    'phil': ['phillip', 'philip', 'felipe'],
    
    # R names
    'ron': ['ronald', 'ronnie', 'ronny'],
    'ray': ['raymond', 'raymund'],
    'rick': ['richard', 'ricardo'],
    
    # S names
    'sam': ['samuel', 'sammy', 'sammie'],
    'stan': ['stanley', 'stanford'],
    'steve': ['steven', 'stephen', 'esteban'],
    
    # T names
    'ted': ['theodore', 'edmund', 'eduardo'],
    'tom': ['thomas', 'tommy', 'tomas'],
    'tony': ['anthony', 'antonio'],
    
    # V names
    'vic': ['victor', 'vincent'],
    'val': ['valentine', 'valentin'],
    
    # W names
    'walt': ['walter', 'wallace'],
    'will': ['william', 'wilhelm', 'guillermo'],
    
    # Z names
    'zack': ['zachary', 'zachariah', 'zach']
 }

 def normalize_name(name):
     """Lowercase a name and drop title words so names compare cleanly.

     Titles are matched against whole whitespace-separated words, ignoring a
     trailing '.' or ',' (so "Dr." counts as "dr"). Removing them as raw
     substrings would corrupt ordinary names: "Theodore" contains "the",
     "Anthony" contains "hon" and "Andrew" contains "dr".

     Only single-word entries of TITLES_TO_REMOVE can match, because the
     comparison is done word by word.

     Args:
         name: Full name, optionally including titles from TITLES_TO_REMOVE.

     Returns:
         The lowercased name with title words removed and the remaining
         words joined by single spaces ('' if nothing remains).
     """
     titles = set(TITLES_TO_REMOVE)
     kept_words = [
         word for word in name.lower().split()
         if word.rstrip('.,') not in titles
     ]
     return ' '.join(kept_words)

 def get_name_parts(full_name):
     """Split a full name into its first-name part and its last name.

     The name is passed through normalize_name() first, then split on
     whitespace. The final word is taken as the last name and everything
     before it as the first-name part (which may hold several words, e.g. a
     first and middle name).

     Args:
         full_name: The name to split.

     Returns:
         A ``(first, last)`` tuple of normalized strings. When fewer than two
         words remain, ``last`` is '' and ``first`` is the single remaining
         word, or '' if nothing remains.
     """
     parts = normalize_name(full_name).split()
     if len(parts) >= 2:
         return ' '.join(parts[:-1]), parts[-1]
     # Return the normalized text here as well: handing back the raw input
     # would make this branch disagree with the two-part branch above, so
     # single-word names would be compared with their original case and titles.
     return ' '.join(parts), ''

 def are_names_similar(name1, name2, first_name_threshold=75, last_name_threshold=95):
     """Decide whether two full names plausibly refer to the same person.

     The last names must be close enough first. After that the first-name
     parts match if they are equal, if any word of one is a nickname key whose
     variant list in ``nicknames`` contains a word of the other, or if their
     fuzzy score is high enough.

     Args:
         name1: First full name.
         name2: Second full name.
         first_name_threshold: Minimum ``fuzz.token_sort_ratio`` score for the
             first-name parts to count as a match when no nickname applies.
         last_name_threshold: Minimum ``fuzz.ratio`` score the last names must
             reach; below it the pair is rejected outright.

     Returns:
         True if the names are considered similar, otherwise False.
     """
     first1, last1 = get_name_parts(name1)
     first2, last2 = get_name_parts(name2)

     # Differing last names rule the pair out before any first-name work.
     if fuzz.ratio(last1, last2) < last_name_threshold:
         return False

     if first1 == first2:
         return True

     # A nickname only matches its own variant list (key <-> variant), so look
     # each word up by key instead of scanning the whole table for every pair.
     for part1 in first1.split():
         for part2 in first2.split():
             if part2 in nicknames.get(part1, ()) or part1 in nicknames.get(part2, ()):
                 return True

     # No nickname relation: fall back to fuzzy matching of the first names.
     return fuzz.token_sort_ratio(first1, first2) >= first_name_threshold

 # Keep every unordered pair of names that the matcher flags as similar.
 similar_pairs = [
     (left, right)
     for left, right in combinations(names, 2)
     if are_names_similar(left, right)
 ]

 # Report the flagged pairs, one per line.
 print("Potentially matching names:")
 for left, right in similar_pairs:
     print(f"{left} <-> {right}")
	# Sample input for the matcher. Each formal first name is followed by a short
	# form with the same last name; every such pair (anthony/tony, maxwell/max,
	# zachary/zack, patrick/pat, theodore/ted) has an entry in the nicknames table.
	names = [
	"Anthony Smith",
	"Tony Smith",
	"Maxwell Jones",
	"Max Jones",
	"Zachary Williams",
	"Zack Williams",
	"Patrick Moore",
	"Pat Moore",
	"Theodore Johnson",
	"Ted Johnson"
	]


	from thefuzz import fuzz
	from itertools import combinations

	# Title words that normalize_name() strips from a name before comparison.
	# Entries are lowercase because the name is lowercased before they are removed.
	TITLES_TO_REMOVE = [
	'honorable',
	'the',
	'judge',
	'hon',
	'dr',
	'professor',
	]

	# Nickname table used by are_names_similar(). Each key is a short form and its
	# list holds the first names it may stand for. A match is only made between a
	# key and one of its own listed variants (in either order); two variants of
	# the same key are not matched to each other through this table.
	# All entries are lowercase, matching the lowercased names they are compared to.
	nicknames = {
	# B names
	'bill': ['william', 'will', 'billy', 'willie'],
	'bob': ['robert', 'rob', 'bobby', 'robbie', 'bert'],
	'ben': ['benjamin', 'benji', 'benny'],
	'bert': ['herbert', 'albert', 'bertram', 'roberto'],

	# C names
	'chuck': ['charles', 'charlie', 'chas'],
	'chris': ['christopher', 'christian'],

	# D names
	'dave': ['david', 'davey', 'davie'],
	'dan': ['daniel', 'danny', 'dannie'],
	'dick': ['richard', 'rich', 'ricky', 'ricardo'],

	# E names
	'ed': ['edward', 'eddie', 'eduardo', 'edwin'],
	'eli': ['elijah', 'elias'],

	# F names
	'fred': ['frederick', 'freddie', 'fredrick'],
	'frank': ['francis', 'francisco', 'franklin'],

	# G names
	'greg': ['gregory', 'gregg', 'greggory'],
	'gabe': ['gabriel', 'gaby'],
	'gus': ['augustus', 'gustav', 'augusto'],

	# H names
	'hal': ['harold', 'harry'],
	'hank': ['henry', 'heinrich'],

	# J names
	'jim': ['james', 'jimmy', 'jamie'],
	'joe': ['joseph', 'joey', 'jose'],
	'jack': ['john', 'jonathan', 'johnny'],
	'jerry': ['jerome', 'gerald', 'geraldo'],
	'jeff': ['jeffrey', 'geoffrey'],

	# K names
	'ken': ['kenneth', 'kenny'],
	'kit': ['christopher', 'christian'],

	# L names
	'larry': ['lawrence', 'laurence', 'lorenzo'],
	'len': ['leonard', 'leonardo'],

	# M names
	'mike': ['michael', 'mickey', 'mick'],
	'matt': ['matthew', 'mathew', 'mateo'],
	'max': ['maxwell', 'maximilian', 'maximillian'],

	# N names
	'nick': ['nicholas', 'nicolas', 'nico'],
	'nat': ['nathan', 'nathaniel', 'nataniel'],

	# P names
	'pat': ['patrick', 'patricia'],
	'pete': ['peter', 'pedro'],
	'phil': ['phillip', 'philip', 'felipe'],

	# R names
	'ron': ['ronald', 'ronnie', 'ronny'],
	'ray': ['raymond', 'raymund'],
	'rick': ['richard', 'ricardo'],

	# S names
	'sam': ['samuel', 'sammy', 'sammie'],
	'stan': ['stanley', 'stanford'],
	'steve': ['steven', 'stephen', 'esteban'],

	# T names
	'ted': ['theodore', 'edmund', 'eduardo'],
	'tom': ['thomas', 'tommy', 'tomas'],
	'tony': ['anthony', 'antonio'],

	# V names
	'vic': ['victor', 'vincent'],
	'val': ['valentine', 'valentin'],

	# W names
	'walt': ['walter', 'wallace'],
	'will': ['william', 'wilhelm', 'guillermo'],

	# Z names
	'zack': ['zachary', 'zachariah', 'zach']
	}

	def normalize_name(name):
	    """Lowercase a name and drop title words so names compare cleanly.

	    Titles are matched against whole whitespace-separated words, ignoring a
	    trailing '.' or ',' (so "Dr." counts as "dr"). Removing them as raw
	    substrings would corrupt ordinary names: "Theodore" contains "the",
	    "Anthony" contains "hon" and "Andrew" contains "dr".

	    Only single-word entries of TITLES_TO_REMOVE can match, because the
	    comparison is done word by word.

	    Args:
	        name: Full name, optionally including titles from TITLES_TO_REMOVE.

	    Returns:
	        The lowercased name with title words removed and the remaining
	        words joined by single spaces ('' if nothing remains).
	    """
	    titles = set(TITLES_TO_REMOVE)
	    kept_words = [
	        word for word in name.lower().split()
	        if word.rstrip('.,') not in titles
	    ]
	    return ' '.join(kept_words)

	def get_name_parts(full_name):
	    """Split a full name into its first-name part and its last name.

	    The name is passed through normalize_name() first, then split on
	    whitespace. The final word is taken as the last name and everything
	    before it as the first-name part (which may hold several words, e.g. a
	    first and middle name).

	    Args:
	        full_name: The name to split.

	    Returns:
	        A ``(first, last)`` tuple of normalized strings. When fewer than two
	        words remain, ``last`` is '' and ``first`` is the single remaining
	        word, or '' if nothing remains.
	    """
	    parts = normalize_name(full_name).split()
	    if len(parts) >= 2:
	        return ' '.join(parts[:-1]), parts[-1]
	    # Return the normalized text here as well: handing back the raw input
	    # would make this branch disagree with the two-part branch above, so
	    # single-word names would be compared with their original case and titles.
	    return ' '.join(parts), ''

	def are_names_similar(name1, name2, first_name_threshold=75, last_name_threshold=95):
	    """Decide whether two full names plausibly refer to the same person.

	    The last names must be close enough first. After that the first-name
	    parts match if they are equal, if any word of one is a nickname key whose
	    variant list in ``nicknames`` contains a word of the other, or if their
	    fuzzy score is high enough.

	    Args:
	        name1: First full name.
	        name2: Second full name.
	        first_name_threshold: Minimum ``fuzz.token_sort_ratio`` score for the
	            first-name parts to count as a match when no nickname applies.
	        last_name_threshold: Minimum ``fuzz.ratio`` score the last names must
	            reach; below it the pair is rejected outright.

	    Returns:
	        True if the names are considered similar, otherwise False.
	    """
	    first1, last1 = get_name_parts(name1)
	    first2, last2 = get_name_parts(name2)

	    # Differing last names rule the pair out before any first-name work.
	    if fuzz.ratio(last1, last2) < last_name_threshold:
	        return False

	    if first1 == first2:
	        return True

	    # A nickname only matches its own variant list (key <-> variant), so look
	    # each word up by key instead of scanning the whole table for every pair.
	    for part1 in first1.split():
	        for part2 in first2.split():
	            if part2 in nicknames.get(part1, ()) or part1 in nicknames.get(part2, ()):
	                return True

	    # No nickname relation: fall back to fuzzy matching of the first names.
	    return fuzz.token_sort_ratio(first1, first2) >= first_name_threshold

	# Keep every unordered pair of names that the matcher flags as similar.
	similar_pairs = [
	    (left, right)
	    for left, right in combinations(names, 2)
	    if are_names_similar(left, right)
	]

	# Report the flagged pairs, one per line.
	print("Potentially matching names:")
	for left, right in similar_pairs:
	    print(f"{left} <-> {right}")
No results found