Skip to content

Instantly share code, notes, and snippets.

@mlnance
Forked from tdudgeon/conf_gen.py
Last active January 4, 2023 22:34
Show Gist options
  • Select an option

  • Save mlnance/30e7f8e06efb175e847b07153ee5351f to your computer and use it in GitHub Desktop.

Select an option

Save mlnance/30e7f8e06efb175e847b07153ee5351f to your computer and use it in GitHub Desktop.
Conformer generation using RDKit
#!/usr/bin/env python3
__author__ = "tdudgeon" # externally developed code
"""
Source: https://gist.github.com/tdudgeon/b061dc67f9d879905b50118408c30aac
Notes: Requires `conda activate rdkit` on ranger
Edits: mlnance
Date: 1 August 2022
"""
################################################################################
###########
# IMPORTS #
###########
import sys
from rdkit import Chem
from rdkit.Chem import AllChem, TorsionFingerprints
from rdkit.ML.Cluster import Butina
################################################################################
###########
# METHODS #
###########
def gen_conformers(mol,
numConfs=100,
maxAttempts=1000,
pruneRmsThresh=0.1,
useExpTorsionAnglePrefs=True,
useBasicKnowledge=True,
useMacrocycleTorsions=True, # MLN
enforceChirality=True,
numThreads=4):
ids = AllChem.EmbedMultipleConfs(mol,
numConfs=numConfs,
maxAttempts=maxAttempts,
pruneRmsThresh=pruneRmsThresh,
useExpTorsionAnglePrefs=useExpTorsionAnglePrefs,
useBasicKnowledge=useBasicKnowledge,
enforceChirality=enforceChirality,
numThreads=numThreads)
return list(ids)
def write_conformers_to_sdf(mol,
filename,
rmsClusters,
conformerPropsDict,
minEnergy):
w = Chem.SDWriter(filename)
for cluster in rmsClusters:
for confId in cluster:
for name in mol.GetPropNames():
mol.ClearProp(name)
conformerProps = conformerPropsDict[confId]
mol.SetIntProp("conformer_id", confId + 1)
for key in conformerProps.keys():
mol.SetProp(key, str(conformerProps[key]))
e = conformerProps["energy_abs"]
if e:
mol.SetDoubleProp("energy_delta", e - minEnergy)
w.write(mol, confId=confId)
w.flush()
w.close()
def calc_energy(mol,
conformerId,
mmffVariant,
minimizeIts):
results = {}
# single-threaded minimization approach
ff = AllChem.MMFFGetMoleculeForceField(mol,
AllChem.MMFFGetMoleculeProperties(mol,
mmffVariant=mmffVariant),
confId=conformerId)
ff.Initialize()
ff.CalcEnergy()
if minimizeIts > 0:
results["converged"] = ff.Minimize(maxIts=minimizeIts)
results["energy_abs"] = ff.CalcEnergy()
return results
def cluster_conformers(mol,
mode="RMSD",
threshold=2.0):
if mode == "TFD":
dmat = TorsionFingerprints.GetTFDMatrix(mol)
else:
dmat = AllChem.GetConformerRMSMatrix(mol,
prealigned=False)
rms_clusters = Butina.ClusterData(dmat,
mol.GetNumConformers(),
threshold,
isDistData=True,
reordering=True)
return rms_clusters
def align_conformer_clusters(mol,
clust_ids):
rmslist = []
AllChem.AlignMolConformers(mol,
confIds=clust_ids,
RMSlist=rmslist)
return rmslist
################################################################################
########
# MAIN #
########
if len(sys.argv) < 4:
print("Usage: conf_gen.py <sdf input> <num conformers> <max attempts> <prune threshold> <cluster method: (RMSD|TFD) = RMSD> <cluster threshold = 0.2> <minimize iterations: = 0> <MMFF variant: = (MMFF94|MMFF94s) = MMFF94")
sys.exit()
input_file = sys.argv[1]
numConfs = int(sys.argv[2])
maxAttempts = int(sys.argv[3])
pruneRmsThresh = float(sys.argv[4])
if len(sys.argv) > 5: clusterMethod = sys.argv[5]
else: clusterMethod = "RMSD"
if len(sys.argv) > 6: clusterThreshold = float(sys.argv[6])
else: clusterThreshold = 2.0
if len(sys.argv) > 7: minimizeIterations = int(sys.argv[7])
else: minimizeIterations = 0
if len(sys.argv) > 8: mmffVariant = sys.argv[8]
else: mmffVariant = "MMFF94"
suppl = Chem.ForwardSDMolSupplier(input_file)
i=0
for mol in suppl:
i = i+1
if mol is None: continue
m = Chem.AddHs(mol)
Chem.rdmolops.AssignStereochemistryFrom3D(mol)
Chem.rdmolops.AssignAtomChiralTagsFromStructure(mol)
refm = Chem.Mol(m)
# generate the confomers
print("Generating conformers for molecule ", i)
conformerIds = gen_conformers(m, numConfs, maxAttempts, pruneRmsThresh, True, True, True)
conformerPropsDict = {}
if minimizeIterations:
print("Minimizing and calculating energies of conformers of molecule ", i)
else:
print("Calculating energies of conformers of molecule ", i)
for conformerId in conformerIds:
# energy minimise (optional) and energy calculation
props = calc_energy(m, conformerId, mmffVariant, minimizeIterations)
conformerPropsDict[conformerId] = props
# cluster the conformers
rmsClusters = cluster_conformers(m, clusterMethod, clusterThreshold)
print("Molecule", i, ": generated", len(conformerIds), "conformers and", len(rmsClusters), "clusters")
rmsClustersPerCluster = []
clusterNumber = 0
minEnergy = 9999999999999
for cluster in rmsClusters:
clusterNumber = clusterNumber+1
rmsWithinCluster = align_conformer_clusters(m, cluster)
for conformerId in cluster:
e = props["energy_abs"]
if e < minEnergy:
minEnergy = e
props = conformerPropsDict[conformerId]
props["cluster_no"] = clusterNumber
props["cluster_centroid"] = cluster[0] + 1
idx = cluster.index(conformerId)
if idx > 0:
props["rms_to_centroid"] = rmsWithinCluster[idx-1]
else:
props["rms_to_centroid"] = 0.0
# MLN
# final alignment to input
# -- cannot get it to work --
#for confId in conformerIds:
# rmsd = Chem.rdMolAlign.AlignMol(m, refm, prbCid=confId, refCid=-1, reflect=True)
# print(rmsd)
write_conformers_to_sdf(m, str(i) + ".sdf", rmsClusters, conformerPropsDict, minEnergy)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment