-
-
Save mlnance/30e7f8e06efb175e847b07153ee5351f to your computer and use it in GitHub Desktop.
Conformer generation using RDKit
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| __author__ = "tdudgeon" # externally developed code | |
| """ | |
| Source: https://gist.github.com/tdudgeon/b061dc67f9d879905b50118408c30aac | |
| Notes: Requires `conda activate rdkit` on ranger | |
| Edits: mlnance | |
| Date: 1 August 2022 | |
| """ | |
| ################################################################################ | |
| ########### | |
| # IMPORTS # | |
| ########### | |
| import sys | |
| from rdkit import Chem | |
| from rdkit.Chem import AllChem, TorsionFingerprints | |
| from rdkit.ML.Cluster import Butina | |
| ################################################################################ | |
| ########### | |
| # METHODS # | |
| ########### | |
| def gen_conformers(mol, | |
| numConfs=100, | |
| maxAttempts=1000, | |
| pruneRmsThresh=0.1, | |
| useExpTorsionAnglePrefs=True, | |
| useBasicKnowledge=True, | |
| useMacrocycleTorsions=True, # MLN | |
| enforceChirality=True, | |
| numThreads=4): | |
| ids = AllChem.EmbedMultipleConfs(mol, | |
| numConfs=numConfs, | |
| maxAttempts=maxAttempts, | |
| pruneRmsThresh=pruneRmsThresh, | |
| useExpTorsionAnglePrefs=useExpTorsionAnglePrefs, | |
| useBasicKnowledge=useBasicKnowledge, | |
| enforceChirality=enforceChirality, | |
| numThreads=numThreads) | |
| return list(ids) | |
| def write_conformers_to_sdf(mol, | |
| filename, | |
| rmsClusters, | |
| conformerPropsDict, | |
| minEnergy): | |
| w = Chem.SDWriter(filename) | |
| for cluster in rmsClusters: | |
| for confId in cluster: | |
| for name in mol.GetPropNames(): | |
| mol.ClearProp(name) | |
| conformerProps = conformerPropsDict[confId] | |
| mol.SetIntProp("conformer_id", confId + 1) | |
| for key in conformerProps.keys(): | |
| mol.SetProp(key, str(conformerProps[key])) | |
| e = conformerProps["energy_abs"] | |
| if e: | |
| mol.SetDoubleProp("energy_delta", e - minEnergy) | |
| w.write(mol, confId=confId) | |
| w.flush() | |
| w.close() | |
| def calc_energy(mol, | |
| conformerId, | |
| mmffVariant, | |
| minimizeIts): | |
| results = {} | |
| # single-threaded minimization approach | |
| ff = AllChem.MMFFGetMoleculeForceField(mol, | |
| AllChem.MMFFGetMoleculeProperties(mol, | |
| mmffVariant=mmffVariant), | |
| confId=conformerId) | |
| ff.Initialize() | |
| ff.CalcEnergy() | |
| if minimizeIts > 0: | |
| results["converged"] = ff.Minimize(maxIts=minimizeIts) | |
| results["energy_abs"] = ff.CalcEnergy() | |
| return results | |
| def cluster_conformers(mol, | |
| mode="RMSD", | |
| threshold=2.0): | |
| if mode == "TFD": | |
| dmat = TorsionFingerprints.GetTFDMatrix(mol) | |
| else: | |
| dmat = AllChem.GetConformerRMSMatrix(mol, | |
| prealigned=False) | |
| rms_clusters = Butina.ClusterData(dmat, | |
| mol.GetNumConformers(), | |
| threshold, | |
| isDistData=True, | |
| reordering=True) | |
| return rms_clusters | |
| def align_conformer_clusters(mol, | |
| clust_ids): | |
| rmslist = [] | |
| AllChem.AlignMolConformers(mol, | |
| confIds=clust_ids, | |
| RMSlist=rmslist) | |
| return rmslist | |
| ################################################################################ | |
| ######## | |
| # MAIN # | |
| ######## | |
| if len(sys.argv) < 4: | |
| print("Usage: conf_gen.py <sdf input> <num conformers> <max attempts> <prune threshold> <cluster method: (RMSD|TFD) = RMSD> <cluster threshold = 0.2> <minimize iterations: = 0> <MMFF variant: = (MMFF94|MMFF94s) = MMFF94") | |
| sys.exit() | |
| input_file = sys.argv[1] | |
| numConfs = int(sys.argv[2]) | |
| maxAttempts = int(sys.argv[3]) | |
| pruneRmsThresh = float(sys.argv[4]) | |
| if len(sys.argv) > 5: clusterMethod = sys.argv[5] | |
| else: clusterMethod = "RMSD" | |
| if len(sys.argv) > 6: clusterThreshold = float(sys.argv[6]) | |
| else: clusterThreshold = 2.0 | |
| if len(sys.argv) > 7: minimizeIterations = int(sys.argv[7]) | |
| else: minimizeIterations = 0 | |
| if len(sys.argv) > 8: mmffVariant = sys.argv[8] | |
| else: mmffVariant = "MMFF94" | |
| suppl = Chem.ForwardSDMolSupplier(input_file) | |
| i=0 | |
| for mol in suppl: | |
| i = i+1 | |
| if mol is None: continue | |
| m = Chem.AddHs(mol) | |
| Chem.rdmolops.AssignStereochemistryFrom3D(mol) | |
| Chem.rdmolops.AssignAtomChiralTagsFromStructure(mol) | |
| refm = Chem.Mol(m) | |
| # generate the confomers | |
| print("Generating conformers for molecule ", i) | |
| conformerIds = gen_conformers(m, numConfs, maxAttempts, pruneRmsThresh, True, True, True) | |
| conformerPropsDict = {} | |
| if minimizeIterations: | |
| print("Minimizing and calculating energies of conformers of molecule ", i) | |
| else: | |
| print("Calculating energies of conformers of molecule ", i) | |
| for conformerId in conformerIds: | |
| # energy minimise (optional) and energy calculation | |
| props = calc_energy(m, conformerId, mmffVariant, minimizeIterations) | |
| conformerPropsDict[conformerId] = props | |
| # cluster the conformers | |
| rmsClusters = cluster_conformers(m, clusterMethod, clusterThreshold) | |
| print("Molecule", i, ": generated", len(conformerIds), "conformers and", len(rmsClusters), "clusters") | |
| rmsClustersPerCluster = [] | |
| clusterNumber = 0 | |
| minEnergy = 9999999999999 | |
| for cluster in rmsClusters: | |
| clusterNumber = clusterNumber+1 | |
| rmsWithinCluster = align_conformer_clusters(m, cluster) | |
| for conformerId in cluster: | |
| e = props["energy_abs"] | |
| if e < minEnergy: | |
| minEnergy = e | |
| props = conformerPropsDict[conformerId] | |
| props["cluster_no"] = clusterNumber | |
| props["cluster_centroid"] = cluster[0] + 1 | |
| idx = cluster.index(conformerId) | |
| if idx > 0: | |
| props["rms_to_centroid"] = rmsWithinCluster[idx-1] | |
| else: | |
| props["rms_to_centroid"] = 0.0 | |
| # MLN | |
| # final alignment to input | |
| # -- cannot get it to work -- | |
| #for confId in conformerIds: | |
| # rmsd = Chem.rdMolAlign.AlignMol(m, refm, prbCid=confId, refCid=-1, reflect=True) | |
| # print(rmsd) | |
| write_conformers_to_sdf(m, str(i) + ".sdf", rmsClusters, conformerPropsDict, minEnergy) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment