From fbace43ed48262c99703d8e5c91c2e4007685bc2 Mon Sep 17 00:00:00 2001 From: Seth Wiggin Date: Tue, 2 Jul 2024 15:51:08 +0100 Subject: [PATCH] add scripts and ReadMe from 2017 mof subset paper NO_JIRA --- scripts/ReadMe.md | 62 +++++---- .../Command_prompt_MOF_solvent_removal.py | 119 ++++++++++++++++++ .../Mercury_MOF_solvent_removal.py | 98 +++++++++++++++ .../ReadMe.md | 56 +++++++++ .../ReadMe.md | 23 ++-- 5 files changed, 324 insertions(+), 34 deletions(-) create mode 100644 scripts/mof_solvent_removal_2017_chem_mater_publication/Command_prompt_MOF_solvent_removal.py create mode 100644 scripts/mof_solvent_removal_2017_chem_mater_publication/Mercury_MOF_solvent_removal.py create mode 100644 scripts/mof_solvent_removal_2017_chem_mater_publication/ReadMe.md diff --git a/scripts/ReadMe.md b/scripts/ReadMe.md index d538c5c..1c257c9 100644 --- a/scripts/ReadMe.md +++ b/scripts/ReadMe.md @@ -1,45 +1,57 @@ -## Contents +# Contents -This folder contains scripts submitted by users or CCDC scientists for anyone to use freely. +## Concat Mol2 -### Hydrogen bond propensity -- Writes a `.docx report` of a hydrogen bond propensity calculation for any given `.mol2`/refcode. +- Concatenates mol2 files present in working directory to a single `.mol2` file. -### Multi-component hydrogen bond propensity -- Performs a multi-component HBP calculation for a given library of co-formers. +## Create CASTEP Input -### Packing similarity dendrogram -- Construct a dendrogram for an input set of structures based on packing-similarity analysis. +- Creates input files (`.cell` and `.param`) files for a given compound through Mercury. -### GOLD-multi -- Use the CSD Docking API and the multiprocessing module to parallelize GOLD docking. +## Create GAUSSIAN Input + +- Create GAUSSIAN input file (`.gjf`) for a given CSD refcode or `.mol2` file. 
+ +## Find Binding Conformation -### Find Binding Conformation - Generates idealized conformers for ligands and evaluates their RMSD to the conformation in the PDB. -### Concat Mol2 -- Concatenates mol2 files present in working directory to a single `.mol2` file. +## GOLD-multi -### Create CASTEP Input -- Creates input files (`.cell` and `.param`) files for a given compound through Mercury. +- Use the CSD Docking API and the multiprocessing module to parallelize GOLD docking. -### Create GAUSSIAN Input -- Create GAUSSIAN input file (`.gjf`) for a given CSD refcode or `.mol2` file. +## Hydrogen bond propensity + +- Writes a `.docx report` of a hydrogen bond propensity calculation for any given `.mol2`/refcode. + +## MOF subset 2017 Chem Mater publication + +- Two scripts that were supplementary information in the publication "Development of a Cambridge Structural Database Subset: + A Collection of Metal–Organic Frameworks for Past, Present, and Future" DOI: [10.1021/acs.chemmater.7b00441](https://doi.org/10.1021/acs.chemmater.7b00441) + +## Multi-component hydrogen bond propensity + +- Performs a multi-component HBP calculation for a given library of co-formers. + +## Packing similarity dendrogram + +- Construct a dendrogram for an input set of structures based on packing-similarity analysis. + +## Particle Rugosity -### Particle Rugosity - Calculates the simulated BFDH particle rugosity weighted by facet area. -## Tips -A section for top tips in using the repository and GitHub. -### Searching tips: +## Tips + +A section for top tips in using the repository and GitHub. + +### Searching tips The search bar in GitHub allows you to search for keywords mentioned in any file throughout the repository (in the main branch). It is also possible to filter which file type you are interested in.
-For example: -"hydrogen bond" +For example: +"hydrogen bond" - - diff --git a/scripts/mof_solvent_removal_2017_chem_mater_publication/Command_prompt_MOF_solvent_removal.py b/scripts/mof_solvent_removal_2017_chem_mater_publication/Command_prompt_MOF_solvent_removal.py new file mode 100644 index 0000000..947a7e8 --- /dev/null +++ b/scripts/mof_solvent_removal_2017_chem_mater_publication/Command_prompt_MOF_solvent_removal.py @@ -0,0 +1,119 @@ +# +# This script can be used for any purpose without limitation subject to the +# conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx +# +# This permission notice and the following statement of attribution must be +# included in all copies or substantial portions of this script. +# +# 2016-12-15: created by S. B. Wiggin, the Cambridge Crystallographic Data Centre +# 2024-07-02: minor update to include using ccdc utilities to find the solvent file + +""" +Script to identify and remove bound solvent molecules from a MOF structure. + +Solvents are identified using a defined list. +Output in CIF format includes only framework component with all monodentate solvent removed. 
+""" +####################################################################### + +import os +import glob +import argparse + +from ccdc import io +from ccdc import utilities + +####################################################################### + +arg_handler = argparse.ArgumentParser(description=__doc__) +arg_handler.add_argument( + 'input_file', + help='CSD .gcd file from which to read MOF structures' +) +arg_handler.add_argument( + '-o', '--output-directory', + help='Directory into which to write stripped structures' +) +arg_handler.add_argument( + '-m', '--monodentate', default=False, action='store_true', + help='Whether or not to strip all unidenate (or monodentate) ligands from the structure' +) +arg_handler.add_argument( + '-s', '--solvent-file', + help='Location of solvent file' +) + +args = arg_handler.parse_args() +if not args.output_directory: + args.output_directory = os.path.dirname(args.input_file) + +# Define the solvent smiles patterns +if not args.solvent_file: + args.solvent_file = utilities.Resources().get_ccdc_solvents_dir() + +if os.path.isdir(args.solvent_file): + solvent_smiles = [ + io.MoleculeReader(f)[0].smiles + for f in glob.glob(os.path.join(args.solvent_file, '*.mol2')) + ] +else: + solvent_smiles = [m.smiles for m in io.MoleculeReader(args.solvent_file)] + + +####################################################################### + + +def is_multidentate(c, mol): + """ + Check for components bonded to metals more than once. + If monodentate is not specified in the arguments, skip this test. 
+ """ + if not args.monodentate: + return True + got_one = False + for a in c.atoms: + orig_a = mol.atom(a.label) + if any(x.is_metal for b in orig_a.bonds for x in b.atoms): + if got_one: + return True + got_one = True + return False + + +def is_solvent(c): + """Check if this component is a solvent.""" + return c.smiles == 'O' or c.smiles in solvent_smiles + + +def has_metal(c): + """Check if this component has any metals.""" + return any(a.is_metal for a in c.atoms) + + +# Iterate over entries +try: + for entry in io.EntryReader(args.input_file): + if entry.has_3d_structure: + # Ensure labels are unique + mol = entry.molecule + mol.normalise_labels() + # Use a copy + clone = mol.copy() + # Remove all bonds containing a metal atom + clone.remove_bonds(b for b in clone.bonds if any(a.is_metal for a in b.atoms)) + # Work out which components to remove + to_remove = [ + c + for c in clone.components + if not has_metal(c) and (not is_multidentate(c, mol) or is_solvent(c)) + ] + # Remove the atoms of selected components + mol.remove_atoms( + mol.atom(a.label) for c in to_remove for a in c.atoms + ) + # Write the CIF + entry.crystal.molecule = mol + with io.CrystalWriter('%s/%s_stripped.cif' % (args.output_directory, entry.identifier)) as writer: + writer.write(entry.crystal) +except RuntimeError: + print('File format not recognised') diff --git a/scripts/mof_solvent_removal_2017_chem_mater_publication/Mercury_MOF_solvent_removal.py b/scripts/mof_solvent_removal_2017_chem_mater_publication/Mercury_MOF_solvent_removal.py new file mode 100644 index 0000000..02b3b7d --- /dev/null +++ b/scripts/mof_solvent_removal_2017_chem_mater_publication/Mercury_MOF_solvent_removal.py @@ -0,0 +1,98 @@ +# +# This script can be used for any purpose without limitation subject to the +# conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx +# +# This permission notice and the following statement of attribution must be +# included in all copies or substantial portions of 
this script. +# +# 2016-12-15: created by S. B. Wiggin, the Cambridge Crystallographic Data Centre +# 2024-07-02: minor update to include using ccdc utilities to find the solvent file + +""" +Script to identify and remove bound solvent molecules from a MOF structure. + +Solvents are identified using a defined list. +Output in CIF format includes only framework component with all monodentate solvent removed. +""" +####################################################################### + +import os +import glob + +from ccdc import io +from ccdc import utilities +from mercury_interface import MercuryInterface + +####################################################################### + +helper = MercuryInterface() +solvent_smiles = [] + +# Define the solvent smiles patterns +solvent_file = utilities.Resources().get_ccdc_solvents_dir() + +if os.path.isdir(solvent_file): + solvent_smiles = [ + io.MoleculeReader(f)[0].smiles + for f in glob.glob(os.path.join(solvent_file, '*.mol2')) + ] + +else: + html_file = helper.output_html_file + f = open(html_file, "w") + f.write('
') + f.write('Sorry, unable to locate solvent files in the CCDC directory') + f.write('
') + f.close() +# a user-defined solvent directory could be added here instead + +####################################################################### + + +def is_solvent(c): + """Check if this component is a solvent.""" + return c.smiles == 'O' or c.smiles in solvent_smiles + + +def has_metal(c): + """Check if this component has any metals.""" + return any(a.is_metal for a in c.atoms) + + +entry = helper.current_entry +if entry.has_3d_structure: + # Ensure labels are unique + mol = entry.molecule + mol.normalise_labels() + # Use a copy + clone = mol.copy() + # Remove all bonds containing a metal atom + clone.remove_bonds(b for b in clone.bonds if any(a.is_metal for a in b.atoms)) + # Work out which components to remove + to_remove = [ + c + for c in clone.components + if not has_metal(c) and is_solvent(c) + ] + # Remove the atoms of selected components + mol.remove_atoms( + mol.atom(a.label) for c in to_remove for a in c.atoms + ) + # Write the CIF + entry.crystal.molecule = mol + with (io.CrystalWriter('%s/%s_stripped.cif' % (helper.options['working_directory_path'], entry.identifier)) as + writer): + writer.write(entry.crystal) + html_file = helper.output_html_file + f = open(html_file, "w") + f.write('
') + f.write('Cif file containing MOF framework without monodentate solvent written to your output directory') + f.write('
') + f.close() +else: + html_file = helper.output_html_file + f = open(html_file, "w") + f.write('
') + f.write('Sorry, this script will only work for CSD entries containing atomic coordinates') + f.write('
') + f.close() diff --git a/scripts/mof_solvent_removal_2017_chem_mater_publication/ReadMe.md b/scripts/mof_solvent_removal_2017_chem_mater_publication/ReadMe.md new file mode 100644 index 0000000..89cdc7a --- /dev/null +++ b/scripts/mof_solvent_removal_2017_chem_mater_publication/ReadMe.md @@ -0,0 +1,56 @@ +# MOF solvent removal + +## Summary + +Scripts included in the supporting information of the article "Development of a Cambridge Structural Database Subset: +A Collection of Metal–Organic Frameworks for Past, Present, and Future", Peyman Z. Moghadam, Aurelia Li, +Seth B. Wiggin, Andi Tao, Andrew G. P. Maloney, Peter A. Wood, Suzanna C. Ward, and David Fairen-Jimenez +*Chem. Mater.* **2017**, 29, 7, 2618–2625, DOI: [10.1021/acs.chemmater.7b00441](https://doi.org/10.1021/acs.chemmater.7b00441) + +Scripts are essentially equivalent: one is designed to be run through the Mercury CSD Python API menu to +remove solvent from a single structure present in the visualiser, the second runs from the command line +and takes a list of CSD entries (a .gcd file) to run through the solvent removal process in bulk. + +## Requirements + +Tested with CSD Python API 3.9.18 + +## Licensing Requirements + +CSD-Core + +## Instructions on running + +For the script Mercury_MOF_solvent_removal.py: + +- In Mercury, pick **CSD Python API** in the top-level menu, then **Options…** in the resulting pull-down menu.
+- The Mercury Scripting Configuration control window will be displayed; from the *Additional Mercury Script Locations* +section, use the **Add Location** button to navigate to a folder location containing the script +- It will then be possible to run the script directly from the CSD Python API menu, with the script running on the structure +shown in the visualiser + +For the script Command_prompt_MOF_solvent_removal.py: + +```cmd +python Command_prompt_MOF_solvent_removal.py input_file.gcd +``` + +```cmd +positional arguments: + input_file CSD .gcd file from which to read MOF structures + +optional arguments: + -h, --help show this help message and exit + -o OUTPUT_DIRECTORY, --output-directory OUTPUT_DIRECTORY + Directory into which to write stripped structures + -m, --monodentate + Whether or not to strip all unidenate (or monodentate) ligands from the structure + -s SOLVENT_FILE, --solvent-file SOLVENT_FILE + Location of solvent file +``` + +## Author + +*S.B.Wiggin* (2016) + +> For feedback or to report any issues please contact [support@ccdc.cam.ac.uk](mailto:support@ccdc.cam.ac.uk) diff --git a/scripts/multi_component_hydrogen_bond_propensity/ReadMe.md b/scripts/multi_component_hydrogen_bond_propensity/ReadMe.md index b7d6092..bb46a3d 100644 --- a/scripts/multi_component_hydrogen_bond_propensity/ReadMe.md +++ b/scripts/multi_component_hydrogen_bond_propensity/ReadMe.md @@ -1,28 +1,31 @@ -# Multi-Component Hydrogen Bond Propensity +# Multi-Component Hydrogen Bond Propensity ## Summary Performs a multi-component Hydrogen bond propensity calculation for a given library of co-formers -## Example -Individual reports are generated for each coformer stored in indvidual folders. A summary multicomponent report with rankings is also generated. +## Example -Individual reports include: +Individual reports are generated for each coformer stored in individual folders. A summary multicomponent report with rankings is also generated.
-- Predicted intermolecular hydrogen bond propensities +Individual reports include: + +- Predicted intermolecular hydrogen bond propensities - Hydrogen bond coordination likelihood -Summary report includes: +Summary report includes: + - Chemical Diagram -- Table of ranked components +- Table of ranked components - Multi-component hydrogen-bond propensity chart -## Requirements +## Requirements + - docxtpl - multi_component_hydrogen_bond_propensity_report.docx - multi_component_pair_hbp_report.docx -## Licensing Requirements +## Licensing Requirements - CSD-Materials @@ -47,6 +50,7 @@ optional arguments: ``` The default coformer library is the one supplied with your Mercury install + - for 2023.1 or later, in ```\ccdc-software\mercury\molecular_libraries\ccdc_coformers``` - for 2022.3 or earlier, in ```\Mercury\molecular_libraries\ccdc_coformers``` @@ -55,6 +59,7 @@ Ensure the input structure and coformers have the correct bond typing and any ch If the calculation fails for any API-coformer combinations, they will be recorded with N/A in the summary report table ## Author + _Andrew Maloney_ (CCDC) 2017 > For feedback or to report any issues please contact [support@ccdc.cam.ac.uk](mailto:support@ccdc.cam.ac.uk)