Merge pull request #13 from Anantha-Rao12/master
Documentation work
Anantha-Rao12 committed Oct 27, 2020
2 parents 60989cd + db2935a commit 981bb78
Showing 5 changed files with 589 additions and 9 deletions.
@@ -15,6 +15,15 @@

def read_pdb_ids_csv(csv_path):

""" Read a comma separated file that is essentially one row and out put a list
Args :
csv_path (str) : The relative/full path to the .csv file
Returns :
A List of all the values in the .csv file
"""

### csv_path : path of the csv file containing all pdb-ids downloaded from PDB-advanced-search-options
with open(csv_path, 'r') as file: ### read the file with a context manager
pdb_id_list = file.read().split(',') ### create a list containing pdb-ids
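A minimal usage sketch of read_pdb_ids_csv (assuming it is called from the same script, so the function is already in scope, and that it returns the list described in its docstring; the file path is illustrative):

### Hypothetical usage; './pdb-ids.csv' is an illustrative path, not taken from this diff.
pdb_ids = read_pdb_ids_csv('./pdb-ids.csv')
print(len(pdb_ids), 'PDB IDs loaded; first few:', pdb_ids[:3])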
@@ -26,6 +35,16 @@ def get_pdb_details(pdb_id):

'''PDB_ID, Desc, Classification, Exp_system, Method, Lit, Pubmed_id, Pubmed_abs, Org1, Mmol, Org2, Mut, Res is the order of items needed'''

"""
RCSB Web Parser that extracts the above stated information for a single ODB ID
Args :
pdb_id (str) : PDB ID of the molecule obtained from RCSB
Returns :
A list containing all values scrapped from the Databse
"""
pdb_details = []
url = 'https://www.rcsb.org/structure/'+pdb_id
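The rest of the parser is collapsed in this diff; the following is only a rough sketch of the kind of page fetch it presumably performs (the fetch method and the fetch_structure_page helper are assumptions, not the repository's code):

### Illustrative only; the actual scraping logic is in the collapsed portion of the diff.
from urllib.request import urlopen

def fetch_structure_page(pdb_id):
    url = 'https://www.rcsb.org/structure/' + pdb_id
    with urlopen(url) as response:              ### plain HTTP GET of the RCSB structure page
        return response.read().decode('utf-8')  ### raw HTML to be parsed for the 13 fields

html = fetch_structure_page('1TIM')             ### '1TIM' is just an example PDB ID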

@@ -128,12 +147,23 @@ def get_pdb_details(pdb_id):


def main(csv_path, col_names):

"""
Extract information for all PDB ID
Args :
csv_path (str) : Full/relative path to the csv_path containing the PDB IDS
col_names (list) : Name of the 13 columns that contain information on each aspect of the PDB file
Returns :
A Dataframe object from pandas where each row corresponds to a PDB ID and each column corresponds to a particular attribute of that PDB ID
"""

pdb_ids_list = read_pdb_ids_csv(csv_path)
dataframe = list(map(get_pdb_details, pdb_ids_list)) ### scrape details for every PDB ID in the list

dataframe = pd.DataFrame(dataframe, columns=col_names)
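A hedged driver example for main() (assuming main returns the DataFrame as its docstring says); the column names simply mirror the order listed in get_pdb_details, and the output file name is a placeholder:

### Hypothetical call; the output file name is a placeholder.
col_names = ['PDB_ID', 'Desc', 'Classification', 'Exp_system', 'Method', 'Lit',
             'Pubmed_id', 'Pubmed_abs', 'Org1', 'Mmol', 'Org2', 'Mut', 'Res']
pdb_df = main('./pdb-ids.csv', col_names)
pdb_df.to_csv('pdb_details.csv', index=False)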

@@ -10,11 +10,42 @@
import subprocess
import time
import pandas as pd
#from chimera import runCommand as rc

# The above line can be uncommented when running saturated_mutagenesis() in UCSF Chimera

def saturated_mutagenesis(model_no,chain_name,start_residue,stop_residue,input_path,file_name,output_path):

"""
Perform saturated Mutagenesis (SM) of a given length of peptide submitted in .pdb format and return all mutants each in different files
Args:
model_no (str) : The model number of the peptide of interest in the .pdb file
chain_no (str) : Name of the Chain where SM is to be performed. Ex : 'A' , 'B' or ' '
start_residue (int) : Residue number on the given chain and model where SM needs to be performed (started)
stop_residue (int) : Residue number on the given chain where the SM needs to be stopped.
input_path (str) : Path to the directory .pdb containing the peptide that needs to undergo SM
file_name (str) : Name of the .pdb file submitted
output_path (str) : Name of the output directory where the new models are saved.
Returns :
This script is to be run in UCSF Chimera and all models/mutants are returned in .pdb format in the output_directory
Raises :
UCSF Chimera only works with Python 2.x
Notes :
Visit Github.com/Anantha-Rao12/Peptides-against-Malaria for more info
"""

aa_data = 'ala arg asn asp cys glu gln gly his ile leu lys met phe pro ser thr trp tyr val'.split()

for residue_no in range(start_residue,stop_residue+1):
@@ -28,7 +59,26 @@ def saturated_mutagenesis(model_no,chain_name,start_residue,stop_residue,input_p
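A sketch of how saturated_mutagenesis() might be invoked from UCSF Chimera's built-in Python 2 interpreter; every argument below is a placeholder, not a value from the repository:

# Run inside UCSF Chimera (Python 2); all arguments are placeholders.
saturated_mutagenesis(model_no='0', chain_name='A',
                      start_residue=10, stop_residue=25,
                      input_path='/path/to/pdb_dir', file_name='peptide.pdb',
                      output_path='/path/to/mutants')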



def AnalyseComplex(foldx_path, file_full_path):

"""
Use the subprocess module to execute the --analyseComplexChains=A,B command of FoldX and obtain the Interaction Energy between two chains in a .pdb file
Args:
foldx_path (str) : local full/relative path of the FOLDX executable
file_path (str) : local full path to the .pdb file that is to be analysed
Returns :
Prints the time taken to analyse, process and write the output a single .pdb file
Output is the stdout from the terminal
Notes :
More information can be found here : foldxsuite.crg.eu/command/AnalyseComplex
"""

data=[]
start = time.time()
@@ -43,10 +93,10 @@ def AnalyseComplex(foldx_path, file_full_path, output_full_path):
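The body that builds and runs the FoldX command line is collapsed in this diff; below is a minimal sketch of what such a subprocess call could look like. The flags other than --analyseComplexChains (which the docstring names) depend on the installed FoldX version and are assumptions, as is the run_analyse_complex helper itself:

# Illustrative sketch (Python 3); flags other than --analyseComplexChains are assumptions.
import os
import subprocess

def run_analyse_complex(foldx_path, pdb_full_path):
    pdb_dir, pdb_file = os.path.split(pdb_full_path)
    cmd = [foldx_path,
           '--command=AnalyseComplex',
           '--pdb-dir=' + pdb_dir,
           '--pdb=' + pdb_file,
           '--analyseComplexChains=A,B']
    completed = subprocess.run(cmd, capture_output=True, text=True)
    return completed.stdout   # FoldX reports to stdout and writes *.fxout summary files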

def make_df_combine(files_path1,files_path2,output_path,csv_file_name):

""" Visit files_path1 and files_path2 to collect all FoldX Summary.fxout files that were created by the AnalyseComplex command.
With os.listdir, each file name is stored in lists called 'foldx_summary_files' via list comprehension. We then open each .fxout Summary file with
a context manager and store the last line of the file, which holds the required interaction data (tab-separated). A list of lists is thus created
and finally grafted into a dataframe with pandas that is written as a .csv file to the given 'output_path' with 'csv_file_name'."""

listoflists = []
paths = [files_path1, files_path2]
@@ -55,8 +105,8 @@ def make_df_combine(files_path1,files_path2,output_path,csv_file_name):
for file in foldx_summary_files: # list of Summary.fxout file names built earlier via list comprehension
with open(os.path.join(path,file),'r') as rf:
lines = rf.read().splitlines()
data = lines[-1].split('\t') # Obtain the last line of the Summary.fxout file
header = lines[-2].split('\t') # Obtain the second-to-last line as the header of the Summary.fxout file
listoflists.append(data)
df = pd.DataFrame(listoflists,columns=header)
os.chdir(output_path)
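A hypothetical call to make_df_combine(); all four arguments are placeholders, not values from the repository:

# Hypothetical call; every path and the csv name are placeholders.
make_df_combine('/results/wildtype_fxout', '/results/mutant_fxout',
                '/results/combined', 'interaction_energies.csv')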