Skip to content

Commit

Permalink
added docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
preciserobot committed Jan 11, 2021
1 parent 84daddf commit 224dbec
Showing 1 changed file with 46 additions and 4 deletions.
50 changes: 46 additions & 4 deletions fh.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,39 @@
import vcf
from collections import defaultdict

#SNPscores for each genotype from Talmud et al. 2013 (DOI:10.1016/S0140-6736(12)62127-8)


def flat_map(f, xs):
    """Generic flatMap: apply ``f`` to every element of ``xs`` and concatenate the results.

    Parameters
    ----------
    f : callable
        Function that returns an iterable for each element of ``xs``.
    xs : iterable
        Input elements.

    Returns
    -------
    list
        Every item produced by ``f(x)`` for each ``x`` in ``xs``, in order.
    """
    return [y for ys in xs for y in f(ys)]


class PRS(object):
#scores and decile ranges from Talmud et al., 2013
"""
Class to easily calculate Polygenic Risk Score for FH.
See Talmud et al. 2013
Attributes
----------
SCORES : nested hashtable [location][genotype]
Static risk scores for 12 SNPs from Talmud et al. 2013 (DOI:10.1016/S0140-6736(12)62127-8)
RISKRANGES : tuple of tuples
decile ranges for risk score and the associated risk level (according to Bristol)
vcf_file : str
VCF file path (uncompressed)
locations : [str]
SNP locations as extracted from the SCORES structure
sample_index : int
Index of sample for which score is calculated
Methods
-------
_readGenotypes()
Extracts genotypes from the VCF at the positions specified in SCORES
scoreGenotypes()
Calculates PRS and returns min and max value (which are identical if all SNPs have been genotyped)
risk()
Returns decile number for risk score and the associated risk term
"""
SCORES = {
"1:55504650": { #rs2479409 (PCSK9)
"GG":0.104,
Expand Down Expand Up @@ -91,12 +115,23 @@ class PRS(object):
)

def __init__(self, vcf_file, sample_index=0):
    """
    Set up the instance and read the genotypes needed for risk score calculation.

    Parameters
    ----------
    vcf_file : str
        VCF file path (uncompressed)
    sample_index : int
        Index of sample for which score is calculated (defaults to first sample in VCF)
    """
    self.vcf_file = vcf_file
    self.sample_index = sample_index
    # A SCORES key may hold several comma-separated locations; flatten them into one list.
    self.locations = [loc for key in self.SCORES for loc in key.split(',')]
    self._readGenotypes()

def _readGenotypes(self):
"""
Extracts genotypes from VCF at position specified in SCORES hash table
"""
self.genotypes = defaultdict(None)
vcf_reader = vcf.Reader(open(self.vcf_file, 'r'))
for record in vcf_reader:
Expand All @@ -119,6 +154,10 @@ def _readGenotypes(self):
# self.genotypes[location] = gt_bases

def scoreGenotypes(self):
"""
Calculates risk score for extracted genotypes.
Will return a range if not all SNPs were genotyped, or two identical values otherwise
"""
score_range = [0,0]
for l,s in self.SCORES.items():
locations = l.split(',')
Expand All @@ -135,6 +174,9 @@ def scoreGenotypes(self):
return score_range

def risk(self):
"""
Returns risk decile and associated term
"""
score_range = self.scoreGenotypes()
risk_strings = []
for score in score_range:
Expand Down

0 comments on commit 224dbec

Please sign in to comment.