Source code for limmbo.core.gwas

import sys

import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt

import copy

import scipy as sp
import pandas as pd
import numpy as np

import limix as limi
import limix.qtl as qtl
import limix.plot as plot
import limix.mtset

from limmbo.utils.utils import verboseprint
from limmbo.utils.utils import effectiveTests

[docs]class GWAS(object):
    """
    """
    def __init__(self, datainput, seed=10, verbose=True,
            searchDelta=False):
        '''
        nothing to initialize
        '''
        self.verbose = verbose
        self.seed = seed
        self.searchDelta = searchDelta
        self.genotypes = datainput.genotypes
        self.genotypes_info = datainput.genotypes_info
        self.phenotypes = datainput.phenotypes
        self.phenotype_ID = datainput.phenotype_ID
        self.covariates = datainput.covariates
        self.relatedness = datainput.relatedness
        self.pcs = datainput.pcs
        self.Cg = datainput.Cg
        self.Cn = datainput.Cn
        self.test = "lrt"

        self.pvalues = None
        self.pvalues_adjust = None
        self.pvalues_empirical_raw = None
        self.stats = None
        self.z = None
        model = None
        self.pvalues_empirical = None
        self.adjustBy = None
        self.estimate_vd = None
        self.fdr_empirical = None

        try:
            self.genotypes = np.array(self.genotypes)
        except:
            raise IOError("datainput.genotypes cannot be coverted to np.array")

        try:
            self.phenotypes = np.array(self.phenotypes)
        except:
            raise IOError("datainput.phenotypes cannot be coverted to np.array")

        if self.covariates is not None:
            try:
                self.covariates = np.array(self.covariates)
            except:
                raise IOError("datainput.covariates cannot be coverted to np.array")
        
        if self.relatedness is not None:
            try:
                self.relatedness = np.array(self.relatedness)
            except:
                raise IOError("datainput.relatedness cannot be coverted to np.array")

[docs]    def runAssociationAnalysis(self, mode, setup="lmm", adjustSingleTrait=None):
        r"""
        Analysing the association between phenotypes, genotypes, optional
        covariates and random genetic effects.

        Arguments:
            mode (string):
                pecifies the type of linear model: either 'multitrait' for
                multivariate analysis or 'singletrait' for univariate analysis.
            setup (string):
                specifies the linear model setup: either 'lmm' for linear mixed
                model or 'lm' for a simple linear model.
            adjustSingleTrait (string):
                Method to adjust single-trait association p-values for testing
                multiple traits; If None (default) no adjusting. Options are
                'bonferroni' (for bonferroni correction') or 'effective' (for
                correcting for the effective number of tests as described in 
                `(Galwey,2009)
                <http://onlinelibrary.wiley.com/doi/10.1002/gepi.20408/abstract>`_.
                

        Returns:
            (dictionary):
                dictionary containing:

                - **lm** (:class:`limix.qtl.LMM`):
                  LIMIX LMM object
                - **pvalues** (numpy array):
                  [`NrSNP` x `P`] (when mode is singletrait) or [1 x`NrSNP`]
                  array of p-values.
                - **betas** (numpy array):
                  [`NrSNP` x `P`] array of effect size estimates per SNP across
                  all traits.
                - **pvalues_adjust** (numpy array):
                  only returned if mode is 'singletrait' and 'adjustSingleTrait'
                  is not None; contains single-trait p-values adjusted for the
                  number of single-trait analyses conducted.

        Examples:

            .. doctest::

                >>> from pkg_resources import resource_filename
                >>> from limmbo.io.reader import ReadData
                >>> from limmbo.io.input import InputData
                >>> from limmbo.core.gwas import GWAS
                >>> data = ReadData(verbose=False)
                >>> file_pheno = resource_filename('limmbo',
                ...                                'io/test/data/pheno.csv')
                >>> file_geno = resource_filename('limmbo',
                ...                                'io/test/data/genotypes.csv')
                >>> file_relatedness = resource_filename('limmbo',
                ...                     'io/test/data/relatedness.csv')
                >>> file_covs = resource_filename('limmbo',
                ...                               'io/test/data/covs.csv')
                >>> file_Cg = resource_filename('limmbo',
                ...                     'io/test/data/Cg.csv')
                >>> file_Cn = resource_filename('limmbo',
                ...                     'io/test/data/Cn.csv')
                >>> data.getPhenotypes(file_pheno=file_pheno)
                >>> data.getCovariates(file_covariates=file_covs)
                >>> data.getRelatedness(file_relatedness=file_relatedness)
                >>> data.getGenotypes(file_genotypes=file_geno)
                >>> data.getVarianceComponents(file_Cg=file_Cg,
                ...                            file_Cn=file_Cn)
                >>> indata = InputData(verbose=False)
                >>> indata.addPhenotypes(phenotypes = data.phenotypes)
                >>> indata.addRelatedness(relatedness = data.relatedness)
                >>> indata.addCovariates(covariates = data.covariates)
                >>> indata.addGenotypes(genotypes=data.genotypes,
                ...                     genotypes_info=data.genotypes_info)
                >>> indata.addVarianceComponents(Cg = data.Cg, Cn=data.Cn)
                >>> indata.commonSamples()
                >>> indata.regress()
                >>> indata.transform(transform="scale")
                >>> gwas = GWAS(datainput=indata, seed=10, verbose=False)
                >>>
                >>>
                >>> # Example of multi-trait single-variant association testing
                >>> # using a linear mixed model.
                >>> resultsAssociation = gwas.runAssociationAnalysis(
                ...     setup="lmm", mode="multitrait")
                >>> resultsAssociation.keys()
                ['lm', 'betas', 'pvalues']
                >>> resultsAssociation['pvalues'].shape
                (1, 20)
                >>> '{:0.3e}'.format(resultsAssociation['pvalues'].min())
                '4.584e-09'
                >>> resultsAssociation['betas'].shape
                (10, 20)
                >>>
                >>>
                >>> # Example of single-trait single-variant association testing
                >>> # using a linear mixed model.
                >>> resultsAssociation = gwas.runAssociationAnalysis(
                ...     setup="lmm", mode="singletrait",
                ...     adjustSingleTrait = "effective")
                >>> resultsAssociation.keys()
                ['pvalues_adjust', 'lm', 'betas', 'pvalues']
                >>> resultsAssociation['pvalues'].shape
                (10, 20)
                >>> resultsAssociation['betas'].shape
                (10, 20)
                >>> '{:0.3e}'.format(resultsAssociation['pvalues_adjust'].min())
                '2.262e-03'
        """


        # set parameters for the analysis
        self.N, self.P = self.phenotypes.shape
        self.S    = self.genotypes.shape[1]
        verboseprint("Loaded {} samples, {} phenotypes, {} snps".format(
            self.N, self.P, self.S), verbose = self.verbose)
            
        self.setup = setup
        self.mode = mode

        if mode == "multitrait":
            associationResults = self.__multiTraitAssociation_anyeffect(
                genotypes = self.genotypes)

        if mode == "singletrait":
            associationResults = self.__singleTraitAssociation(
                genotypes = self.genotypes,
                adjustSingleTrait = adjustSingleTrait)

        return associationResults

    def __multiTraitAssociation_anyeffect(self, genotypes, empiricalP=False,
            computeFDR=False):
        r"""
        Muti-trait association test, testing for an effect of the genotype on
        `any` phenotype (`any` effect test). Wraps around
        `qtl.qtl_test_lmm_kronecker`.

        Arguments:
            genotypes (array-like):
                [`N` x `NrSNP`] genotype array of [`N`] samples and [`NrSNP`]
                genotypes
            empiricalP (bool):
                set to True if association test is part of estimating empirical
                pvalues
            computeFDR (bool):
                set to True if association test is part of estimating empirical
                FDR

        Returns:
            (dictionary):
                dictionary containing:

                - **lm**(:class:`limix.qtl.LMM`):
                  LIMIX LMM object
                - **pvalues** (numpy array):
                  [1 x`NrSNP`] array of
                  p-values
                - **betas** (numpy array):
                  [`NrSNP` x `P`] array of effect size estimates per SNP across
                  all traits

        """
        if self.covariates is None:
            Acovs = None
        else:
            Acovs = sp.eye(self.P)
        if self.setup is "lmm":
            if self.Cg is None:
                if self.P > 30:
                    print("Warning: For large trait sizes, computation times "
                  "for pure REML variance decomposition are long, "
                   "consider bootstrapping trait-trait covariance"
                    " components")
                verboseprint("Estimate Variance components",
                            verbose = self.verbose)
                self.__varianceDecomposition()
            K1c = self.Cg
            K2c = self.Cn
            K1r = self.relatedness
            self.model="lmm_mt"
        else:
            K1c = 1e-9*sp.eye(self.P)
            K2c = sp.cov(self.phenotypes.T)
            K1r = sp.eye(self.N)

            if self.pcs is not None:
                self.model="lm_mt_pcs"
            else:
                self.model="lm_mt"
        Asnps = sp.eye(self.P)
        if not empiricalP and not computeFDR:
            verboseprint("Computing multi-trait (any effect) model: {}".format(
                self.model), verbose = self.verbose)

        lm, pvalues = qtl.qtl_test_lmm_kronecker(snps=genotypes,
                phenos=self.phenotypes, Asnps=Asnps, Acovs=Acovs,
                covs=self.covariates, K1r=K1r, K1c=K1c, K2c=K2c,
                searchDelta=self.searchDelta)

        if not empiricalP and not computeFDR:
            betas = lm.getBetaSNP()
        else:
            betas = None
            lm = None
        
        return {"pvalues": pvalues, "betas": betas, "lm": lm}


    def __singleTraitAssociation(self, genotypes, adjustSingleTrait=None,
            empiricalP=False, computeFDR=False):
        r"""
        Single-trait association test. Wraps around `qtl.qtl_test_lmm`.

        Arguments:
            genotypes (array-like):
                [`N` x `NrSNP`] genotype array of [`N`] samples and [`NrSNP`]
                genotypes
            empiricalP (bool):
                set to True if association test is part of estimating empirical
                pvalues
            computeFDR (bool):
                set to True if association test is part of estimating empirical
                FDR
            adjustSingleTrait (string):
                Method to adjust single-trait association p-values for testing
                multiple traits; If None (default) no adjusting. Options are
                'bonferroni' (for bonferroni correction') or 'effective' (for
                correcting for the effective number of tests as described in 
                `(Galwey,2009) <http://onlinelibrary.wiley.com/doi/10.1002/gepi.20408/abstract>`_.

        Returns:
            (dictionary):
                dictionary containing:

                    - **lm**(:class:`limix.qtl.LMM`):
                      LIMIX LMM object
                    - **pvalues** (numpy array):
                      [`P` x `NrSNP`] array of p-values
                    - **betas** (numpy array):
                      [`P` x `NrSNP`] array of effect size estimates per SNP 
                      across all traits
                    - **pvalues_adjust** (numpy array):
                      only returned if 'adjustSingleTrait' is not None; contains
                      single-trait p-values adjusted for the number of 
                      single-trait analyses conducted

        """

        if self.setup is "lmm":
            self.model="lmm_st"
            K = self.relatedness
        else:
            if self.pcs is not None:
                self.model="lm_st_pcs"
            else:
                self.model="lm_st"
                K = None

        if not empiricalP and not computeFDR:
            verboseprint("Computing single-trait association ({})".format(
                self.model), verbose = self.verbose)
            lm = qtl.qtl_test_lmm(snps=genotypes, pheno=self.phenotypes,
                K=K, covs=self.covariates, test=self.test)
            pvalues = lm.getPv()

        if not empiricalP and not computeFDR:
            betas = lm.getBetaSNP()
        else:
            betas = None
            lm = None

        if adjustSingleTrait is not None:
            if self.adjustBy is None:
                if adjustSingleTrait is "bonferroni":
                    self.adjustBy = self.P
                elif adjustSingleTrait is "effective":
                    self.adjustBy = effectiveTests(self.phenotypes)
                else:
                    raise (("{} is not a provided method to adjust single-trait"
                    "pvalues for multiple hypothesis testing").format(
                        adjustSingleTrait))
            pvalues_adjust = np.array([self.__adjust(p) for p in pvalues])
        else:
            pvalues_adjust = None

        self.adjustSingleTrait = adjustSingleTrait

        return {"lm":lm, "pvalues": pvalues, "pvalues_adjust": pvalues_adjust,
                "betas": betas}


[docs]    def saveAssociationResults(self, results, outdir, name="",
            pvalues_empirical=None):
        
        r"""
        Saves results of association analyses.

        Arguments:
            results (dictionary):
                dictionary generated via runAssociation analysis, containing:
		
                - **lm** (:class:`limix.qtl.LMM`):
		  LIMIX LMM object
		- **pvalues** (numpy array):
		  [`P` x `NrSNP`] array of p-values
		- **betas** (numpy array):
		  [`P` x `NrSNP`] array of effect size estimates per SNP
		  across all traits
		- **pvalues_adjust** (numpy array):
		  only returned mode == singletrait and if 'adjustSingleTrait'
                  is not None; contains single-trait p-values adjusted for the 
                  number of single-trait analyses conducted
                
            outdir (string):
                '/path/to/output/directory'; needs user writing permission
            name (string):
                input name specific name, such as chromosome used in analysis
            pvalues_empirical (pandas dataframe, optional):
                empirical pvalues via adjustSingleTrait 

        Returns:
            (None)
        """
        if name is not "":
            self.name = name
            outstring = (outdir, self.model, "_" + name)
        else:
            self.name = ""
            outstring = (outdir, self.model, "")
        
        self.genotypes_info['SNP'] = self.genotypes_info.index

        beta_df = pd.DataFrame(results['betas'].T,
            index=self.genotypes_info.index, columns = self.phenotype_ID)
        beta_df = pd.concat([self.genotypes_info, beta_df], axis=1)
        
        if self.mode is 'singletrait':
            pvalues_df = pd.DataFrame(results['pvalues'].T,  
                index=self.genotypes_info.index, columns = self.phenotype_ID)
            pvalues_df = pd.concat([self.genotypes_info, pvalues_df], axis=1)

            if results['pvalues_adjust'] is not None:
                pvalues_adjust_df = pd.DataFrame(results['pvalues_adjust'].T,  
                    index=self.genotypes_info.index, 
                    columns = self.phenotype_ID)
                pvalues_adjust_df = pd.concat([self.genotypes_info,
                    pvalues_adjust_df], axis=1)
                
                pvalues_adjust_df.to_csv("%s/%s_padjust%s.csv" % outstring,
                    index=False)
            
            if pvalues_empirical is not None:
                
                pempirical_df = pd.DataFrame(pvalues_empirical.T,
                    index=self.genotypes_info.index, 
                    columns = self.phenotype_ID)
                    #index=SNP, columns=['Pempirical'])
                pempirical_df = pd.concat([self.genotypes_info, 
                    pempirical_df], axis=1)
                
                pempirical_raw_df = pd.DataFrame(self.pvalues_empirical_raw.T,
                    index=self.genotypes_info.index, 
                    columns = self.phenotype_ID)
                pempirical_raw_df = pd.concat([self.genotypes_info, 
                    pempirical_raw_df], axis=1)
                
                pempirical_raw_df.to_csv("%s/%s_pempirical_raw%s%s.csv" %
                    (outstring + (self.fdr,)), index=False)
            
        if self.mode is 'multitrait':
            pvalues_df = pd.DataFrame(results['pvalues'].T,
                index=self.genotypes_info.index, columns = ["P"])
            pvalues_df = pd.concat([self.genotypes_info, pvalues_df], axis=1)
            
            if pvalues_empirical is not None:
                pvalues_empirical_df = pd.DataFrame(pvalues_empirical.T,
                    index=self.genotypes_info.index, columns = ["P"])
                pvalues_empirical_df = pd.concat([self.genotypes_info, 
                    pvalues_empirical-df], axis=1)

            
        pvalues_df.to_csv("%s/%s_pvalue%s.csv" % outstring, index=False)
        beta_df.to_csv("%s/%s_betavalue%s.csv" % outstring, index=False)

        if pvalues_empirical is not None:
            pempirical_df.to_csv("%s/%s_pempirical%s%s.csv" %
                (outstring + (self.fdr,)), index=False)

        if self.estimate_vd:
            if self.timeVD is not None:
                pd.DataFrame(self.timeVD).to_csv(
                    "%s/timeVarianceDecomposition_REML.csv" % outdir,
                    index=False, header=False)

            pd.DataFrame(self.Cg).to_csv("%s/Cg_REML.csv" % (outdir),
                index=False, header=False)
            pd.DataFrame(self.Cn).to_csv("%s/Cn_REML.csv" % (outdir),
                index=False, header=False)

            if self.fdr is not None:
                pd.DataFrame(self.ppermute).to_csv("%s/%s_ppermute%s.csv" %
                    outstring, index=False)
                pd.DataFrame(['FDR', str(self.FDR)]).T.to_csv(
                    "%s/%s_empiricalFDR_%s.csv" % outstring, header=False, 
                    index=False)

[docs]    def computeEmpiricalP(self, pvalues, nrpermutations = 1000):
        r"""
        Compute empirical p-values: permute the genotypes, do the 
        association test, record if permuted p-value of SNP is smaller than
        original p-value. Sum these occurrences and divide by total number of
        permutation.

        Arguments:
            pvalues (array-like):
                [`P` x `NrSNP`] (single-trait) or [1 x `NrSNP`] (multi-trait) 
                array of p-values
            nrpermutations (int):
                number of permutations; 1/nrpermutations is the maximum level
                of significance (alpha)to test for, e.g. 
                nrpermuations=100 -> alpha=0.01 

        Returns:
            (numpy array): 
                [`P` x `NrSNP`] (single-trait) or [1 x `NrSNP`] (multi-trait) 
                array of empirical p-values
        """

        verboseprint("Computing empirical p-values", verbose=self.verbose)
       
        np.random.seed(seed=self.seed)

        self.nrpermutations = nrpermutations
        pvalues = np.array(pvalues)

        if self.mode is "multitrait":
            count = np.zeros((nrpermutations, self.S))
        else:
            count = np.zeros((nrpermutations, self.P, self.S))

        for ps in range (nrpermutations):
            verboseprint("Permutation %s" % ps)
            genotypes_permute = self.genotypes[np.random.choice(self.N, self.N, 
                replace=False), :]
            if self.mode is "multitrait":
                resultsPermute = self.__multiTraitAssociation_anyeffect( 
                    genotypes = genotypes_permute, empiricalP=True)
                count[ps, :] = resultsPermute['pvalues'] <= pvalues
            if self.mode is "singletrait":
                resultsPermute = self.__singletraitAssociation(
                    genotypes = genotypes_permute, empiricalP=True)
                count[ps,:, :] = resultsPermute['pvalues'] <= pvalues


        pvalues_empirical = np.sum(count, axis=0)/nrpermutations
        
        return pvalues_empirical

[docs]    def computeFDR(self, fdr):
        r"""
        Create an empirical p-values distribution by permuting the genotypes
        and running the association test on these (10 random permuations). 
        Recored the observed pvalues, order by rank and find the rank of the 
        desired FDR to determine the empirical FDR.

        Arguments:
            fdr (float):
                desired fdr threshold

        Returns:
            (tuple):
                tuple containing:

                    - **empirical fdr** (float):
                    - **empirical_pvalue_distribution** (numpy array):
                      array of empirical p-values for 10 permutations tests
        """

        verboseprint("Computing empirical FDR", verbose = self.verbose)
        
        np.random.seed(seed=self.seed)
        #tests = int(1/self.options.fdr)
        tests = 10

        self.ppermute = np.zeros((tests, self.S))

        for ps in range (tests):
            genotypes_permute = self.genotypes[np.random.choice(self.N, self.N,
                replace=False), :]
            if self.mode == "multitrait":
                resultsFDR = self.__multiTraitAssociation_anyeffect(
                        genotypes = genotypes_permute, computeFDR=True)
                ppermute[ps, :] = resultsFDR['pvalues_permute']
            if self.mode == "singletrait":
                resultsFDR = self.__singleTraitAssociation(
                    genotypes = genotypes_permute, computeFDR=True,
                    adjustSingleTrait=adjustSingleTrait)
                pvalues_adjust_min = resultsFDR['pvalues_permute_adjust'].min(
                    axis=0)
                ppermute[ps, :] = pvalues_adjust_min

        SNPsPassingFDR = int(fdr* self.S* tests)
        self.allppermute = ppermute.flatten()
        self.allppermute.sort()
        self.fdr_empirical = self.allppermute[SNPsPassingFDR]

        return self.fdr_empirical, self.allppermute

    def __adjust(self, pv):
	r"""
	Adjust pvalues for multiple hypothesis testing by multiplying with 
        a constant (if adjustBy=P, equivalent to Bonferroni adjustment).

	Arguments:
            pv (array like):
                p-values

        Returns:
            pv (numpy array):
                adjusted p-values
	"""
        pvadjust = np.array([min(pveff, 1) for pveff in (pv * self.adjustBy)])
        return pvadjust

    def __varianceDecomposition(self, cache=True):
        vd = limix.mtset.MTSet(Y=self.phenotypes, R=self.relatedness)
        vd_null_info = vd.fitNull(n_times=1000, rewrite=True)
        if vd_null_info['conv']:
            verboseprint("Variance decomposition converged")
            self.Cg = vd_null_info['Cg'] + 1e-4*sp.eye(self.P)
            self.Cn = vd_null_info['Cn'] + 1e-4*sp.eye(self.P)
            self.time = vd_null_info['time']
        else:
            sys.exit("Variance decomposition did not converge")

[docs]    def manhattanQQ(self, results, colourS='DarkBLue', colourNS='Orange',
            alphaS=0.5, alphaNS=0.1, thr_plotting=None, saveTo=None):
        r"""
	Plot manhattan and quantile-quantile plot of association results.

        Arguments:
            results (dictionary):
                dictionary generated via runAssociation analysis
            colourS (string, optional):
                colour of significant points
            colourNS (string, optional):
                colour of non-significant points
            alphaS (float, optional):
                plotting transparency of significant points
            alphaNS (float, optional):
                plotting transparency of non-significant points
            thr_plotting (float, optional):
                y-intercept for horizontal line as a marker for significance
            saveTo (string, optional):
		/path/to/output/directory to automatically save plot as pdf;
                needs user writing permission.

        Returns:
            (None)
        """
        if self.mode is 'singletrait':
            fig = plt.figure(figsize=[12,4])
            pv_min = np.array(results['pvalues']).min(axis=0)
            data = pd.DataFrame({'pv': pv_min, 
                'chrom': np.array(self.genotypes_info['chrom']).astype('int'), 
                'pos': np.array(self.genotypes_info['pos']).astype('int')})
            ax1 = fig.add_subplot(2,1,1)
            plot.plot_manhattan(df=data, 
                null_style={'alpha':alphaS, 'color':colourS}, 
                alt_style = {'alpha':alphaNS, 'color':colourNS},
                alpha=thr_plotting)
            ax1.set_title('%s' % self.name)
            ax2 = fig.add_subplot(2,1,2)
            plot.qqplot(results['pvalues'].ravel(), alphaLevel=None)
            fig.tight_layout()      
            
            if results['pvalues_adjust'] is not None:
            	fig = plt.figure(figsize=[12,8])
            	pv_adjust_min = np.array(results['pvalues_adjust']).min(
                    axis=0).ravel()
                data_adjust = pd.DataFrame({'pv': pv_min, 
                    'chrom': np.array(self.genotypes_info['chrom']).astype('int'), 
                    'pos': np.array(self.genotypes_info['pos']).astype('int')})
		ax1 = fig.add_subplot(4,1,1)
                plot.plot_manhattan(df=data, 
                    null_style={'alpha':alphaS, 'color':colourS}, 
                    alt_style = {'alpha':alphaNS, 'color':colourNS},
                    alpha=thr_plotting)
		ax1.set_title('%s (p-values)' % self.name)
		ax2 = fig.add_subplot(4,1,2)
		plot.qqplot(results['pvalues'].ravel())
		ax1 = fig.add_subplot(4,1,3)
                plot.plot_manhattan(df=data_adjust, 
                    null_style={'alpha':alphaS, 'color':colourS}, 
                    alt_style = {'alpha':alphaNS, 'color':colourNS},
                    alpha=thr_plotting)
		ax1.set_title('%s (p-values adjust)' % self.name)
		ax2 = fig.add_subplot(4,1,4)
		plot.qqplot(results['pvalues_adjust'].ravel())
		fig.tight_layout()      
  
        if self.mode is 'multitrait':
            pv = np.array(results['pvalues']).ravel() 
            data = pd.DataFrame({'pv': pv, 
                'chrom': np.array(self.genotypes_info['chrom']).astype('int'), 
                'pos': np.array(self.genotypes_info['pos']).astype('int')})
            fig = plt.figure(figsize=[12,4])
            ax1 = fig.add_subplot(2,1,1)
            plot.plot_manhattan(df=data, 
                null_style={'alpha':alphaS, 'color':colourS}, 
                alt_style = {'alpha':alphaNS, 'color':colourNS},
                alpha=thr_plotting)
            ax1.set_title('%s' % self.name)
            ax2 = fig.add_subplot(2,1,2)
            plot.qqplot(results['pvalues'].ravel())
            fig.tight_layout()

        if saveTo is not None:
            if self.name is not "":
                output = '{}/{}_{}.png'.format(saveTo, self.name, self.model)
            else:
                output = '{}/{}.png'.format(saveTo, self.model)
            fig.savefig(output)