Source code for limmbo.core.vdbootstrap

from limmbo.utils.utils import verboseprint
from limmbo.utils.utils import nans
from limmbo.utils.utils import regularize
from limmbo.utils.utils import inflate_matrix

import scipy as sp
from scipy.optimize import fmin_l_bfgs_b as opt
import pandas as pd
import numpy as np
import bottleneck as bn
import time
import cPickle

import limix_legacy as dlimix
import limix.mtset

from limix_core.covar import FreeFormCov
import pp

class DataMismatch(Exception):
    """Raised when dimensions of sample/ID names do not match dimension of
    corresponding data"""
    pass

[docs]class LiMMBo(object): def __init__(self, datainput, S, timing=False, iterations=10, verbose=False): r""" Arguments: datainput (): fdas timing (bool): if set to True, process time will be recorded iterations (int): S (int): subsampling size `S` """ self.phenotypes = datainput.phenotypes self.relatedness = datainput.relatedness self.S = S self.timing = timing self.iterations = iterations self.verbose = verbose try: self.phenotypes = np.array(self.phenotypes) except: raise IOError("datainput.phenotypes cannot be converted to np.array") try: self.relatedness = np.array(self.relatedness) except: raise IOError("datainput.relatedness cannot be converted to np.array") if self.S > self.phenotypes.shape[1]: raise DataMismatch(("Subsampling size S ({}) greater than number " "of phnenotypes ({})").format(self.S, self.phenotypes.shape[1]))
[docs] def runBootstrapCovarianceEstimation(self, seed, cpus, minCooccurrence=3, n=None): r""" Distribute variance decomposition of subset matrices via pp Arguments: seed (int): seed to initialise random number generator for bootstrapping minCooccurrence (int): minimum number a trait pair should be sampled; once reached for all trait pairs, sampling is stopped if n is None; default=3 n (int): if not None, sets the total number of permutations, otherwise n determined by minCooccurrence; default: None cpus (int): number of cpus available for covariance estimation Returns: (list): list containing variance components for all bootstrap runs """ self.P = self.phenotypes.shape[1] self.__generateBootstrapMatrix(seed=seed, n=n, minCooccurrence=minCooccurrence) ppservers = () jobs = [] results = [] if cpus is not None: job_server = pp.Server(cpus, ppservers=ppservers) else: job_server = pp.Server(ppservers=ppservers) verboseprint( 'Number of CPUs available for parallelising: {}'.format( job_server.get_ncpus()), verbose=self.verbose) for bs in range(self.runs): pheno = self.__bootstrapPhenotypes(bs) verboseprint('Start vd for bootstrap nr {}'.format(bs + 1)) jobs.append( job_server.submit(self.__VarianceDecomposition, (pheno, bs), (verboseprint, ), ("limix.mtset", "time"))) for job in jobs: bsresult = job() bsresult['bootstrap'] = self.bootstrap_matrix[bsresult[ 'bsindex'], :] results.append(bsresult)
return results
[docs] def combineBootstrap(self, results): r""" Combine the [`S` x `S`] subset covariance matrices to find the overall [`P` x `P`] covariance matrices Cg and Cn. Arguments: results (list): results of runBootstrapVarianceDecomposition() Returns: (dictionary): dictionary containing: - **Cg_fit** (numpy.array): [`P` x `P`] genetic covariance matrix via fitting - **Cn_fit** (numpy.array): [`P` x `P`] noise covariance matrix via fitting - **Cg_average** (numpy.array): [`P` x `P`] genetic covariance matrix via simple average - **Cn_average** (numpy.array): [`P` x `P`] noise covariance matrix via simple average - **Cg_all_bs** (numpy.array): [`runs` x `S` x `S`] genetic covariance matrices of `runs` phenotype subsets of size `S` - **Cn_all_bs** (numpy.array): [`runs` x `S` x `S`] noise covariance matrices of `runs` phenotype subsets of size `S` - **proc_time_ind_bs** (list): individual run times for all variance decomposition runs of [`S` x `S`] Cg and Cn - **proc_time_sum_ind_bs** (list): sum of individual run times for all variance decomposition runs of [`S` x `S`] Cg and Cn - **proc_time_combine_bs** (list): run time for finding [`P` x `P`] trait covariance estimates from fitting [`S` x `S`] bootstrap covariance estimates - **nr_of_bs** (int): number of bootstrap runs - **nr_of_successful_bs** (int): total number of successful bootstrapping runs i.e. variance decomposition converged - **results_fit_Cg** (): results parameters of the bfgs-fit of the genetic covariance matrices (via scipy.optimize.fmin_l_bfgs_g) - **results_fit_Cn** (): results parameters of the bfgs-fit of the noise covariance matrices (via scipy.optimize.fmin_l_bfgs_g) """ verboseprint('Combine bootstrapping results...', verbose=self.verbose) time0 = time.clock() bs_results = self.__getBootstrapResults(results=results) time1 = time.clock() proc_time_combine_bs = time1 - time0 proc_time_sum_ind_bs = np.array(bs_results['process_time_bs']).sum() verboseprint("Check Cg (average):", verbose=self.verbose) Cg_average, Cg_average_ev_min = regularize(bs_results['Cg_average']) verboseprint("Check Cn (average):", verbose=self.verbose) Cn_average, Cn_average_ev_min = regularize(bs_results['Cn_average']) verboseprint("Check Cg (fit):", verbose=self.verbose) Cg_fit, Cg_fit_ev_min = regularize(bs_results['Cg_fit']) verboseprint("Check Cn (fit):", verbose=self.verbose) Cn_fit, Cn_fit_ev_min = regularize(bs_results['Cn_fit']) results = {'Cg_fit': Cg_fit, 'Cn_fit': Cn_fit, 'Cg_average': Cg_average, 'Cn_average': Cn_average, 'Cg_all_bs': bs_results['Cg_bs'], 'Cn_all_bs': bs_results['Cn_bs'], 'proc_time_ind_bs': bs_results['process_time_bs'], 'proc_time_sum_ind_bs': proc_time_sum_ind_bs, 'proc_time_combine_bs': proc_time_combine_bs, 'nr_bs': self.runs, 'nr_successful_bs': bs_results['number_of_successful_bs'], 'results_fit_Cg': bs_results['results_fit_Cg'], 'results_fit_Cn': bs_results['results_fit_Cn'] }
return results
[docs] def saveVarianceComponents(self, resultsCombineBootstrap, output, intermediate=True): r""" Save variance components as comma-separated files or python objects (via Cpickle). Arguments: resultsCombineBootstrap (dictionary): - **Cg_fit** (numpy.array): [`P` x `P`] genetic covariance matrix via fitting - **Cn_fit** (numpy.array): [`P` x `P`] noise covariance matrix via fitting - **Cg_average** (numpy.array): [`P` x `P`] genetic covariance matrix via simple average - **Cn_average** (numpy.array): [`P` x `P`] noise covariance matrix via simple average - **Cg_all_bs** (numpy.array): [`runs` x `S` x `S`] genetic covariance matrices of `runs` phenotype subsets of size `S` - **Cn_all_bs** (numpy.array): [`runs` x `S` x `S`] noise covariance matrices of `runs` phenotype subsets of size `S` - **proc_time_ind_bs** (list): individual run times for all variance decomposition runs of [`S` x `S`] Cg and Cn - **proc_time_sum_ind_bs** (list): sum of individual run times for all variance decomposition runs of [`S` x `S`] Cg and Cn - **proc_time_combine_bs** (list): run time for finding [`P` x `P`] trait covariance estimates from fitting [`S` x `S`] bootstrap covariance estimates - **nr_of_bs** (int): number of bootstrap runs - **nr_of_successful_bs** (int): total number of successful bootstrapping runs i.e. variance decomposition converged - **results_fit_Cg** (): results parameters of the bfgs-fit of the genetic covariance matrices (via scipy.optimize.fmin_l_bfgs_g) - **results_fit_Cn** (): results parameters of the bfgs-fit of the noise covariance matrices (via scipy.optimize.fmin_l_bfgs_g) output (string): path/to/directory where variance components will be saved; needs writing permission intermediate (bool): if set to True, intermediate variance components (average covariance matrices, bootstrap matrices and results parameter of BFGS fit) are saved Returns: None """ verboseprint("Generate output files", verbose=self.verbose) verboseprint("Write [`P` x `P`] covariance matrices", verbose=self.verbose) pd.DataFrame(resultsCombineBootstrap['Cg_fit']).to_csv( '{}/Cg_fit_seed{}.csv'.format(output, self.seed), sep=",", header=False, index=False) pd.DataFrame(resultsCombineBootstrap['Cn_fit']).to_csv( '{}/Cn_fit_seed{}.csv'.format(output, self.seed), sep=",", header=False, index=False) if intermediate: verboseprint("Write bootstrap matrix", verbose=self.verbose) pd.DataFrame(self.bootstrap_matrix).to_csv( '{}/bootstrap_matrix.csv'.format(output), sep=",", index=True, header=False) verboseprint("Save intermediate variance components", verbose=self.verbose) verboseprint(("Write covariance matrices based on average of " "bootstrap matrices"), verbose=self.verbose) pd.DataFrame(resultsCombineBootstrap['Cg_average']).to_csv( "%s/Cg_average_seed%s.csv" % (output, self.seed), sep=",", header=False, index=False) pd.DataFrame(resultsCombineBootstrap['Cn_average']).to_csv( "%s/Cn_average_seed%s.csv" % (output, self.seed), sep=",", header=False, index=False) verboseprint(("Pickle array of all [`S` x `S`] bootstrap " "covariance matrices"), verbose=self.verbose) cPickle.dump(resultsCombineBootstrap['Cg_all_bs'], open('{}/Cg_all_bootstraps.p'.format(output), "wb")) cPickle.dump(resultsCombineBootstrap['Cn_all_bs'], open('{}/Cn_all_bootstraps.p'.format(output), "wb")) verboseprint(("Pickle result parameters of BFGS fit for fitting " "the [`S` x `S`] bootstrap covariance matrices to the " "[`P` x `P`] overall trait covariance matrices"), verbose=self.verbose) cPickle.dump(resultsCombineBootstrap['results_fit_Cg'], open("%s/optimise_results_Cg.p" % (output), "wb")) cPickle.dump(resultsCombineBootstrap['results_fit_Cg'], open("%s/optimise_results_Cn.p" % (output), "wb")) if self.timing: verboseprint("Save process times", verbose=self.verbose) pd.DataFrame(resultsCombineBootstrap['proc_time_ind_bs']).to_csv( "%s/process_time_all_bootstraps.csv" % (output), sep=",", header=False, index=False) overall_time = pd.DataFrame( [resultsCombineBootstrap['proc_time_combine_bs'], resultsCombineBootstrap['proc_time_sum_ind_bs']], index=["Proctime combine BS", "Proctime sum of individual BS"]) overall_time.to_csv("%s/process_time_summary.csv" % output,
sep=",", header=False, index=True) def __generateBootstrapMatrix(self, seed=12321, minCooccurrence=3, n=None): r""" Generate subsampling matrix. Arguments: seed (int, optional): for pseudo-random numbers generation; default: 12321 minCooccurrence (int): minimum number a trait pair should be sampled; once reached for all trait pairs, sampling is stopped if n is None; default=3 n (int): if not None, sets the total number of permutations, otherwise n determined by minCooccurrence; default: None Returns: None: updates LiMMBo instance with: - **seed** (int): seed for pseudo-random numbers generation - **runs** (int): n, if n was not None, or determined once all trait-trait subsamplings have occurrd minCooccurence times - **counts_min** (int): minimum trait-trait co-occurrence in sampling matrix - **bootstrap_matrix** (numpy.array): [`runs` x `S`] matrix containing bootstrap samples of numbers range(`P`) Examples: .. doctest:: >>> from limmbo.io import input >>> from limmbo.core import vdbootstrap >>> import numpy as np >>> from numpy.random import RandomState >>> from numpy.linalg import cholesky as chol >>> random = RandomState(5) >>> P = 10 >>> N = 1000 >>> SNP = 1000 >>> pheno = random.normal(0,1, (N, P)) >>> pheno_samples = np.array( ... ['S{}'.format(x+4) for x in range(N)]) >>> phenotype_ID = np.array( ... ['ID{}'.format(x+1) for x in range(P)]) >>> X = (random.rand(N, SNP) < 0.3).astype(float) >>> relatedness = np.dot(X, X.T)/float(SNP) >>> relatedness_samples = np.array( ... ['S{}'.format(x+1) for x in range(N)]) >>> indata = input.InputData() >>> indata.addPhenotypes(phenotypes = pheno, ... pheno_samples = pheno_samples, ... phenotype_ID = phenotype_ID) >>> indata.addRelatedness(relatedness = relatedness, ... relatedness_samples = relatedness_samples) >>> limmbo = vdbootstrap.LiMMBo(datainput=indata, verbose=False) >>> limmbo.generateBootstrapMatrix(seed=12, ... minCooccurrence=3) >>> limmbo.bootstrap_matrix[:5,:] array([[5, 8, 7, 0, 4], [6, 0, 9, 7, 3], [7, 0, 6, 3, 1], [8, 4, 9, 7, 5], [2, 4, 7, 1, 5]]) >>> limmbo.runs 30 >>> limmbo.count 3 """ rand_state = np.random.RandomState(seed) counts = sp.zeros((self.P, self.P)) return_list = [] if n is not None: verboseprint( ('Generate bootstrap matrix with {} bootstrap samples ' '(number of specified bootstraps').format(n), verbose=self.verbose) for i in xrange(n): bootstrap = rand_state.choice(a=range(self.P), size=self.S, replace=False) return_list.append(bootstrap) index = np.ix_(np.array(bootstrap), np.array(bootstrap)) counts[index] += 1 self.runs = n else: while counts.min() < minCooccurrence: bootstrap = rand_state.choice(a=range(self.P), size=self.S, replace=False) return_list.append(bootstrap) index = np.ix_(np.array(bootstrap), np.array(bootstrap)) counts[index] += 1 self.runs = len(return_list) verboseprint( ('Generated bootstrap matrix with {} bootstrap runs ' 'such that each trait-trait combination was ' 'sampled {}').format(self.runs, minCooccurrence), verbose=self.verbose) self.seed = seed self.bootstrap_matrix = np.array(return_list) self.counts_min = int(counts.min()) def __bootstrapPhenotypes(self, bs): r""" Subsample [`S`] phenotypes with [`N`] samples form total of [`P`] phenotypes. Indices for subsampling provided in [`bs` x `S`] LiMMBo.bootstrap_matrix, where `bs` is the total number of bootstraps Arguments: bs (int): bootstrap index Uses: self.bootstrap_matrix (array-like): [`bs` x `S`] with subsampling indeces for phenotypes Returns: (numpy.array): - **phenotypes**: [`N` x `S`] of subsampled phenotypes Examples: .. doctest:: >>> import pandas >>> import numpy >>> from numpy.random import RandomState >>> from limmbo.io.input import InputData >>> from limmbo.core.vdbootstrap import LiMMBo >>> from numpy.linalg import cholesky as chol >>> random = RandomState(10) >>> P = 10 >>> S = 5 >>> N = 100 >>> S = 1000 >>> snps = (random.rand(N, S) < 0.2).astype(float) >>> kinship = numpy.dot(snps, snps.T)/float(10) >>> y = random.randn(N,P) >>> pheno = numpy.dot(chol(kinship),y) >>> pheno_ID = ['PID{}'.format(x+1) for x in range(P)] >>> samples = ['SID{}'.format(x+1) for x in range(N)] >>> datainput = InputData() >>> datainput.addPhenotypes(phenotypes = pheno, ... phenotype_ID = pheno_ID, ... pheno_samples = samples) >>> datainput.addRelatedness(relatedness = kinship, ... relatedness_samples = samples) >>> limmbo = LiMMBo(datainput = datainput, verbose=False) >>> limmbo.generateBootstrapMatrix(P = P, S = S) >>> bootstrap_pheno_1 = limmbo.bootstrapPhenotypes(bs = 0) >>> bootstrap_pheno_1.shape (100, 5) """ bootstrap = self.bootstrap_matrix[bs, :] phenotypes = self.phenotypes[:, bootstrap] return phenotypes def __getBootstrapResults(self, results): r""" Collect bootstrap results of [`S` x `S`] traits and combine all runs to total [`P` x `P`] covariance matrix Arguments: results (list): results of runBootstrapVarianceDecomposition() Uses: runs (int): number of bootstrapping runs executed for this experiment Returns: (dictionary): dictionary containing: - **Cg_fit** (numpy.array): [`P` x `P`] genetic covariance matrix via fitting - **Cn_fit** (numpy.array): [`P` x `P`] noise covariance matrix via fitting - **Cg_average** (numpy.array): [`P` x `P`] genetic covariance matrix via simple average - **Cn_average** (numpy.array): [`P` x `P`] noise covariance matrix via simple average - **Cg_bs** (numpy.array): [`runs` x `S` x `S`] genetic covariance matrices of `runs` phenotype subsets of size `S` - **Cn_bs** (numpy.array): [`runs` x `S` x `S`] noise covariance matrices of `runs` phenotype subsets of size `S` - **process_time_bs** (list): run times for all variance decomposition runs of [`S` x `S`] Cg and Cn - **number_of_successful_bs** (int): total number of successful bootstrapping runs i.e. variance decomposition converged - **results_fit_Cg** (): results parameters of the bfgs-fit of the genetic covariance matrices (via scipy.optimize.fmin_l_bfgs_g) - **results_fit_Cn** (): results parameters of the bfgs-fit of the noise covariance matrices (via scipy.optimize.fmin_l_bfgs_g) """ # list to contain trait indices of each bootstrap run bootstrap = {} #sample_ID = None # create np.arrays of dimension PxP for mean and std of each entry in # covariance matrix Cg_average = sp.zeros((self.P, self.P)) Cn_average = sp.zeros((self.P, self.P)) # Process time per bootstrap process_time_bs = [] # create np.arrays of dimension runs x P x P to store the bootstrapping # results for averaging Cg_bs_large = nans((self.runs, self.P, self.P)) Cn_bs_large = nans((self.runs, self.P, self.P)) Cg_bs = nans((self.runs, self.S, self.S)) Cn_bs = nans((self.runs, self.S, self.S)) n = 0 for vdresult in results: bootstrap[n] = vdresult['bootstrap'] process_time_bs.append(vdresult['process_time']) # store results of each bootstrap as matrix of inflated # matrices: NAs for traits that were not sampled if vdresult['Cg'] is None or vdresult['Cn'] is None: continue Cg_bs_large[n, :, :] = inflate_matrix( vdresult['Cg'], bootstrap[n], P=self.P, zeros=False) Cn_bs_large[n, :, :] = inflate_matrix( vdresult['Cn'], bootstrap[n], P=self.P, zeros=False) # store results of each bootstrap as matrix of small S x S # matrices generated by each bootstrap Cg_bs[n, :, :] = vdresult['Cg'] Cn_bs[n, :, :] = vdresult['Cn'] n += 1 # Total number of successful bootstrapping runs number_of_bs = n - 1 # Computing mean of bootstrapping results at # each position p1, p2 in overall PxP covariance matrix verboseprint( ("Computing mean of bootstrapping results"), verbose=self.verbose) for p1 in range(self.P): for p2 in range(self.P): vals = Cg_bs_large[:, p1, p2] Cg_average[p1, p2] = vals[~sp.isnan(vals)].mean() vals = Cn_bs_large[:, p1, p2] Cn_average[p1, p2] = vals[~sp.isnan(vals)].mean() Cg_reg, ev_g = regularize(Cg_average) Cn_reg, ev_n, = regularize(Cn_average) verboseprint( ("Fitting bootstrapping results: minimize residual sum of " "squares over all bootstraps"), verbose=self.verbose) Cg_fit, results_fit_Cg = self.__fit_bootstrap_results( cov_init=Cg_reg, cov_bootstrap=Cg_bs, bootstrap_indeces=bootstrap, number_of_bs=number_of_bs, name="Cg") Cn_fit, results_fit_Cn = self.__fit_bootstrap_results( cov_init=Cn_reg, cov_bootstrap=Cn_bs, bootstrap_indeces=bootstrap, number_of_bs=number_of_bs, name="Cn") results = {'Cg_bs': Cg_bs, 'Cn_bs': Cn_bs, 'Cg_fit': Cg_fit, 'Cn_fit': Cn_fit, 'Cg_average': Cg_average, 'Cn_average': Cn_average, 'process_time_bs': process_time_bs, 'number_of_successful_bs': number_of_bs, 'results_fit_Cg': results_fit_Cg, 'results_fit_Cn': results_fit_Cn, } return results def __fit_bootstrap_results(self, cov_init, cov_bootstrap, bootstrap_indeces, number_of_bs, name): r""" Fit bootstrap results of [`S` x `S`] traits and combine all runs to total [`P` x `P`] covariance matrix. Arguments: cov_init (array-like): [`P` x `P`] covariance matrix used to initialise fitting number_of_bs (int): number of successful bootstrapping runs executed for this experiment cov_bootstrap (array-like): [`number_of_bs` x `S` x `S`] bootstrap results bootstrap_indeces (array-like): [`number_of_bs` x `S`] matrix of bootstrapping indeces name (string): name of covariance matrix Returns: (tuple): tuple containing: - **C_opt_value** (array-like): [`P` x `P`] covariance matrix if fit successful, else 1x1 matrix containing string 'did not converge' - **results_fit** (): results parameters of the bfgs-fit of the covariance matrix (via scipy.optimize.fmin_l_bfgs_g) """ # initialise free-from Covariance Matrix: use simple average of # bootstraps as initial values # make use of Choleski decomposition and get parameters associated with # cov_init #C_init = dlimix.CFreeFormCF(self.P) #C_init.setParamsCovariance(cov_init) C_init = FreeFormCov(self.P) C_init.setCovariance(cov_init) params = C_init.getParams() # initialise free-form covariance matrix C_fit: parameters to be set in # optimize function #C_fit = dlimix.CFreeFormCF(self.P) C_fit = FreeFormCov(self.P) # Fit bootstrap results to obtain closest covariance matrix # use parameters obtained from mean-initiated covariance matrix above verboseprint( "Fitting parameters (minimizing rss via BFGS)...", verbose=self.verbose) results_fit = opt( self.__rss, x0=params, args=(C_fit, number_of_bs, bootstrap_indeces, cov_bootstrap), factr=1e12, iprint=2) if results_fit[2]['warnflag'] == 0: #C_opt = dlimix.CFreeFormCF(self.P) C_opt = FreeFormCov(self.P) C_opt.setParams(results_fit[0]) C_opt_value = C_opt.K() else: C_opt_value = np.array(['did not converge']) return C_opt_value, results_fit def __rss(self, params, C, n_bootstrap, index_bootstrap, list_C): r""" Compute residual sum of squares and gradient for estimate of covariance matrix C and bootstrapped values of C. Arguments: params (array-like): of parameters used for initialising estimate C (length: 1/2*`P`*(`P`+1)) C: intialised free-form covariance matrix, to be fitted n_bootstrap (int): number of successful bootstrapping runs executed for this experiment index_bootstrap (numpy.array): [`number_of_bs` x `S`] matrix of bootstrapping indeces list_C (list): list of n_bootstrap [`S` x `S`] bootstrapped covariance matrices Returns: (tuple): tuple containing: - **RSS_res**: residual sum of squares - **RSS_grad_res**: gradient of residual sum of squares function """ # initialise values of covariance matrix with parameters (based on # cholesky decomposition) params = np.array(params) C.setParams(params) # get values of the covariance matrix C_value = C.K() # number of parameters: 1/2*P*(P+1) n_params = params.shape[0] # compute residual sum of squares between estimate of P x P Cg and # bootstrapped S x S Cgs RSS_res, index = self.__RSS_compute(n_bootstrap, C_value, list_C, index_bootstrap) # compute gradient (first derivative of rss at each bootstrap) RSS_grad_res = self.__RSS_grad_compute(n_bootstrap, n_params, C, C_value, list_C, index) return RSS_res, RSS_grad_res @staticmethod def __RSS_compute(n_bootstrap, C_value, list_C, index_bootstrap): r""" Compute residual sum of squares (rss) for each bootstrap run - used parameter to be optimized in quasi-Newton method (BFGS) of optimization - bootstrap matrices versus matrix to be optimized - matrix to be optimized initialised with average over all bootstraps for each position Arguments: n_bootstrap (int): number of successful bootstrap runs; C_value (array-like): [`P` x `P`] matrix to be optimized list_C (array-like): [`n_bootstrap` x `S` x `S`] bootstrapped covariance matrices Returns: (tuple): tuple containing: - **res** (double): sum of residual sum of squares over all `n_bootstrap` runs - **index** (list) : [`n_bootstrap` x `S`] list containing trait indeces used in each bootstrap run; to be passed to gradient computation """ res = 0 index = {} for i in range(n_bootstrap): index[i] = np.ix_(index_bootstrap[i], index_bootstrap[i]) res += bn.ss(C_value[index[i]] - list_C[i], axis=None) return res, index @staticmethod def __RSS_grad_compute(n_bootstrap, n_params, C, C_value, list_C, index): r""" Compute gradient of residual sum of squares (rss) for each bootstrap run at each index - used as gradient parameter in quasi-Newton method (BFGS) of optimization - matrix to be optimized initialised with average over all bootstraps for each position Arguments: n_bootstrap (int): number of successful bootstrap runs; n_params (int): number of parameters of the model (parameters needed to build positive, semi-definite matrix with Choleski decomposition) C_value (array-like): [`P` x `P`] matrix to be optimized list_C (array-like): [`n_bootstrap` x `S` x `S`] bootstrapped covariance matrices index (list): [`n_bootstrap `x `S`] trait indices used in each bootstrap run Returns: (numpy.array): - **res**: [`n_params` x 1] sum of gradient over all successful bootstrap runs for each parameter to be fitted """ res = sp.zeros(n_params) for pi in range(n_params): #Cgrad = C.Kgrad_param(pi) Cgrad = C.K_grad_i(pi) for i in range(n_bootstrap): res[pi] += (2 * Cgrad[index[i]] * (C_value[index[i]] - list_C[i])).sum() return res def __VarianceDecomposition(self, phenoSubset, bs=None): r""" Compute variance decomposition of phenotypes into genetic and noise covariance Arguments: phenoSubset (array-like): [`N` x `S`] phenotypes for which variance decomposition should be computed bs (int): number of subsample phenotypes (array-like): [`N` x `P`] original phenotypes relatedness (array-like): [`N` x `N`] kinship/genetic relatedness used in estimation of genetic component output (string): output directory; needed for caching Returns: (dictionary): dictionary containing: - **Cg** (numpy.array): [`S` x `S`] genetic variance component - **Cn** (numpy.array): [`S` x `S`] noise variance component - **process_time** (double): cpu time of variance decomposition - **bsindex** (int): number of subsample """ # time variance decomposition t0 = time.clock() vd = limix.mtset.MTSet(Y=phenoSubset, R=self.relatedness) vd_null_info = vd.fitNull( cache=False, n_times=self.iterations, rewrite=True) t1 = time.clock() processtime = t1 - t0 if vd_null_info['conv']: verboseprint( ('Variance decomposition for bootstrap number {} ' 'converged').format(bs + 1), verbose=self.verbose) Cg = vd_null_info['Cg'] Cn = vd_null_info['Cn'] else: verboseprint( ('Variance decomposition for bootstrap number {} ' 'did not converge').format(bs + 1), verbose=self.verbose) Cg = None Cn = None
return {'Cg': Cg, 'Cn': Cn, 'process_time': processtime, 'bsindex': bs}