Source code for limmbo.core.vdbootstrap

from limmbo.utils.utils import verboseprint
from limmbo.utils.utils import nans
from limmbo.utils.utils import regularize
from limmbo.utils.utils import inflate_matrix

import scipy as sp
from scipy.optimize import fmin_l_bfgs_b as opt
import pandas as pd
import numpy as np
import bottleneck as bn
import time
import cPickle

import limix_legacy as dlimix
import limix.mtset

from limix_core.covar import FreeFormCov
import pp

class DataMismatch(Exception):
    """Raised when dimensions of sample/ID names do not match dimension of
    corresponding data"""
    pass

[docs]class LiMMBo(object):
    def __init__(self, datainput, S, timing=False, iterations=10,
            verbose=False):
        r"""
        Arguments:
            datainput ():
                fdas
            timing (bool):
	        if set to True, process time will be recorded
            iterations (int):

            S (int):
                subsampling size `S`

        """
        self.phenotypes = datainput.phenotypes
        self.relatedness = datainput.relatedness
        self.S = S
        self.timing = timing
        self.iterations = iterations
        self.verbose = verbose

        try:
            self.phenotypes = np.array(self.phenotypes)
        except:
            raise IOError("datainput.phenotypes cannot be converted to np.array")

	try:
	    self.relatedness = np.array(self.relatedness)
	except:
	    raise IOError("datainput.relatedness cannot be converted to np.array")
        
	if self.S > self.phenotypes.shape[1]:
            raise DataMismatch(("Subsampling size S ({}) greater than number "
                "of phnenotypes ({})").format(self.S, self.phenotypes.shape[1]))

[docs]    def runBootstrapCovarianceEstimation(self,
            seed, cpus, minCooccurrence=3, n=None):
        r"""
        Distribute variance decomposition of subset matrices via pp

        Arguments:
            seed (int):
                seed to initialise random number generator for bootstrapping
            minCooccurrence (int):
                minimum number a trait pair should be sampled; once reached
                for all trait pairs, sampling is stopped if n is None;
                default=3
            n (int):
                if not None, sets the total number of permutations,
                otherwise n determined by minCooccurrence;  default: None
            cpus (int):
                number of cpus available for covariance estimation

        Returns:
            (list):
                list containing variance components for all bootstrap runs
        """

        self.P = self.phenotypes.shape[1]
	self.__generateBootstrapMatrix(seed=seed, n=n,
                                       minCooccurrence=minCooccurrence)
        ppservers = ()
        jobs = []
        results = []

        if cpus is not None:
            job_server = pp.Server(cpus, ppservers=ppservers)
        else:
            job_server = pp.Server(ppservers=ppservers)

        verboseprint(
            'Number of CPUs available for parallelising: {}'.format(
            job_server.get_ncpus()),
            verbose=self.verbose)

        for bs in range(self.runs):
            pheno = self.__bootstrapPhenotypes(bs)
            verboseprint('Start vd for bootstrap nr {}'.format(bs + 1))
            jobs.append(
                job_server.submit(self.__VarianceDecomposition, (pheno, bs),
                                  (verboseprint, ), ("limix.mtset", "time")))

        for job in jobs:
            bsresult = job()
            bsresult['bootstrap'] = self.bootstrap_matrix[bsresult[
                'bsindex'], :]
            results.append(bsresult)

        return results

[docs]    def combineBootstrap(self, results):
        r"""
        Combine the [`S` x `S`] subset covariance matrices to find the overall
        [`P` x `P`] covariance matrices Cg and Cn.

        Arguments:
            results (list):
		results of runBootstrapVarianceDecomposition()

        Returns:
            (dictionary):
                dictionary containing:

                - **Cg_fit** (numpy.array):
		  [`P` x `P`] genetic covariance matrix via fitting
                - **Cn_fit** (numpy.array):
		  [`P` x `P`] noise covariance matrix via fitting
                - **Cg_average** (numpy.array):
		  [`P` x `P`] genetic covariance matrix via simple average
                - **Cn_average** (numpy.array):
		  [`P` x `P`] noise covariance matrix via simple average
                - **Cg_all_bs** (numpy.array):
		  [`runs` x `S` x `S`] genetic covariance matrices of `runs`
		  phenotype subsets of size `S`
                - **Cn_all_bs** (numpy.array):
		  [`runs` x `S` x `S`] noise covariance matrices of `runs`
		  phenotype subsets of size `S`
                - **proc_time_ind_bs** (list):
		  individual run times for all variance decomposition runs of
		  [`S` x `S`] Cg and Cn
                - **proc_time_sum_ind_bs** (list):
		  sum of individual run times for all variance decomposition
		  runs of [`S` x `S`] Cg and Cn
                - **proc_time_combine_bs** (list):
		  run time for finding [`P` x `P`] trait covariance estimates
		  from fitting [`S` x `S`] bootstrap covariance estimates
                - **nr_of_bs** (int):
		  number of bootstrap runs
                - **nr_of_successful_bs** (int):
		  total number of successful bootstrapping runs i.e. variance
		  decomposition converged
		- **results_fit_Cg** ():
		  results parameters of the bfgs-fit of the genetic covariance
		  matrices (via scipy.optimize.fmin_l_bfgs_g)
		- **results_fit_Cn** ():
		  results parameters of the bfgs-fit of the noise covariance
		  matrices (via scipy.optimize.fmin_l_bfgs_g)
        """

        verboseprint('Combine bootstrapping results...', verbose=self.verbose)
        time0 = time.clock()
        bs_results = self.__getBootstrapResults(results=results)
        time1 = time.clock()

        proc_time_combine_bs = time1 - time0
        proc_time_sum_ind_bs = np.array(bs_results['process_time_bs']).sum()

        verboseprint("Check Cg (average):", verbose=self.verbose)
        Cg_average, Cg_average_ev_min = regularize(bs_results['Cg_average'])
        verboseprint("Check Cn (average):", verbose=self.verbose)
        Cn_average, Cn_average_ev_min = regularize(bs_results['Cn_average'])
        verboseprint("Check Cg (fit):", verbose=self.verbose)
        Cg_fit, Cg_fit_ev_min = regularize(bs_results['Cg_fit'])
        verboseprint("Check Cn (fit):", verbose=self.verbose)
        Cn_fit, Cn_fit_ev_min = regularize(bs_results['Cn_fit'])

        results = {'Cg_fit': Cg_fit, 'Cn_fit': Cn_fit,
                   'Cg_average': Cg_average, 'Cn_average': Cn_average,
                   'Cg_all_bs': bs_results['Cg_bs'],
                   'Cn_all_bs': bs_results['Cn_bs'],
                   'proc_time_ind_bs': bs_results['process_time_bs'],
                   'proc_time_sum_ind_bs': proc_time_sum_ind_bs,
                   'proc_time_combine_bs': proc_time_combine_bs,
                   'nr_bs': self.runs,
                   'nr_successful_bs': bs_results['number_of_successful_bs'],
                   'results_fit_Cg': bs_results['results_fit_Cg'],
                   'results_fit_Cn': bs_results['results_fit_Cn']
                   }

        return results

[docs]    def saveVarianceComponents(self, resultsCombineBootstrap, output,
                               intermediate=True):
        r"""
        Save variance components as comma-separated files or python objects
        (via Cpickle).

        Arguments:
            resultsCombineBootstrap (dictionary):

                - **Cg_fit** (numpy.array):
                  [`P` x `P`] genetic covariance matrix via fitting
                - **Cn_fit** (numpy.array):
                  [`P` x `P`] noise covariance matrix via fitting
                - **Cg_average** (numpy.array):
                  [`P` x `P`] genetic covariance matrix via simple average
                - **Cn_average** (numpy.array):
                  [`P` x `P`] noise covariance matrix via simple average
                - **Cg_all_bs** (numpy.array):
                  [`runs` x `S` x `S`] genetic covariance matrices of `runs`
                  phenotype subsets of size `S`
                - **Cn_all_bs** (numpy.array):
                  [`runs` x `S` x `S`] noise covariance matrices of `runs`
                  phenotype subsets of size `S`
                - **proc_time_ind_bs** (list):
                  individual run times for all variance decomposition runs of
                  [`S` x `S`] Cg and Cn
                - **proc_time_sum_ind_bs** (list):
                  sum of individual run times for all variance decomposition
                  runs of [`S` x `S`] Cg and Cn
                - **proc_time_combine_bs** (list):
                  run time for finding [`P` x `P`] trait covariance estimates
                  from fitting [`S` x `S`] bootstrap covariance estimates
                - **nr_of_bs** (int):
                  number of bootstrap runs
                - **nr_of_successful_bs** (int):
                  total number of successful bootstrapping runs i.e. variance
                  decomposition converged
                - **results_fit_Cg** ():
                  results parameters of the bfgs-fit of the genetic covariance
                  matrices (via scipy.optimize.fmin_l_bfgs_g)
                - **results_fit_Cn** ():
                  results parameters of the bfgs-fit of the noise covariance
                  matrices (via scipy.optimize.fmin_l_bfgs_g)

            output (string):
                path/to/directory where variance components will be saved;
                needs writing permission
            intermediate (bool):
                if set to True, intermediate variance components (average
                covariance matrices, bootstrap matrices and results parameter
                of BFGS fit) are saved

        Returns:
            None
        """

        verboseprint("Generate output files", verbose=self.verbose)

        verboseprint("Write [`P` x `P`] covariance matrices",
            verbose=self.verbose)
        pd.DataFrame(resultsCombineBootstrap['Cg_fit']).to_csv(
            '{}/Cg_fit_seed{}.csv'.format(output, self.seed), sep=",",
            header=False, index=False)
        pd.DataFrame(resultsCombineBootstrap['Cn_fit']).to_csv(
            '{}/Cn_fit_seed{}.csv'.format(output, self.seed), sep=",",
            header=False, index=False)


        if intermediate:
            verboseprint("Write bootstrap matrix", verbose=self.verbose)
            pd.DataFrame(self.bootstrap_matrix).to_csv(
                '{}/bootstrap_matrix.csv'.format(output), sep=",", index=True,
                header=False)

            verboseprint("Save intermediate variance components",
                verbose=self.verbose)
            verboseprint(("Write covariance matrices based on average of "
                "bootstrap matrices"), verbose=self.verbose)
            pd.DataFrame(resultsCombineBootstrap['Cg_average']).to_csv(
                "%s/Cg_average_seed%s.csv" % (output, self.seed),
                sep=",", header=False, index=False)
            pd.DataFrame(resultsCombineBootstrap['Cn_average']).to_csv(
                "%s/Cn_average_seed%s.csv" % (output, self.seed),
                sep=",", header=False, index=False)

            verboseprint(("Pickle array of all [`S` x `S`] bootstrap "
                    "covariance matrices"), verbose=self.verbose)
            cPickle.dump(resultsCombineBootstrap['Cg_all_bs'],
                     open('{}/Cg_all_bootstraps.p'.format(output), "wb"))
            cPickle.dump(resultsCombineBootstrap['Cn_all_bs'],
                     open('{}/Cn_all_bootstraps.p'.format(output), "wb"))

            verboseprint(("Pickle result parameters of BFGS fit for fitting "
                "the [`S` x `S`] bootstrap covariance matrices to the "
                "[`P` x `P`] overall trait covariance matrices"),
                verbose=self.verbose)
            cPickle.dump(resultsCombineBootstrap['results_fit_Cg'],
                         open("%s/optimise_results_Cg.p" % (output), "wb"))
            cPickle.dump(resultsCombineBootstrap['results_fit_Cg'],
                         open("%s/optimise_results_Cn.p" % (output), "wb"))

        if self.timing:
            verboseprint("Save process times", verbose=self.verbose)
            pd.DataFrame(resultsCombineBootstrap['proc_time_ind_bs']).to_csv(
                "%s/process_time_all_bootstraps.csv" % (output),
                sep=",",
                header=False,
                index=False)
            overall_time =  pd.DataFrame(
                [resultsCombineBootstrap['proc_time_combine_bs'],
                 resultsCombineBootstrap['proc_time_sum_ind_bs']],
                index=["Proctime combine BS",
                       "Proctime sum of individual BS"])
            overall_time.to_csv("%s/process_time_summary.csv" % output,
                sep=",", header=False, index=True)

    def __generateBootstrapMatrix(self, seed=12321, minCooccurrence=3,
                                n=None):
        r"""
        Generate subsampling matrix.

        Arguments:
            seed (int, optional):
                for pseudo-random numbers generation; default: 12321
            minCooccurrence (int):
                minimum number a trait pair should be sampled; once reached
                for all trait pairs, sampling is stopped if n is None;
                default=3
            n (int):
                if not None, sets the total number of permutations,
                otherwise n determined by minCooccurrence;  default: None

        Returns:
            None:
                updates LiMMBo instance with:

             - **seed** (int):
               seed for pseudo-random numbers generation
             - **runs** (int):
		n, if n was not None, or determined once all trait-trait
		subsamplings have occurrd minCooccurence times
             - **counts_min** (int):
	       minimum trait-trait co-occurrence in sampling matrix
             - **bootstrap_matrix** (numpy.array):
		[`runs` x `S`] matrix containing bootstrap samples of numbers
		range(`P`)

        Examples:

             .. doctest::

                >>> from limmbo.io import input
                >>> from limmbo.core import vdbootstrap
                >>> import numpy as np
                >>> from numpy.random import RandomState
                >>> from numpy.linalg import cholesky as chol
                >>> random = RandomState(5)
                >>> P = 10
                >>> N = 1000
                >>> SNP = 1000
                >>> pheno = random.normal(0,1, (N, P))
                >>> pheno_samples = np.array(
                ...     ['S{}'.format(x+4) for x in range(N)])
                >>> phenotype_ID = np.array(
                ...     ['ID{}'.format(x+1) for x in range(P)])
                >>> X = (random.rand(N, SNP) < 0.3).astype(float)
                >>> relatedness = np.dot(X, X.T)/float(SNP)
                >>> relatedness_samples = np.array(
                ...     ['S{}'.format(x+1) for x in range(N)])
                >>> indata = input.InputData()
                >>> indata.addPhenotypes(phenotypes = pheno,
                ...                      pheno_samples = pheno_samples,
                ...                      phenotype_ID = phenotype_ID)
                >>> indata.addRelatedness(relatedness = relatedness,
                ...                  relatedness_samples = relatedness_samples)
		>>> limmbo = vdbootstrap.LiMMBo(datainput=indata, verbose=False)
		>>> limmbo.generateBootstrapMatrix(seed=12,
                ...               		   minCooccurrence=3)
		>>> limmbo.bootstrap_matrix[:5,:]
		array([[5, 8, 7, 0, 4],
		       [6, 0, 9, 7, 3],
		       [7, 0, 6, 3, 1],
		       [8, 4, 9, 7, 5],
		       [2, 4, 7, 1, 5]])
		>>> limmbo.runs
		30
		>>> limmbo.count
		3



        """
        rand_state = np.random.RandomState(seed)
        counts = sp.zeros((self.P, self.P))
        return_list = []

        if n is not None:
            verboseprint(
                ('Generate bootstrap matrix with {} bootstrap samples '
                 '(number of specified bootstraps').format(n),
                verbose=self.verbose)
            for i in xrange(n):
                bootstrap = rand_state.choice(a=range(self.P), size=self.S,
                    replace=False)
                return_list.append(bootstrap)
                index = np.ix_(np.array(bootstrap), np.array(bootstrap))
                counts[index] += 1
            self.runs = n
        else:
            while counts.min() < minCooccurrence:
                bootstrap = rand_state.choice(a=range(self.P), size=self.S,
                    replace=False)
                return_list.append(bootstrap)
                index = np.ix_(np.array(bootstrap), np.array(bootstrap))
                counts[index] += 1
            self.runs = len(return_list)
            verboseprint(
                ('Generated bootstrap matrix with {} bootstrap runs '
                'such that each trait-trait combination was '
		'sampled {}').format(self.runs, minCooccurrence),
                verbose=self.verbose)

        self.seed = seed
        self.bootstrap_matrix = np.array(return_list)
        self.counts_min = int(counts.min())

    def __bootstrapPhenotypes(self, bs):
        r"""
        Subsample [`S`] phenotypes with [`N`] samples form total of [`P`]
        phenotypes. Indices for subsampling provided in [`bs` x `S`]
        LiMMBo.bootstrap_matrix, where `bs` is the total number of bootstraps

        Arguments:
            bs (int):
                bootstrap index

        Uses:
            self.bootstrap_matrix (array-like):
                [`bs` x `S`] with subsampling indeces for phenotypes

        Returns:
            (numpy.array):

                - **phenotypes**:
		  [`N` x `S`] of subsampled phenotypes

        Examples:

             .. doctest::

		 >>> import pandas
		 >>> import numpy
		 >>> from numpy.random import RandomState
		 >>> from limmbo.io.input import InputData
		 >>> from limmbo.core.vdbootstrap import LiMMBo
		 >>> from numpy.linalg import cholesky as chol
		 >>> random = RandomState(10)
		 >>> P = 10
		 >>> S = 5
		 >>> N = 100
		 >>> S = 1000
		 >>> snps = (random.rand(N, S) < 0.2).astype(float)
		 >>> kinship = numpy.dot(snps, snps.T)/float(10)
		 >>> y  = random.randn(N,P)
		 >>> pheno = numpy.dot(chol(kinship),y)
		 >>> pheno_ID = ['PID{}'.format(x+1) for x in range(P)]
		 >>> samples = ['SID{}'.format(x+1) for x in range(N)]
		 >>> datainput = InputData()
		 >>> datainput.addPhenotypes(phenotypes = pheno,
		 ...                         phenotype_ID = pheno_ID,
		 ...                         pheno_samples = samples)
		 >>> datainput.addRelatedness(relatedness = kinship,
		 ...                          relatedness_samples = samples)
		 >>> limmbo = LiMMBo(datainput = datainput, verbose=False)
		 >>> limmbo.generateBootstrapMatrix(P = P, S = S)
		 >>> bootstrap_pheno_1 = limmbo.bootstrapPhenotypes(bs = 0)
		 >>> bootstrap_pheno_1.shape
		 (100, 5)
        """
        bootstrap = self.bootstrap_matrix[bs, :]
        phenotypes = self.phenotypes[:, bootstrap]

        return phenotypes


    def __getBootstrapResults(self, results):
        r"""
        Collect bootstrap results of [`S` x `S`] traits and combine all runs to
        total [`P` x `P`] covariance matrix

        Arguments:
            results (list):
                results of runBootstrapVarianceDecomposition()

        Uses:
            runs (int):
                number of bootstrapping runs executed for this experiment

        Returns:
            (dictionary):
                dictionary containing:

                - **Cg_fit** (numpy.array):
		  [`P` x `P`] genetic covariance matrix via fitting
                - **Cn_fit** (numpy.array):
		  [`P` x `P`] noise covariance matrix via fitting
                - **Cg_average** (numpy.array):
		  [`P` x `P`] genetic covariance matrix via simple average
                - **Cn_average** (numpy.array):
		  [`P` x `P`] noise covariance matrix via simple average
                - **Cg_bs** (numpy.array):
		  [`runs` x `S` x `S`] genetic covariance matrices of `runs`
		  phenotype subsets of size `S`
                - **Cn_bs** (numpy.array):
		  [`runs` x `S` x `S`] noise covariance matrices of `runs`
		  phenotype subsets of size `S`
                - **process_time_bs** (list):
		  run times for all variance decomposition runs of [`S` x `S`]
		  Cg and Cn
                - **number_of_successful_bs** (int):
		  total number of successful bootstrapping runs i.e. variance
		  decomposition converged
		- **results_fit_Cg** ():
		  results parameters of the bfgs-fit of the genetic covariance
		  matrices (via scipy.optimize.fmin_l_bfgs_g)
		- **results_fit_Cn** ():
		  results parameters of the bfgs-fit of the noise covariance
		  matrices (via scipy.optimize.fmin_l_bfgs_g)
        """

        # list to contain trait indices of each bootstrap run
        bootstrap = {}
        #sample_ID = None

        # create np.arrays of dimension PxP for mean and std of each entry in
        # covariance matrix
        Cg_average = sp.zeros((self.P, self.P))
        Cn_average = sp.zeros((self.P, self.P))

        # Process time per bootstrap
        process_time_bs = []

        # create np.arrays of dimension runs x P x P to store the bootstrapping
        # results for averaging
        Cg_bs_large = nans((self.runs, self.P, self.P))
        Cn_bs_large = nans((self.runs, self.P, self.P))

        Cg_bs = nans((self.runs, self.S, self.S))
        Cn_bs = nans((self.runs, self.S, self.S))

        n = 0
        for vdresult in results:
            bootstrap[n] = vdresult['bootstrap']
            process_time_bs.append(vdresult['process_time'])

            # store results of each bootstrap as matrix of inflated
            # matrices: NAs for traits that were not sampled
            if vdresult['Cg'] is None or vdresult['Cn'] is None:
                continue
            Cg_bs_large[n, :, :] = inflate_matrix(
                vdresult['Cg'], bootstrap[n], P=self.P, zeros=False)
            Cn_bs_large[n, :, :] = inflate_matrix(
                vdresult['Cn'], bootstrap[n], P=self.P, zeros=False)

            # store results of each bootstrap as matrix of small S x S
            # matrices generated by each bootstrap
            Cg_bs[n, :, :] = vdresult['Cg']
            Cn_bs[n, :, :] = vdresult['Cn']
            n += 1
        # Total number of successful bootstrapping runs
        number_of_bs = n - 1

        # Computing mean of bootstrapping results at
        # each position p1, p2 in overall PxP covariance matrix
        verboseprint(
            ("Computing mean of bootstrapping results"),
            verbose=self.verbose)
        for p1 in range(self.P):
            for p2 in range(self.P):
                vals = Cg_bs_large[:, p1, p2]
                Cg_average[p1, p2] = vals[~sp.isnan(vals)].mean()
                vals = Cn_bs_large[:, p1, p2]
                Cn_average[p1, p2] = vals[~sp.isnan(vals)].mean()

        Cg_reg, ev_g = regularize(Cg_average)
        Cn_reg, ev_n,  = regularize(Cn_average)

        verboseprint(
            ("Fitting bootstrapping results: minimize residual sum of "
             "squares over all bootstraps"),
            verbose=self.verbose)
        Cg_fit, results_fit_Cg = self.__fit_bootstrap_results(
            cov_init=Cg_reg,
            cov_bootstrap=Cg_bs,
            bootstrap_indeces=bootstrap,
            number_of_bs=number_of_bs,
            name="Cg")
        Cn_fit, results_fit_Cn = self.__fit_bootstrap_results(
            cov_init=Cn_reg,
            cov_bootstrap=Cn_bs,
            bootstrap_indeces=bootstrap,
            number_of_bs=number_of_bs,
            name="Cn")

        results = {'Cg_bs': Cg_bs, 'Cn_bs': Cn_bs,
                   'Cg_fit': Cg_fit, 'Cn_fit': Cn_fit,
                   'Cg_average': Cg_average, 'Cn_average': Cn_average,
                   'process_time_bs': process_time_bs,
                   'number_of_successful_bs': number_of_bs,
                   'results_fit_Cg': results_fit_Cg,
                   'results_fit_Cn': results_fit_Cn,
                   }

        return results

    def __fit_bootstrap_results(self, cov_init, cov_bootstrap,
                                bootstrap_indeces, number_of_bs, name):
        r"""
        Fit  bootstrap results of [`S` x `S`] traits and combine all runs to
	total [`P` x `P`] covariance matrix.

        Arguments:
            cov_init (array-like):
		[`P` x `P`] covariance matrix used to initialise fitting
            number_of_bs (int):
		number of successful bootstrapping runs executed for this
		experiment
            cov_bootstrap (array-like):
		[`number_of_bs` x `S` x `S`] bootstrap results
            bootstrap_indeces (array-like):
		[`number_of_bs` x `S`] matrix of bootstrapping indeces
            name (string):
		name of covariance matrix

        Returns:
            (tuple):
		tuple containing:

                - **C_opt_value** (array-like):
                  [`P` x `P`] covariance matrix if fit successful, else 1x1
		  matrix containing string 'did not converge'
		- **results_fit** ():
		  results parameters of the bfgs-fit of the covariance
		  matrix (via scipy.optimize.fmin_l_bfgs_g)


        """

        # initialise free-from Covariance Matrix: use simple average of
        # bootstraps as initial values
        # make use of Choleski decomposition and get parameters associated with
        # cov_init
        #C_init = dlimix.CFreeFormCF(self.P)
        #C_init.setParamsCovariance(cov_init)
        C_init = FreeFormCov(self.P)
        C_init.setCovariance(cov_init)
        params = C_init.getParams()

        # initialise free-form covariance matrix C_fit: parameters to be set in
        # optimize function
        #C_fit = dlimix.CFreeFormCF(self.P)
        C_fit = FreeFormCov(self.P)

        # Fit bootstrap results to obtain closest covariance matrix
        # use parameters obtained from mean-initiated covariance matrix above
        verboseprint(
            "Fitting parameters (minimizing rss via BFGS)...",
            verbose=self.verbose)
        results_fit = opt(
            self.__rss,
            x0=params,
            args=(C_fit, number_of_bs, bootstrap_indeces, cov_bootstrap),
            factr=1e12,
            iprint=2)

        if results_fit[2]['warnflag'] == 0:
            #C_opt = dlimix.CFreeFormCF(self.P)
            C_opt = FreeFormCov(self.P)
            C_opt.setParams(results_fit[0])
            C_opt_value = C_opt.K()
        else:
            C_opt_value = np.array(['did not converge'])

        return C_opt_value, results_fit

    def __rss(self, params, C, n_bootstrap, index_bootstrap, list_C):
        r"""
        Compute residual sum of squares and gradient for estimate of covariance
        matrix C and bootstrapped values of C.

        Arguments:
            params (array-like):
		of parameters used for initialising estimate C
		(length: 1/2*`P`*(`P`+1))
            C:
		intialised free-form covariance matrix, to be fitted
            n_bootstrap (int):
		number of successful bootstrapping runs executed for this
		experiment
            index_bootstrap (numpy.array):
		[`number_of_bs` x `S`] matrix of bootstrapping indeces
            list_C (list):
		list of n_bootstrap [`S` x `S`] bootstrapped covariance
		matrices

        Returns:
            (tuple):
                tuple containing:

                - **RSS_res**:
		  residual sum of squares
                - **RSS_grad_res**:
		  gradient of residual sum of squares function

        """

        # initialise values of covariance matrix with parameters (based on
        # cholesky decomposition)
        params = np.array(params)
        C.setParams(params)
        # get values of the covariance matrix
        C_value = C.K()
        # number of parameters: 1/2*P*(P+1)
        n_params = params.shape[0]
        # compute residual sum of squares between estimate of P x P Cg and
        # bootstrapped S x S Cgs
        RSS_res, index = self.__RSS_compute(n_bootstrap, C_value, list_C,
                                          index_bootstrap)
        # compute gradient (first derivative of rss at each bootstrap)
        RSS_grad_res = self.__RSS_grad_compute(n_bootstrap, n_params, C,
                                                C_value, list_C, index)

        return RSS_res, RSS_grad_res

    @staticmethod
    def __RSS_compute(n_bootstrap, C_value, list_C, index_bootstrap):
        r"""
        Compute residual sum of squares (rss) for each bootstrap run

        - used parameter to be optimized in quasi-Newton method (BFGS) of
          optimization
        - bootstrap matrices versus matrix to be optimized
        - matrix to be optimized initialised with average over all bootstraps
          for each position

        Arguments:
            n_bootstrap (int):
                number of successful bootstrap runs;
            C_value (array-like):
                [`P` x `P`] matrix to be optimized
            list_C (array-like):
                [`n_bootstrap` x `S` x `S`]  bootstrapped covariance matrices

        Returns:
            (tuple):
                tuple containing:

                - **res** (double):
		  sum of residual sum of squares over all `n_bootstrap` runs
                - **index** (list) :
		  [`n_bootstrap` x `S`] list containing trait indeces used in
		  each bootstrap run; to be passed to gradient computation

        """

        res = 0
        index = {}
        for i in range(n_bootstrap):
            index[i] = np.ix_(index_bootstrap[i], index_bootstrap[i])
            res += bn.ss(C_value[index[i]] - list_C[i], axis=None)
        return res, index

    @staticmethod
    def __RSS_grad_compute(n_bootstrap, n_params, C, C_value, list_C, index):
        r"""
        Compute gradient of residual sum of squares (rss) for each bootstrap
        run at each index

        - used as gradient parameter in quasi-Newton method (BFGS) of
          optimization
        - matrix to be optimized initialised with average over all bootstraps
          for each position

        Arguments:
            n_bootstrap (int):
                number of successful bootstrap runs;
            n_params (int):
                number of parameters of the model (parameters needed to build
                positive, semi-definite matrix with Choleski decomposition)
            C_value (array-like):
                [`P` x `P`] matrix to be optimized
            list_C (array-like):
                [`n_bootstrap` x `S` x `S`]  bootstrapped covariance matrices
            index (list):
                [`n_bootstrap `x `S`] trait indices used in each bootstrap run

        Returns:
            (numpy.array):

                - **res**:
		  [`n_params` x 1] sum of gradient over all successful bootstrap
                  runs for each parameter to be fitted

        """
        res = sp.zeros(n_params)
        for pi in range(n_params):
            #Cgrad = C.Kgrad_param(pi)
            Cgrad = C.K_grad_i(pi)
            for i in range(n_bootstrap):
                res[pi] += (2 * Cgrad[index[i]] *
                            (C_value[index[i]] - list_C[i])).sum()
        return res

    def __VarianceDecomposition(self, phenoSubset, bs=None):
        r"""
	Compute variance decomposition of phenotypes into genetic and noise
        covariance

        Arguments:
            phenoSubset (array-like):
                [`N` x `S`] phenotypes for which variance decomposition should
		be computed
            bs (int):
                number of subsample
            phenotypes (array-like):
                [`N` x `P`] original phenotypes
            relatedness (array-like):
                [`N` x `N`] kinship/genetic relatedness used in estimation of
                genetic component
            output (string):
                output directory; needed for caching

        Returns:
            (dictionary):
                dictionary containing:

                - **Cg** (numpy.array):
		  [`S` x `S`] genetic variance component
                - **Cn** (numpy.array):
		  [`S` x `S`] noise variance component
                - **process_time** (double):
		  cpu time of variance decomposition
                - **bsindex** (int):
		  number of subsample
        """

        # time variance decomposition
        t0 = time.clock()
        vd = limix.mtset.MTSet(Y=phenoSubset, R=self.relatedness)
        vd_null_info = vd.fitNull(
            cache=False,
            n_times=self.iterations,
            rewrite=True)
        t1 = time.clock()
        processtime = t1 - t0

        if vd_null_info['conv']:
            verboseprint(
                ('Variance decomposition for bootstrap number {} '
                'converged').format(bs + 1), verbose=self.verbose)
            Cg = vd_null_info['Cg']
            Cn = vd_null_info['Cn']
        else:
            verboseprint(
                ('Variance decomposition for bootstrap number {} '
                'did not converge').format(bs + 1), verbose=self.verbose)
            Cg = None
            Cn = None
        return {'Cg': Cg, 'Cn': Cn, 'process_time': processtime, 'bsindex': bs}