Source code for limmbo.core.vdsimple

from limmbo.utils.utils import verboseprint
from limmbo.utils.utils import regularize
from limix.mtset import MTSet as MTST

import time


[docs]def vd_reml(datainput, iterations=10, verbose=True):
    r"""
    Compute variance decomposition of phenotypes into genetic and noise
    covariance via standard REML: approach implemented in LIMIX

    Arguments:
        datainput (:class:`InputData`):
            object with ID-matched [`N` x `P`] phenotypes with [`N`]
            individuals and [`P`] phenotypes and [`N` x `N`] relatedness
            estimates
        output (string, optional):
            output directory with user-writing permissions; needed if
            caching is True
        cache (bool, optional):
            should results be cached
        verbose (bool, optional):
            should messages be printed to stdout

    Returns:
        (tuple):
            tuple containing:

            - **Cg** (numpy.array):
              [`P` x `P`] genetic variance component
            - **Cn** (numpy.array):
              [`P` x `P`] noise variance component
            - **process_time** (double):
              cpu time of variance decomposition

    Examples:

    .. doctest::

        >>> import numpy
        >>> from numpy.random import RandomState
        >>> from numpy.linalg import cholesky as chol
        >>> from limmbo.core.vdsimple import vd_reml
        >>> from limmbo.io.input import InputData
        >>> random = RandomState(10)
        >>> N = 100
        >>> S = 1000
        >>> P = 5
        >>> snps = (random.rand(N, S) < 0.2).astype(float)
        >>> kinship = numpy.dot(snps, snps.T) / float(10)
        >>> y  = random.randn(N, P)
        >>> pheno = numpy.dot(chol(kinship), y)
        >>> pheno_ID = [ 'PID{}'.format(x+1) for x in range(P)]
        >>> samples = [ 'SID{}'.format(x+1) for x in range(N)]
            >>> datainput = InputData()
        >>> datainput.addPhenotypes(phenotypes = pheno,
        ...                         phenotype_ID = pheno_ID,
        ...                         pheno_samples = samples)
        >>> datainput.addRelatedness(relatedness = kinship,
        ...                          relatedness_samples = samples)
        >>> Cg, Cn, ptime = vd_reml(datainput, verbose=False)
        >>> Cg.shape
        (5, 5)
    """

    verboseprint(
        "Estimate covariance matrices based on standard REML", verbose=verbose)

    # time variance decomposition
    t0 = time.clock()
    vd = MTST(Y=datainput.phenotypes, R=datainput.relatedness)
    vd_result = vd.fitNull(n_times=iterations, rewrite=True)
    t1 = time.clock()

    if vd_result['conv']:
        verboseprint(
            "Variance decomposition via REML converged", verbose=verbose)
        Cg = vd_result['Cg']
        Cn = vd_result['Cn']
    else:
        verboseprint(
            "Variance decomposition via REML did not converge",
            verbose=verbose)
    processtime = t1 - t0

    # ensure that matrices are true positive semi-definite matrices
    Cg, Cg_ev_min = regularize(Cg, verbose=verbose)
    Cn, Cn_ev_min = regularize(Cn, verbose=verbose)

    return Cg, Cn, processtime