Source code for limmbo.io.parser

import argparse

[docs]def getGWASargs(): parser = argparse.ArgumentParser(prog="runAssociation", description=('Models the association between phenotypes and ' 'genotypes, accepting additional covariates and parameters ' 'to account for population structure and relatedness between ' 'samples. Users can choose between single-trait and multi-' 'trait models, simple linear or linear mixed model set-ups.')) required = parser.add_argument_group('Basic required arguments') required.add_argument( '-p', '--file_pheno', action="store", dest="file_pheno", required=False, help=('Path [string] to [(N+1) x (P+1)] .csv file of [P] ' 'phenotypes with [N] samples (first column: sample IDs, first ' 'row: phenotype IDs). Default: %(default)s')) required.add_argument( '--pheno_delim', action="store", dest="pheno_delim", required=False, default=",", help=('Delimiter of phenotype file. g ' 'Default: %(default)s')) required.add_argument( '-g', '--file_geno', action="store", dest="file_genotypes", required=False, default=None, help=('Genotype file: either [S x N].csv file (first column: SNP id, ' 'first row: sample IDs) or `plink formated <https://www.cog-genomics.org/plink/1.9/input>`_ genotypes ' '(.bim/.fam/.bed). Default: %(default)s')) required.add_argument( '--geno_delim', action="store", dest="genotypes_delim", required=False, default=",", help=('Delimiter of genotype file (if not in `plink format <https://www.cog-genomics.org/plink/1.9/input>`_). ' 'Default: %(default)s')) required.add_argument( '-o', '--outdir', action="store", dest="outdir", required=True, help=('Path [string] of output directory; user needs writing ' 'permission. Default: %(default)s')) mode = required.add_mutually_exclusive_group(required=True) mode.add_argument( '-st', '--singletrait', action="store_true", dest="singletrait", default=False, help=('Set flag to conduct a single-trait association analyses' 'Default: %(default)s')) mode.add_argument( '-mt', '--multitrait', action="store_true", dest="multitrait", default=False, help=('Set flag to conduct a multi-trait association analyses' 'Default: %(default)s')) setup = required.add_mutually_exclusive_group(required=True) setup.add_argument( '-lm', '--lm', action="store_true", dest="lm", default=False, help=('Set flag to use a simple linear model for the association' 'analysis')) setup.add_argument( '-lmm', '--lmm', action="store_true", dest="lmm", default=False, help=('Set flag to use a linear mixed model for the association' 'analysis')) output = parser.add_argument_group('Output arguments') output.add_argument( '-n', '--name', action="store", dest="name", default="", help=('Name (used for output file naming). Default: ' '%(default)s')) output.add_argument( '-v', '--verbose', action="store_true", dest="verbose", required=False, default=False, help=('[bool]: should analysis progress be displayed. ' 'Default: %(default)s')) optionalfiles = parser.add_argument_group('Optional files') optionalfiles.add_argument( '-k', '--file_kinship', action="store", dest="file_relatedness", required=False, default=None, help=('Path [string] to [N x (N+1)] file of kinship/relatedness ' 'matrix with [N] samples (first row: sample IDs). Required when ' '--lmm/-lm. Default: ' '%(default)s')) optionalfiles.add_argument( '--kinship_delim', action="store", dest="relatedness_delim", required=False, default=",", help=('Delimiter of kinship file. g ' 'Default: %(default)s')) optionalfiles.add_argument( '-cg', '--file_cg', action="store", dest="file_cg", required=False, default=None, help=('Required for large phenotype sizes when --lmm/-lm; computed via ' 'runLiMMBo; specifies file name for genetic trait covariance ' ' matrix (rows: traits, columns: traits). Default: %(default)s')) optionalfiles.add_argument( '--cg_delim', action="store", dest="cg_delim", required=False, default=",", help=('Delimiter of Cg file. g Default: %(default)s')) optionalfiles.add_argument( '-cn', '--file_cn', action="store", dest="file_cn", required=False, default=None, help=('Required for large phenotype sizeswhen --lmm/-lm; computed via ' 'runLiMMBo; specifies file name for non-genetic trait covariance ' ' matrix (rows: traits, columns: traits). Default: %(default)s')) optionalfiles.add_argument( '--cn_delim', action="store", dest="cn_delim", required=False, default=",", help=('Delimiter of Cn file. g Default: %(default)s')) optionalfiles.add_argument( '-pcs', '--file_pcs', action="store", dest="file_pcs", required=False, default=None, help=('Path to [N x PCs] file of principal components from ' 'genotypes to be included as covariates (first column: ' 'sample IDs, first row: PC IDs); Default: %(default)s')) optionalfiles.add_argument( '--pcs_delim', action="store", dest="pcs_delim", required=False, default=",", help=('Delimiter of PCs file. g Default: %(default)s')) optionalfiles.add_argument( '-c', '--file_cov', action="store", dest="file_covariates", required=False, default=None, help=('Path [string] to [(N+1) x C] file of covariates matrix with ' '[N] samples and [K] covariates (first column: sample IDs, ' 'first row: phenotype IDs). Default: %(default)s')) optionalfiles.add_argument( '--covariate_delim', action="store", dest="covariate_delim", required=False, default=",", help=('Delimiter of covariates file. g ' 'Default: %(default)s')) optional = parser.add_argument_group('Optional association parameters') optional.add_argument( '-adjustP', '--adjustP', action="store", dest="adjustP", choices=['bonferroni', 'effective', 'None'], default=None, required=False, type=str, help=('Method to adjust single-trait p-values for' 'multiple hypothesis testing when running' 'multiple single-trait GWAS: bonferroni/effective number of ' 'tests `(Galwey,2009) <http://onlinelibrary.wiley.com/doi/10.1002/gepi.20408/abstract>`_' 'Default: %(default)s')) optional.add_argument( '-nrpermutations', '--nrpermutations', action="store", dest="nrpermutations", default=None, required=False, type=int, help=('Number of permutations for computing empirical p-values; ' '1/nrpermutations is maximum level of testing for ' 'significance. Default: %(default)s') ) optional.add_argument( '-fdr', '--fdr', action="store", dest="fdr", required=False, default=None, type=float, help=('FDR threshold for computing empirical FDR. Default: ' '%(default)s')) optional.add_argument( '-seed', '--seed', action="store", dest="seed", default=256, required=False, help=('Seed [int] to inittiate random number generation for ' 'permutations. Default: %(default)s'), type=int) data = parser.add_argument_group('Optional data processing parameters') data.add_argument( '-tr', '--transform_method', action="store", dest="transform", default='scale', choices=['scale', 'gaussian'], required=False, help=('Choose type [string] of data preprocessing: scale (mean ' 'center, divide by sd) or gaussian (inverse normalise). ' 'Default: %(default)s')) data.add_argument( '-reg', '--reg_covariates', action="store_true", dest="regress", required=False, help=('[bool]: should covariates be regressed out? Default: ' '%(default)s')) subset = parser.add_argument_group('Optional subsetting options') subset.add_argument( '-traitset', '--traitset', action="store", dest="traitstring", required=False, default=None, help=('Comma- (for list of traits) or hyphen- (for trait range) or ' 'comma and hyphen-separated list [string] of traits (trait ' 'columns) to choose; default: None (=all traits). Default: ' '%(default)s')) subset.add_argument( '-nrpcs', '--nrpcs', action="store", dest="nrpcs", required=False, default=10, help='First PCs to chose. Default: %(default)s', type= int) samples = subset.add_mutually_exclusive_group(required=False) samples.add_argument( '--file_samplelist', action="store", dest="file_samplelist", required=False, default=None, help=('Path [string] to file with samplelist for sample ' 'selection, with one sample ID per line. Default: %(default)s')) samples.add_argument( '--samplelist', action="store", dest="samplelist", required=False, default=None, help=('Comma-separated list [string] of samples IDs to restrict ' 'analysis to, e.g. ID1,ID2,ID5,ID9,ID10. Default: %(default)s')) plotting = parser.add_argument_group('Plot arguments', 'Arguments for depicting GWAS results as manhattan plot') plotting.add_argument( '--plot', action="store_true", dest="plot", required=False, default=False, help=('Set flag if results of association analysis should be depicted ' 'as manhattan and quantile-quantile plot')) plotting.add_argument( '-colourS', '--colourS', action="store", dest="colourS", required=False, default='DarkBlue', help=('Colour of significant points in manhattan plot')) plotting.add_argument( '-colourNS', '--colourNS', action="store", dest="colourNS", required=False, default='Orange', help=('Colour of non-significant points in manhattan plot')) plotting.add_argument( '-alphaS', '--alphaS', action="store", dest="alphaS", required=False, default=0.5, help=('Transparency of significant points in manhattan plot')) plotting.add_argument( '-alphaNS', '--alphaNS', action="store", dest="alphaNS", required=False, default=0.1, help=('Transparency of non-significant points in manhattan plot')) plotting.add_argument( '-thr', '--threshold', action="store", dest="thr", required=False, default=None, help=('Significance threshold; when --fdr specified, empirical fdr used ' 'as threshold')) version = parser.add_argument_group('Version') version.add_argument('--version', action='version', version='%(prog)s 0.1.0')
return parser
[docs]def getVarianceEstimationArgs(): parser = argparse.ArgumentParser(prog="runVarianceEstimation", description=('Estimates the genetic and non-genetic trait' 'covariance matrix parameters of a linear mixed model ' 'with random genetic and non-genetic effect via a ' 'bootstrapping-based approach.')) required = parser.add_argument_group('Basic required arguments') required.add_argument( '-p', '--file_pheno', action="store", dest="file_pheno", required=False, help=('Path [string] to [(N+1) x (P+1)] .csv file of [P] ' 'phenotypes with [N] samples (first column: sample IDs, first ' 'row: phenotype IDs). Default: %(default)s')) required.add_argument( '--pheno_delim', action="store", dest="pheno_delim", required=False, default=",", help=('Delimiter of phenotype file. g ' 'Default: %(default)s')) required.add_argument( '-k', '--file_kinship', action="store", dest="file_relatedness", required=False, default=None, help=('Path [string] to [N x (N+1)] file of kinship/relatedness ' 'matrix with [N] samples (first row: sample IDs). Required when ' '--lmm/-lm. Default: ' '%(default)s')) required.add_argument( '--kinship_delim', action="store", dest="relatedness_delim", required=False, default=",", help=('Delimiter of kinship file. g ' 'Default: %(default)s')) required.add_argument( '-o', '--outdir', action="store", dest="outdir", required=True, help=('Path [string] of output directory; user needs writing ' 'permission. Default: %(default)s')) optionalfiles = parser.add_argument_group('Optional files') optionalfiles.add_argument( '-c', '--file_cov', action="store", dest="file_covariates", required=False, default=None, help=('Path [string] to [(N+1) x C] file of covariates matrix with ' '[N] samples and [K] covariates (first column: sample IDs, ' 'first row: phenotype IDs). Default: %(default)s')) optionalfiles.add_argument( '--covariate_delim', action="store", dest="covariate_delim", required=False, default=",", help=('Delimiter of covariates file. g ' 'Default: %(default)s')) limmbo = parser.add_argument_group('Bootstrapping parameters') limmbo.add_argument( '-seed', '--seed', action="store", dest="seed", default=234, required=False, help=('seed [int] used to generate bootstrap matrix. Default: ' '%(default)s'), type=int) limmbo.add_argument( '-sp', '--smallp', action="store", dest="S", default=None, required=True, help=('Size [int] of phenotype subsamples used for variance ' 'decomposition. Default: %(default)s'), type=int) limmbo.add_argument( '-r', '--runs', action="store", dest="runs", default=None, required=False, help='Total number [int] of bootstrap runs. Default: %(default)s', type=int) limmbo.add_argument( '-t', '--timing', action="store_true", dest="timing", default=False, required=False, help=('[bool]: should variance decomposition be timed. Default: ' '%(default)s')) limmbo.add_argument( '--minCooccurrence', action="store", dest="minCooccurrence", default=3, type=int, required=False, help=('Minimum count [int] of the pairwise sampling of any given ' 'trait pair. Default: %(default)s')) limmbo.add_argument( '-i', '--iterations', action="store", dest="iterations", default=10, required=False, type=int, help=('Number [int] of iterations for variance decomposition ' 'attempts. Default: %(default)s')) limmbo.add_argument( '-cpus', '--cpus', action="store", dest="cpus", default=None, required=False, type=int, help=('Number [int] of available CPUs for parallelisation of ' 'variance decomposition steps. Default: %(default)s')) data = parser.add_argument_group('Optional data processing parameters') data.add_argument( '-tr', '--transform_method', action="store", dest="transform", default='scale', choices=['scale', 'gaussian'], required=False, help=('Choose type [string] of data preprocessing: scale (mean ' 'center, divide by sd) or gaussian (inverse normalise). ' 'Default: %(default)s')) data.add_argument( '-reg', '--reg_covariates', action="store_true", dest="regress", required=False, help=('[bool]: should covariates be regressed out? Default: ' '%(default)s')) subset = parser.add_argument_group('Optional subsetting options') subset.add_argument( '-traitset', '--traitset', action="store", dest="traitstring", required=False, default=None, help=('Comma- (for list of traits) or hyphen- (for trait range) or ' 'comma and hyphen-separated list [string] of traits (trait ' 'columns) to choose; default: None (=all traits). Default: ' '%(default)s')) samples = subset.add_mutually_exclusive_group(required=False) samples.add_argument( '--file_samplelist', action="store", dest="file_samplelist", required=False, default=None, help=('Path [string] to file with samplelist for sample ' 'selection, with one sample ID per line.Default: %(default)s')) samples.add_argument( '--samplelist', action="store", dest="samplelist", required=False, default=None, help=('Comma-separated list [string] of samples IDs to restrict ' 'analysis to, e.g. ID1,ID2,ID5,ID9,ID10. Default: %(default)s')) output = parser.add_argument_group('Output arguments') output.add_argument( '-dontSaveIntermediate', '--dontSaveIntermediate', action="store_false", dest="intermediate", default=True, required=False, help=('Set to suppress saving intermediate variance components. ' 'Default: %(default)s')) output.add_argument( '-v', '--verbose', action="store_true", dest="verbose", required=False, default=False, help=('[bool]: should analysis step description be printed. ' 'Default: %(default)s')) version = parser.add_argument_group('Version') version.add_argument('--version', action='version', version='%(prog)s 0.1.0')
return parser