# Source code for meanvar.mvstandardizer
import numpy
import scipy.stats
from pygwas.pheno_covar import PhenoCovar
import pygwas.standardizer
import sys
__copyright__ = "Copyright (C) 2015 Todd Edwards, Chun Li and Eric Torstenson"
__license__ = "GPL3.0"
# This file is part of MVtest.
#
# MVtest is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# MVtest is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with MVtest. If not, see <http://www.gnu.org/licenses/>.
class Standardizer(pygwas.standardizer.StandardizedVariable):
    """Optional plugin object that can be used to standardize covariate and
    phenotype data.

    Many algorithms require that input be standardized in some way in order
    to work properly, however, rescaling the results is algorithm specific.
    In order to facilitate this situation, application authors can
    write up application specific Standardization objects for use with
    the data parsers.
    """

    def __init__(self, pc):
        """Initialize from the datasource *pc* (a PhenoCovar)."""
        super(Standardizer, self).__init__(pc)

    def standardize(self):
        """Standardize covariates and phenotypes as z-scores.

        This replaces the local copies of this data. When it's time to
        scale back, use destandardize from the datasource for that.

        Entries equal to PhenoCovar.missing_encoding are excluded from the
        mean/SD computation, but every entry (missing or not) is shifted
        and scaled in the stored result.
        """
        self.covariates = []
        self.phenotypes = []
        for idx in range(self.covar_count):
            x = self.datasource.covariate_data[idx]
            nonmissing = x != PhenoCovar.missing_encoding
            mx = numpy.mean(x[nonmissing])
            sx = numpy.std(x[nonmissing])  # population SD (numpy default ddof=0)
            self.covariates.append((x - mx) / sx)
        for idx in range(self.pheno_count):
            y = self.datasource.phenotype_data[idx]
            nonmissing = y != PhenoCovar.missing_encoding
            my = numpy.mean(y[nonmissing])
            sy = numpy.std(y[nonmissing])
            self.phenotypes.append((y - my) / sy)

    def destandardize(self, estimates, se, **kwargs):
        """Revert the betas and variance components back to the original scale.

        :param estimates: parameter estimates on the standardized scale;
            layout assumed to be [mean intercept, mean genotype effect,
            mean covariate effects..., variance intercept, variance genotype
            effect, variance covariate effects...] -- TODO confirm with caller.
        :param se: standard errors aligned with *estimates*.
        :param kwargs: must supply "v", the covariance matrix of *estimates*.
        :return: tuple ``(parameters, standard_errors, pvalues)`` on the
            original (destandardized) scale.
        """
        v = kwargs["v"]
        # Phenotype restricted to its non-missing observations.
        pheno_all = self.datasource.phenotype_data[self.idx]
        pheno = pheno_all[pheno_all != PhenoCovar.missing_encoding]
        covariates = []
        mmx = []  # per-covariate means over non-missing entries
        ssx = []  # per-covariate SDs over non-missing entries
        # Delta-method weight vector for the intercept terms: 1 for the
        # intercept, 0 for the genotype effect, -mean/sd per covariate.
        a = [1, 0]
        for c in self.datasource.covariate_data:
            covariates.append(c[c != PhenoCovar.missing_encoding])
            mmx.append(numpy.mean(covariates[-1]))
            ssx.append(numpy.std(covariates[-1]))
            a.append(-mmx[-1] / ssx[-1])
        if len(mmx) < 1:
            # No covariates: scalar identities keep the expressions below valid
            # (the covariate slices are empty, so these values are never used).
            mmx = 1
            ssx = 1
        else:
            mmx = numpy.array(mmx)
            ssx = numpy.array(ssx)
        covariates = numpy.array(covariates)
        ssy = numpy.std(pheno)
        mmy = numpy.mean(pheno)
        # Number of mean-model parameters: intercept + genotype + covariates.
        ccount = len(covariates) + 2
        a = numpy.array(a)
        meanpar = [mmy + ssy * (estimates[0] - numpy.sum(estimates[2:ccount] * mmx / ssx)),
                   estimates[1] * ssy]
        meanse = [ssy * numpy.sqrt(a.dot(v[0:ccount, 0:ccount]).dot(a.transpose())),
                  ssy * se[1]]
        varpar = [2 * numpy.log(ssy) + estimates[ccount] - numpy.sum(estimates[ccount + 2:] * mmx / ssx),
                  estimates[ccount + 1]]
        varse = [numpy.sqrt(a.dot(v[ccount:, ccount:]).dot(a.transpose())),
                 se[ccount + 1]]
        if ccount > 2:
            # Rescale each covariate's mean- and variance-model effects.
            meanpar += list(estimates[2:ccount] * ssy / ssx)
            meanse += list(ssy * se[2:ccount] / ssx)
            varpar += list(estimates[ccount + 2:] / ssx)
            varse += list(se[ccount + 2:] / ssx)
        # Two-sided normal p-values from the destandardized z-statistics.
        pvals = 2 * scipy.stats.norm.cdf(
            -numpy.absolute(numpy.array(meanpar + varpar) / numpy.array(meanse + varse)))
        return meanpar + varpar, meanse + varse, pvals