# Source code for pygwas.standardizer
import numpy
import pheno_covar
from exceptions import InvariantVar
# Module metadata: copyright holder and the license this file is released under.
__copyright__ = "Eric Torstenson"
__license__ = "GPL3.0"
# This file is part of pyGWAS.
#
# pyGWAS is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# pyGWAS is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with pyGWAS. If not, see <http://www.gnu.org/licenses/>.
def get_standardizer():
    """Return the standardizer currently registered at module level.

    :return: the object last passed to :func:`set_standardizer`, or the
        module's default value when it has never been replaced
    """
    # Reading a module-level name does not require a ``global`` declaration.
    return _standardizer
def set_standardizer(std):
    """Register *std* as the module-level standardizer.

    :param std: object to be handed back by :func:`get_standardizer`
    :return: None
    """
    # ``global`` is required here: without it the assignment would only
    # create a local name and the module-level value would stay untouched.
    global _standardizer
    _standardizer = std
class StandardizedVariable(object):
    """Optional plugin object that can be used to standardize covariate and
    phenotype data.

    Many algorithms require that input be standardized in some way in order
    to work properly; however, rescaling the results is algorithm specific.
    In order to facilitate this situation, application authors can
    write up application specific Standardization objects for use with
    the data parsers.

    Subclasses are expected to override :meth:`standardize` so that it
    populates ``covariates`` and ``phenotypes`` (both start out as ``None``),
    and :meth:`destandardize` to rescale results.
    """

    def __init__(self, pc):
        """Record the data source and build one missingness mask per phenotype.

        :param pc: data source providing ``covariate_data`` and
            ``phenotype_data`` (and, for the name accessors,
            ``covariate_labels`` and ``phenotype_names``); presumably a
            ``pheno_covar.PhenoCovar`` instance
        """
        #: mask representing missingness (1 indicates missing)
        self.missing = []
        #: number of covars
        self.covar_count = len(pc.covariate_data)
        #: number of phenotypes
        self.pheno_count = len(pc.phenotype_data)
        #: Standardized covariate data
        self.covariates = None
        #: standardized phenotype data
        self.phenotypes = None

        missing_code = pheno_covar.PhenoCovar.missing_encoding
        # The covariate masks are the same for every phenotype, so compare
        # each covariate against the missing encoding only once.
        covar_masks = [covar == missing_code for covar in pc.covariate_data]
        for pheno in pc.phenotype_data:
            # An entry is missing for this phenotype when either the
            # phenotype itself or any covariate is missing.
            mask = pheno == missing_code
            for covar_mask in covar_masks:
                mask = mask | covar_mask
            self.missing.append(mask)

        #: index of the current phenotype
        self.idx = 0
        #: Reference back to the pheno_covar object for access to raw data
        self.datasource = pc

    def get_variables(self, missing_in_geno=None):
        """Extract the complete set of data for the current phenotype, based
        on missingness over all variables for the current locus.

        :param missing_in_geno: mask associated with missingness in genotype;
            when given it is OR-ed with the current phenotype's own mask
        :return: (phenotypes, covariates, nonmissing used for this set of vars)
        :raises InvariantVar: when a covariate or the current phenotype has
            identical minimum and maximum over the retained entries
        """
        missing = self.missing[self.idx]
        if missing_in_geno is not None:
            missing = missing | missing_in_geno
        nonmissing = numpy.invert(missing)
        nmcount = int(nonmissing.sum())

        covars = numpy.zeros((self.covar_count, nmcount))
        for idx in range(self.covar_count):
            covars[idx] = self.covariates[idx][nonmissing]
            # Ignore any value still equal to the missing encoding when
            # checking whether the covariate varies at all.
            observed = covars[idx][
                covars[idx] != pheno_covar.PhenoCovar.missing_encoding]
            if observed.min() == observed.max():
                raise InvariantVar("Covar %s doesn't have enough variation to continue" % (self.datasource.covariate_labels[idx]))

        phenotype = self.phenotypes[self.idx][nonmissing]
        if phenotype.min() == phenotype.max():
            raise InvariantVar("Phenotype %s doesn't have enough variation to continue" % (self.datasource.phenotype_names[self.idx]))
        return (phenotype, covars, nonmissing)

    def get_phenotype_name(self):
        """Returns current phenotype name"""
        return self.datasource.phenotype_names[self.idx]

    def get_covariate_name(self, idx):
        """Return label for a specific covariate

        :param idx: which covariate?
        :return: string label
        """
        return self.datasource.covariate_labels[idx]

    def get_covariate_names(self):
        """Return all covariate labels as a list

        :return: list of covariate names
        """
        return self.datasource.covariate_labels

    def standardize(self):
        """Stub for the appropriate standardizer function

        Each Standardizer object will do its own thing here.
        """
        pass

    def destandardize(self, estimates=None, se=None, **kwargs):
        """Stub for the appropriate destandardizer function.

        Each object type will do its own thing here. The parameters mirror
        the signature used by the concrete standardizer in this module so
        that callers can treat every standardizer uniformly; the stub
        ignores them.

        :param estimates: ignored by the stub
        :param se: ignored by the stub
        :return: None
        """
        pass
class NoStandardization(StandardizedVariable):
    """Pass-through standardizer that leaves the data untouched.

    This is mostly a placeholder for standardizers. Each application will
    probably have a specific approach to standardizing/destandardizing the
    input/output.
    """

    # No ``__init__`` override: construction is inherited unchanged from
    # StandardizedVariable.

    def standardize(self):
        """Expose the raw data as the "standardized" data.

        No scaling is performed: ``covariates`` and ``phenotypes`` are set to
        the very objects held by the datasource, not to copies of them.
        """
        self.covariates = self.datasource.covariate_data
        self.phenotypes = self.datasource.phenotype_data

    def destandardize(self, estimates, se, **kwargs):
        """When the pheno/covar data has been standardized, this can be
        used to rescale the betas back to a meaningful value using the
        original data.

        For the "Un-standardized" data, we do no conversion.

        :param estimates: estimates to hand back unchanged
        :param se: standard errors to hand back unchanged
        :param kwargs: only ``pvalues`` is consulted; other keywords are
            ignored
        :return: (estimates, se, pvalues), where pvalues is ``None`` when no
            ``pvalues`` keyword was supplied
        """
        # Nothing is rescaled, so a missing ``pvalues`` keyword need not be
        # an error: pass through whatever the caller provided.
        return estimates, se, kwargs.get("pvalues")
#: Module-wide standardizer returned by get_standardizer() and replaced by
#: set_standardizer(). This should be set to an appropriate object by the
#: application; the default is the NoStandardization class itself.
_standardizer = NoStandardization