deeprvat.data.dense_gt

Module Contents

Classes

DenseGTDataset

Functions

get_matched_sample_indices

This function is intended to do the same as `indices = np.array([np.where(x == iy)[0][0] for iy in y])`, but is much faster. See https://stackoverflow.com/questions/8251541/numpy-for-every-element-in-one-array-find-the-index-in-another-array

Data

logger

DEFAULT_CHROMOSOMES

AGGREGATIONS

API

deeprvat.data.dense_gt.logger = 'getLogger(...)'
deeprvat.data.dense_gt.DEFAULT_CHROMOSOMES = None
deeprvat.data.dense_gt.AGGREGATIONS = None
deeprvat.data.dense_gt.get_matched_sample_indices(x, y)

This function is intended to do the same as `indices = np.array([np.where(x == iy)[0][0] for iy in y])`, but is much faster. See https://stackoverflow.com/questions/8251541/numpy-for-every-element-in-one-array-find-the-index-in-another-array

Args:

x: query array (the array that is searched).
y: query values. The function returns the index of each element of y in x.

Returns:

np.array: Index of each element of y in x

class deeprvat.data.dense_gt.DenseGTDataset(gt_file: str = None, variant_file: str = None, split: str = '', train_dataset: Optional[torch.utils.data.Dataset] = None, chromosomes: List[str] = None, phenotype_file: Optional[str] = None, standardize_xpheno: bool = True, standardize_anno: bool = False, standardize_rare_anno: bool = False, standardize_rare_anno_columns: Optional[List] = None, standardize_rare_anno_params: Optional[Dict] = None, permute_y: bool = False, y_transformation: Optional[str] = None, x_phenotypes: List[str] = [], grouping_level: Optional[str] = 'gene', group_common: bool = False, return_sparse: bool = False, annotations: List[str] = [], annotation_file: Optional[str] = None, precomputed_annotations: Optional[Tuple[pandas.DataFrame, pandas.DataFrame, numpy.ndarray]] = None, annotation_aggregation: Union[str, dict] = 'max', y_phenotypes: List[str] = [], skip_y_na: bool = True, skip_x_na: bool = False, sample_file: str = None, sim_phenotype_file: Optional[str] = None, min_common_variant_count: Optional[int] = None, min_common_af: Optional[Dict[str, float]] = None, max_rare_af: Optional[Dict[str, float]] = None, use_common_variants: bool = True, use_rare_variants: bool = False, rare_embedding: Optional[Dict] = None, rare_ignore_unknown_gene: bool = True, exons_to_keep: Optional[Set[int]] = None, genes_to_keep: Optional[Set[str]] = None, gene_file: Optional[str] = None, gene_types_to_keep: Optional[List[str]] = None, ignore_by_annotation: Optional[List[Tuple[str, Any]]] = None, max_pval: Optional[Dict[str, float]] = None, variants: Optional[pandas.DataFrame] = None, variants_to_keep: Optional[Union[List[str], str]] = None, zarr_dir: Optional[str] = None, cache_matrices: bool = False, verbose: bool = False, return_genotypes: bool = True)

Bases: torch.utils.data.Dataset

Initialization

__getitem__(idx: int) -> torch.tensor
__len__() -> int
get_stand_params()
setup_phenotypes(phenotype_file: str, sim_phenotype_file: Optional[str], skip_y_na: bool, skip_x_na: bool, sample_file: Optional[str])
get_variant_ids(matrix_indices: numpy.ndarray) -> numpy.ndarray
dense_to_sparse(dense_genotype: Union[torch.Tensor, Dict[str, torch.Tensor]], keep_groups: bool = False) -> pandas.DataFrame
get_annotations(variant_ids: Union[numpy.ndarray, pandas.Series], group: bool = False, aggregate_groups: bool = False) -> Union[torch.Tensor, Dict[str, torch.Tensor]]
setup_zarr(zarr_dir: str)
transform_data()
setup_annotations(annotation_file: Optional[str], annotation_aggregation: Union[str, dict], precomputed_annotations: Optional[Tuple[pandas.DataFrame, pandas.DataFrame, numpy.ndarray]] = None)
setup_variants(min_common_variant_count: Optional[int], min_common_af: Optional[Dict[str, float]], train_variants: Optional[pandas.DataFrame])
get_variant_metadata(grouping_level: Optional[str])
abstractmethod setup_common_groups()
get_variant_groups()
get_common_variants(sparse_variants: numpy.ndarray, sparse_genotype: numpy.ndarray)
get_rare_variants(idx, all_sparse_variants, sparse_genotype)
collate_fn(batch: Dict[str, List[Union[int, torch.Tensor]]]) -> Dict[str, Union[torch.Tensor, List[str]]]
get_metadata() -> Dict[str, Any]