Module epiclass.core.data_source
Module for reading source data files.
Classes
class EpiDataSource (hdf5: Path, chromsize: Path, metadata: Path)
-
Container for the source file paths (hdf5 list file, chromsize file, metadata file).
Expand source code
class EpiDataSource:
    """Container for the paths of the source files.

    Holds three paths: the hdf5 list file (a text file with one hdf5 path per
    line), the chromosome sizes file and the metadata file. The existence of
    all three files is checked at construction.

    Raises:
        OSError: If any of the given paths is not an existing file.
    """

    def __init__(self, hdf5: Path, chromsize: Path, metadata: Path):
        self._hdf5 = hdf5
        self._chromsize = chromsize
        self._metadata = metadata
        self.check_paths()

    @property
    def hdf5_file(self) -> Path:
        """Return hdf5 list file path."""
        return self._hdf5

    @property
    def chromsize_file(self) -> Path:
        """Return chromsize file path."""
        return self._chromsize

    @property
    def metadata_file(self) -> Path:
        """Return metadata file path."""
        return self._metadata

    def check_paths(self) -> None:
        """Make sure all three source files exist.

        Raises:
            OSError: On the first path that is not an existing file.
        """
        for path in [self._hdf5, self._chromsize, self._metadata]:
            if not path.is_file():
                raise OSError(
                    f"File does not exist : {path}.\n Expected file at : {path.resolve()}"
                )

    @staticmethod
    def get_file_list(hdf5_list_path: Path) -> List[Path]:
        """Return the list of hdf5 files named in a list file.

        The list file is read as UTF-8 text, one path per line. Blank lines
        are skipped so that a stray or trailing empty line does not turn into
        a bogus ``Path("")`` (i.e. the current directory) entry.
        """
        with open(hdf5_list_path, "r", encoding="utf-8") as my_file:
            return [Path(line.rstrip("\n")) for line in my_file if line.strip()]

    def hdf5_resolution(self) -> int:
        """Return resolution as an integer.

        The resolution is looked up for the first file named in the hdf5 list
        file: first from the hdf5 file itself, then, if that fails with a
        KeyError or FileNotFoundError, from its filename (a warning is emitted
        before falling back).

        Raises:
            ValueError: If the hdf5 list file names no file at all.
            KeyError: If the resolution cannot be determined from the
                filename either.
        """
        with open(self.hdf5_file, "r", encoding="utf-8") as my_file:
            # First non-blank line; None instead of a leaked StopIteration
            # when the list file is empty.
            first_line = next((line for line in my_file if line.strip()), None)

        if first_line is None:
            raise ValueError(f"No hdf5 file listed in: {self.hdf5_file}")

        first_path = Path(first_line.rstrip())

        try:
            resolution = self.get_file_hdf5_resolution(first_path)
        except (KeyError, FileNotFoundError) as err:
            warnings.warn(f"{err}. Seeking resolution from filename.")
            try:
                resolution = self.get_resolution_from_filename(first_path)
            # IndexError: the filename has no "_"-separated second field.
            except (KeyError, IndexError) as err2:
                raise KeyError(
                    f"Filename does not contain resolution: {first_path}"
                ) from err2
        return resolution

    @staticmethod
    def get_resolution_from_filename(path: Path) -> int:
        """Return resolution as an integer, read from the filename.

        The second underscore-separated field of the filename is used as a
        key into HDF5_RESOLUTION.

        Raises:
            IndexError: If the filename contains no underscore.
            KeyError: If the field is not a key of HDF5_RESOLUTION.
        """
        resolution_string = path.name.split("_")[1]
        return HDF5_RESOLUTION[resolution_string]

    @staticmethod
    def get_file_hdf5_resolution(hdf5_file: Path) -> int:
        """Return resolution as an integer, read from the hdf5 file.

        The value is the first element of the file's 'bin' attribute.

        Raises:
            KeyError: If the file has no 'bin' attribute.
        """
        with h5py.File(hdf5_file, "r") as h5_file:
            try:
                resolution = int(h5_file.attrs["bin"][0])  # type: ignore
            except KeyError as err:
                raise KeyError(
                    f"Resolution not found in {hdf5_file}. "
                    "(attribute 'bin' does not exist)"
                ) from err
        return resolution

    @staticmethod
    def load_external_chrom_file(chrom_file: Path | str) -> List[Tuple[str, int]]:
        """Return sorted list with chromosome (name, size) pairs.

        Each non-blank line of the file must hold exactly two
        whitespace-separated fields: the chromosome name and its size.
        Blank lines are skipped.

        Raises:
            ValueError: If a non-blank line does not have exactly two fields
                or the size is not an integer.
        """
        with open(chrom_file, "r", encoding="utf-8") as my_file:
            pairs = [line.split() for line in my_file if line.strip()]
        return sorted((name, int(size)) for name, size in pairs)

    def load_chrom_sizes(self) -> List[Tuple[str, int]]:
        """Return sorted list with chromosome (name, size) pairs.

        This order is the same as the order of chroms in the concatenated signals.
        """
        return self.load_external_chrom_file(self.chromsize_file)
Static methods
def get_file_hdf5_resolution(hdf5_file: Path) ‑> int
-
Return resolution as an integer, read from the 'bin' attribute of the given hdf5 file.
def get_file_list(hdf5_list_path: Path) ‑> List[pathlib.Path]
-
Return list of hdf5 files.
def get_resolution_from_filename(path: Path) ‑> int
-
Return resolution as an integer, looked up from the second underscore-separated field of the filename.
def load_external_chrom_file(chrom_file: Path | str)
-
Return sorted list with chromosome (name, size) pairs.
Instance variables
prop chromsize_file : Path
-
Return chromsize file path.
Expand source code
@property def chromsize_file(self) -> Path: """Return chromsize file path.""" return self._chromsize
prop hdf5_file : Path
-
Return hdf5 list file path.
Expand source code
@property def hdf5_file(self) -> Path: """Return hdf5 list file path.""" return self._hdf5
prop metadata_file : Path
-
Return metadata file path.
Expand source code
@property def metadata_file(self) -> Path: """Return metadata file path.""" return self._metadata
Methods
def check_paths(self) ‑> None
-
Make sure all source files exist; raise an OSError otherwise.
def hdf5_resolution(self) ‑> int
-
Return resolution as an integer.
def load_chrom_sizes(self) ‑> List[Tuple[str, int]]
-
Return sorted list with chromosome (name, size) pairs. This order is the same as the order of chroms in the concatenated signals.