Source code for merlin.util.barcodedb

from abc import abstractmethod
from typing import List
import pandas
import numpy as np

from merlin.core import dataset


class BarcodeDB:

    """
    Abstract interface for storing and retrieving barcode information.

    The following fields are kept for every barcode in the database:
        barcode - the error corrected binary word corresponding to the
            barcode
        assigned_barcode - (undocumented)
        barcode_id - the index of the barcode in the codebook
        fov - the field of view where the barcode was identified
        mean_intensity - the mean intensity in the pixels corresponding
            to this barcode
        max_intensity - the max intensity in the pixels corresponding to
            this barcode
        area - the number of pixels covered by the barcode
        mean_distance - the distance between the barcode and the measured
            pixel traces, averaged over all pixels corresponding to the
            barcode
        min_distance - the minimum distance between the barcode and the
            measured pixel traces of all pixels corresponding to the
            barcode
        x,y,z - the average x,y,z position of all pixels covered by the
            barcode
        weighted_x, weighted_y, weighted_z - the average x,y,z position
            of all pixels covered by the barcode, weighted by the
            magnitude of each pixel
        global_x, global_y, global_z - the global x,y,z position of the
            barcode
        cell_index - the cell that contains this barcode
        intensity_i - the mean intensity across corresponding pixels for
            bit i, where i is an integer from 0 to the number of bits-1
    """

    def __init__(self, dataSet, analysisTask):
        """Bind the database to a data set and an analysis task.

        Args:
            dataSet: the data set that owns the barcodes. Its
                get_codebook() is used when the analysis task does not
                provide one.
            analysisTask: the analysis task the barcodes belong to. Its
                get_codebook() is tried first.
        """
        self._dataSet = dataSet
        self._analysisTask = analysisTask

        # The task's codebook takes precedence; an AttributeError while
        # asking the task for it falls back to the data set's codebook.
        try:
            codebook = analysisTask.get_codebook()
        except AttributeError:
            codebook = dataSet.get_codebook()
        self._codebook = codebook

    def _get_bc_column_types(self):
        """Map stored column names to the numpy type used for each.

        Returns:
            a dictionary with the fixed columns first, followed by one
            'intensity_i' float32 entry for every bit reported by the
            codebook's get_bit_count().
        """
        fixedColumns = (
            ('barcode_id', np.uint16),
            ('fov', np.uint16),
            ('mean_intensity', np.float32),
            ('max_intensity', np.float32),
            ('area', np.uint16),
            ('mean_distance', np.float32),
            ('min_distance', np.float32),
            ('x', np.float32),
            ('y', np.float32),
            ('z', np.float32),
            ('global_x', np.float32),
            ('global_y', np.float32),
            ('global_z', np.float32),
            ('cell_index', np.int32),
        )
        columnTypes = dict(fixedColumns)
        columnTypes.update(
            ('intensity_' + str(bit), np.float32)
            for bit in range(self._codebook.get_bit_count()))
        return columnTypes

    @abstractmethod
    def empty_database(self, fov: int = None) -> None:
        """Remove barcodes from this database.

        Args:
            fov: index of the field of view. If specified, only barcodes
                corresponding to that fov are removed. Otherwise, all
                barcodes are removed.
        """

    @abstractmethod
    def get_barcodes(self, fov: int = None, columnList: List[str] = None,
                     chunksize: int = None):
        """Get barcodes stored in this database.

        Args:
            fov: index of the field of view. If None, barcodes for all
                fovs are returned.
            columnList: list of columns to extract. If not specified, all
                columns are returned.
            chunksize: the size of chunks to iterate. If not specified, a
                pandas dataframe is returned; otherwise an iterator over
                the barcodes is returned.

        Returns:
            a pandas dataframe containing all the requested barcodes when
            chunksize is not set, otherwise an iterator over the
            requested barcodes.
        """

    @abstractmethod
    def get_filtered_barcodes(
            self, areaThreshold: int, intensityThreshold: float,
            distanceThreshold: float = None, fov: int = None,
            chunksize: int = None):
        """Get the barcodes that pass the area and intensity thresholds.

        Args:
            areaThreshold: the minimum area threshold. Barcodes that have
                an area equal to the specified threshold are included in
                the output.
            intensityThreshold: the minimum value of mean_intensity for
                the selected barcodes
            distanceThreshold: the maximum value of min_distance for the
                selected barcodes
            fov: index of the field of view. If None, barcodes for all
                fovs are returned.
            chunksize: the size of chunks to iterate. If not specified, a
                pandas dataframe is returned; otherwise an iterator over
                the barcodes is returned.

        Returns:
            a pandas dataframe containing all the requested barcodes when
            chunksize is not set, otherwise an iterator over the
            requested barcodes.
        """

    @abstractmethod
    def get_intensities_for_barcodes_with_area(
            self, area: int) -> pandas.Series:
        """Get the barcode intensities for barcodes with the given area.

        Args:
            area: the area a barcode must have to be included.
        """

    @abstractmethod
    def write_barcodes(self, barcodeInformation: pandas.DataFrame,
                       fov: int = None) -> None:
        """Write the specified barcodes into the barcode database.

        If all the barcodes correspond to the same fov, then fov can be
        specified to improve performance. This function does not verify
        that fov is specified correctly. If fov is specified but the
        provided barcodes are not all from the same fov, the barcode
        database may become corrupted.

        Args:
            barcodeInformation: barcodes to write to the database. The
                dataframe must have the columns specified for a barcode
                database.
            fov: the fov of the barcodes if they all correspond to the
                same fov. If barcodeInformation contains barcodes from
                different fovs, then fov should be set to None.
        """

    def get_barcode_intensities(self) -> pandas.Series:
        """Get mean intensities for all barcodes in this database.

        Returns:
            series containing the mean intensity of every barcode
        """
        column = 'mean_intensity'
        return self.get_barcodes(columnList=[column])[column]

    def get_barcode_areas(self) -> pandas.Series:
        """Get areas for all barcodes in this database.

        Returns:
            series containing the area of every barcode
        """
        column = 'area'
        return self.get_barcodes(columnList=[column])[column]

    def get_barcode_distances(self) -> pandas.Series:
        """Get distances for all barcodes in this database.

        Returns:
            series containing the mean distance of every barcode
        """
        column = 'mean_distance'
        return self.get_barcodes(columnList=[column])[column]
class PyTablesBarcodeDB(BarcodeDB):

    """
    A barcode database backed by pandas HDF stores.

    Stores are opened and deleted through the data set
    (open_pandas_hdfstore / delete_pandas_hdfstore) under the name
    'barcode_data', addressed by analysis task and fov, and the barcodes
    are kept in each store under the key 'barcodes'.
    """

    def __init__(self, dataSet: dataset.DataSet, analysisTask):
        """Create a database for the given data set and analysis task.

        Args:
            dataSet: the data set used to open and delete the stores.
            analysisTask: the analysis task the barcodes belong to.
        """
        super().__init__(dataSet, analysisTask)

    def empty_database(self, fov: int=None) -> None:
        """Remove barcodes from this database.

        Args:
            fov: index of the field of view. If specified, only the store
                for that fov is deleted. If None, the store of every fov
                reported by the data set is deleted, followed by the
                store addressed with fov=None.
        """
        if fov is None:
            for f in self._dataSet.get_fovs():
                self.empty_database(f)

        # When fov is None this also removes the store addressed without
        # a fov, which earlier versions of write_barcodes could create.
        self._dataSet.delete_pandas_hdfstore(
            'barcode_data', self._analysisTask, fov, 'barcodes')

    def get_barcodes(self, fov=None, columnList=None, chunkSize=None)\
            -> pandas.DataFrame:
        """Get barcodes stored in this database.

        Args:
            fov: index of the field of view. If None, the barcodes of all
                fovs reported by the data set are concatenated.
            columnList: list of columns to extract. If not specified, all
                columns are returned.
            chunkSize: accepted but not used by this implementation; the
                complete dataframe is always returned.
                NOTE(review): the abstract interface spells this
                parameter 'chunksize'.

        Returns:
            a pandas dataframe with the requested barcodes. When there
            are no barcodes (missing store, missing 'barcodes' key, or no
            rows) the dataframe is empty but still has the requested
            columns, or all columns from _get_bc_column_types() when
            columnList is not given.
        """
        if fov is None:
            perFov = [self.get_barcodes(fov=x, columnList=columnList)
                      for x in self._dataSet.get_fovs()]
            # pandas.concat raises ValueError when given an empty list,
            # which happens for a data set without any fov.
            if perFov:
                barcodes = pandas.concat(perFov, sort=False)
            else:
                barcodes = pandas.DataFrame()
        else:
            try:
                with self._dataSet.open_pandas_hdfstore(
                        'r', 'barcode_data', self._analysisTask, fov,
                        'barcodes') as pandasHDF:
                    if 'barcodes' not in pandasHDF:
                        # Fall through to the column fix-up below rather
                        # than returning a dataframe without columns.
                        barcodes = pandas.DataFrame()
                    elif columnList is None:
                        barcodes = pandasHDF['barcodes']
                    else:
                        barcodes = pandas.read_hdf(
                            pandasHDF, key='barcodes', columns=columnList)
            except OSError:
                # The store could not be opened: treat as no barcodes.
                barcodes = pandas.DataFrame()

        # if no barcodes are present make sure the dataframe still has
        # the correct columns
        if len(barcodes) == 0:
            if columnList:
                barcodes = pandas.DataFrame(columns=columnList)
            else:
                barcodes = pandas.DataFrame(
                    columns=list(self._get_bc_column_types()))

        return barcodes

    def get_filtered_barcodes(
            self, areaThreshold: int, intensityThreshold: float,
            distanceThreshold: float=None, fov: int=None,
            chunksize: int=None):
        """Get the barcodes that pass the area and intensity thresholds.

        Args:
            areaThreshold: the minimum area; barcodes with an area equal
                to the threshold are included.
            intensityThreshold: the minimum mean_intensity; barcodes with
                a mean_intensity equal to the threshold are included.
            distanceThreshold: the maximum min_distance; barcodes with a
                min_distance equal to the threshold are included. If
                None, no distance filter is applied.
            fov: index of the field of view. If None, barcodes for all
                fovs are considered.
            chunksize: accepted but not used by this implementation; a
                dataframe is always returned.

        Returns:
            a pandas dataframe containing the barcodes that pass every
            requested threshold.
        """
        allBarcodes = self.get_barcodes(fov)

        keep = (allBarcodes['area'] >= areaThreshold) \
            & (allBarcodes['mean_intensity'] >= intensityThreshold)
        if distanceThreshold is not None:
            keep = keep & (allBarcodes['min_distance'] <= distanceThreshold)

        return allBarcodes[keep]

    def get_intensities_for_barcodes_with_area(
            self, area: int) -> pandas.Series:
        """Get the mean intensities of barcodes with the given area.

        Args:
            area: the exact area a barcode must have to be included.

        Returns:
            series with the mean_intensity of every matching barcode
            across all fovs.
        """
        allBarcodes = self.get_barcodes(columnList=['area', 'mean_intensity'])
        return allBarcodes.loc[allBarcodes['area'] == area, 'mean_intensity']

    def write_barcodes(self, barcodeInformation: pandas.DataFrame,
                       fov: int=None) -> None:
        """Append the specified barcodes to the barcode database.

        This function does not verify that fov matches the 'fov' column
        of the provided barcodes.

        Args:
            barcodeInformation: barcodes to write. An empty dataframe is
                ignored. The dataframe is cast with the types from
                _get_bc_column_types() before it is appended.
            fov: the fov of the barcodes if they all correspond to the
                same fov. If None, the barcodes are split by their 'fov'
                column and each subset is written to the store of its
                own fov.
        """
        if len(barcodeInformation) <= 0:
            return

        if fov is None:
            for f in barcodeInformation['fov'].unique():
                self.write_barcodes(
                    barcodeInformation.loc[barcodeInformation['fov'] == f],
                    fov=f)
            # Every row has been written to the store of its own fov.
            # Stop here so the full frame is not appended a second time
            # to the store addressed with fov=None.
            return

        with self._dataSet.open_pandas_hdfstore(
                'a', 'barcode_data', self._analysisTask, fov, 'barcodes'
        ) as pandasHDF:
            tablesType = self._get_bc_column_types()
            pandasHDF.append('barcodes',
                             barcodeInformation.astype(tablesType),
                             format='table')