Source code for allensdk.core.cell_types_cache

# Allen Institute Software License - This software license is the 2-clause BSD
# license plus a third clause that prohibits redistribution for commercial
# purposes without further permission.
#
# Copyright 2015-2016. Allen Institute. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Redistributions for commercial purposes are not permitted without the
# Allen Institute's written permission.
# For purposes of this license, commercial purposes is the incorporation of the
# Allen Institute's software into anything for which you will charge fees or
# other compensation. Contact terms@alleninstitute.org for commercial licensing
# opportunities.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
import os
from six import string_types

from allensdk.config.manifest_builder import ManifestBuilder
from allensdk.api.cache import Cache
from allensdk.api.queries.cell_types_api import CellTypesApi

from . import json_utilities as json_utilities
from .nwb_data_set import NwbDataSet
from . import  swc

import logging
import warnings
import pandas as pd


class CellTypesCache(Cache):
    """
    Cache class for storing and accessing data from the Cell Types Database.
    By default, this class will cache any downloaded metadata or files in
    well known locations defined in a manifest file.  This behavior can be
    disabled.

    Attributes
    ----------

    api: CellTypesApi instance
        The object used for making API queries related to the Cell Types
        Database

    Parameters
    ----------

    cache: boolean
        Whether the class should save results of API queries to locations
        specified in the manifest file.  Queries for files (as opposed to
        metadata) must have a file location.  If caching is disabled, those
        locations must be specified in the function call
        (e.g. get_ephys_data(file_name='file.nwb')).

    manifest_file: string
        File name of the manifest to be read.  Default is
        "cell_types_manifest.json".

    base_uri: string
        Forwarded unchanged to the CellTypesApi constructor.
        Default is None.
    """

    # manifest keys
    CELLS_KEY = 'CELLS'
    EPHYS_FEATURES_KEY = 'EPHYS_FEATURES'
    MORPHOLOGY_FEATURES_KEY = 'MORPHOLOGY_FEATURES'
    EPHYS_DATA_KEY = 'EPHYS_DATA'
    EPHYS_SWEEPS_KEY = 'EPHYS_SWEEPS'
    RECONSTRUCTION_KEY = 'RECONSTRUCTION'
    MARKER_KEY = 'MARKER'

    MANIFEST_VERSION = "1.1"

    def __init__(self, cache=True,
                 manifest_file='cell_types_manifest.json', base_uri=None):
        super(CellTypesCache, self).__init__(
            manifest=manifest_file, cache=cache,
            version=self.MANIFEST_VERSION)
        self.api = CellTypesApi(base_uri=base_uri)

    @staticmethod
    def _require_file_name(file_name):
        """ Return file_name, raising ValueError if it is None. """
        if file_name is None:
            raise ValueError(
                "Please enable caching (CellTypesCache(cache=True)) "
                "or specify a file_name.")
        return file_name

    def get_cells(self, file_name=None, require_morphology=False,
                  require_reconstruction=False, reporter_status=None,
                  species=None, simple=True):
        """
        Download metadata for all cells in the database and optionally return a
        subset filtered by whether or not they have a morphology or
        reconstruction.

        Parameters
        ----------

        file_name: string
            File name to save/read the cell metadata as JSON.  If file_name
            is None, the file_name will be pulled out of the manifest.  If
            caching is disabled, no file will be saved. Default is None.

        require_morphology: boolean
            Filter out cells that have no morphological images.

        require_reconstruction: boolean
            Filter out cells that have no morphological reconstructions.

        reporter_status: list
            Filter for cells that have one or more cell reporter statuses.
            A single string is accepted and treated as a one-element list.

        species: list
            Filter for cells that belong to one or more species.  If None,
            return all.  Entries must be drawn from
            [ CellTypesApi.MOUSE, CellTypesApi.HUMAN ].  A single string is
            accepted and treated as a one-element list.

        simple: boolean
            Forwarded unchanged to CellTypesApi.filter_cells_api.
            Default is True.
        """
        file_name = self.get_cache_path(file_name, self.CELLS_KEY)

        cells = self.api.list_cells_api(path=file_name,
                                        strategy='lazy',
                                        **Cache.cache_json())

        # Both filters are documented as lists; wrap a bare string so it is
        # matched as one value rather than iterated character by character.
        if isinstance(reporter_status, string_types):
            reporter_status = [reporter_status]
        if isinstance(species, string_types):
            species = [species]

        # filter the cells on the way out
        return self.api.filter_cells_api(cells,
                                         require_morphology,
                                         require_reconstruction,
                                         reporter_status,
                                         species,
                                         simple)

    def get_ephys_sweeps(self, specimen_id, file_name=None):
        """
        Download sweep metadata for a single cell specimen.

        Parameters
        ----------

        specimen_id: int
            ID of a cell.

        file_name: string
            File name to save/read the sweep metadata as JSON.  If file_name
            is None, the file_name will be pulled out of the manifest.
            Default is None.
        """
        file_name = self.get_cache_path(
            file_name, self.EPHYS_SWEEPS_KEY, specimen_id)

        return self.api.get_ephys_sweeps(specimen_id,
                                         strategy='lazy',
                                         path=file_name,
                                         **Cache.cache_json())

    def get_ephys_features(self, dataframe=False, file_name=None):
        """
        Download electrophysiology features for all cells in the database.

        Parameters
        ----------

        file_name: string
            File name to save/read the ephys features metadata as CSV.
            If file_name is None, the file_name will be pulled out of the
            manifest.  If caching is disabled, no file will be saved.
            Default is None.

        dataframe: boolean
            Deprecated; passing True emits a warning.  Return the output as
            a Pandas DataFrame.  If False, return a list of dictionaries.
            Only consulted when caching is enabled.
        """
        file_name = self.get_cache_path(file_name, self.EPHYS_FEATURES_KEY)

        if self.cache:
            if dataframe:
                warnings.warn("dataframe argument is deprecated.")
                args = Cache.cache_csv_dataframe()
            else:
                args = Cache.cache_csv_json()
            # the lazy strategy is only requested on the caching path
            args['strategy'] = 'lazy'
        else:
            args = Cache.nocache_json()

        return self.api.get_ephys_features(path=file_name, **args)

    def get_morphology_features(self, dataframe=False, file_name=None):
        """
        Download morphology features for all cells with reconstructions in
        the database.

        Parameters
        ----------

        file_name: string
            File name to save/read the morphology features metadata as CSV.
            If file_name is None, the file_name will be pulled out of the
            manifest.  If caching is disabled, no file will be saved.
            Default is None.

        dataframe: boolean
            Deprecated; passing True emits a warning.  Return the output as
            a Pandas DataFrame.  If False, return a list of dictionaries.
            Only consulted when caching is enabled.
        """
        file_name = self.get_cache_path(
            file_name, self.MORPHOLOGY_FEATURES_KEY)

        if self.cache:
            if dataframe:
                warnings.warn("dataframe argument is deprecated.")
                args = Cache.cache_csv_dataframe()
            else:
                args = Cache.cache_csv_json()
        else:
            args = Cache.nocache_json()

        # here the strategy and path are set whether or not caching is on
        args['strategy'] = 'lazy'
        args['path'] = file_name

        return self.api.get_morphology_features(**args)

    def get_all_features(self, dataframe=False, require_reconstruction=True):
        """
        Download morphology and electrophysiology features for all cells
        and merge them into a single table.

        Parameters
        ----------

        dataframe: boolean
            Deprecated; passing True emits a warning.  Return the output as
            a Pandas DataFrame.  If False, return a list of dictionaries.

        require_reconstruction: boolean
            Only return ephys and morphology features for cells that have
            reconstructions. Default True.
        """
        ephys_features = pd.DataFrame(self.get_ephys_features())
        morphology_features = pd.DataFrame(self.get_morphology_features())

        # an inner join on specimen_id keeps only cells present in both tables
        how = 'inner' if require_reconstruction else 'outer'

        all_features = ephys_features.merge(morphology_features,
                                            how=how,
                                            on='specimen_id')

        if dataframe:
            warnings.warn("dataframe argument is deprecated.")
            return all_features

        return all_features.to_dict('records')

    def get_ephys_data(self, specimen_id, file_name=None):
        """
        Download electrophysiology traces for a single cell in the database.

        Parameters
        ----------

        specimen_id: int
            The ID of a cell specimen to download.

        file_name: string
            File name to save/read the electrophysiology traces as NWB.
            If file_name is None, the file_name will be pulled out of the
            manifest.  Default is None.

        Returns
        -------
        NwbDataSet
            A class instance with helper methods for retrieving stimulus
            and response traces out of an NWB file.

        Raises
        ------
        ValueError
            If no file location could be determined.
        """
        file_name = self._require_file_name(self.get_cache_path(
            file_name, self.EPHYS_DATA_KEY, specimen_id))

        self.api.save_ephys_data(specimen_id, file_name, strategy='lazy')

        return NwbDataSet(file_name)

    def get_reconstruction(self, specimen_id, file_name=None):
        """
        Download and open a reconstruction for a single cell in the database.

        Parameters
        ----------

        specimen_id: int
            The ID of a cell specimen to download.

        file_name: string
            File name to save/read the reconstruction SWC.  If file_name is
            None, the file_name will be pulled out of the manifest.
            Default is None.

        Returns
        -------
        Morphology
            A class instance with methods for accessing morphology
            compartments.

        Raises
        ------
        ValueError
            If no file location could be determined.
        """
        file_name = self._require_file_name(self.get_cache_path(
            file_name, self.RECONSTRUCTION_KEY, specimen_id))

        # only download when the file is not already on disk
        if not os.path.exists(file_name):
            self.api.save_reconstruction(specimen_id, file_name)

        return swc.read_swc(file_name)

    def get_reconstruction_markers(self, specimen_id, file_name=None):
        """
        Download and open a reconstruction marker file for a single cell in
        the database.

        Parameters
        ----------

        specimen_id: int
            The ID of a cell specimen to download.

        file_name: string
            File name to save/read the reconstruction marker.  If file_name
            is None, the file_name will be pulled out of the manifest.
            Default is None.

        Returns
        -------
        list
            The result of swc.read_marker_file for the marker file, or an
            empty list if the download raised LookupError.

        Raises
        ------
        ValueError
            If no file location could be determined.
        """
        file_name = self._require_file_name(self.get_cache_path(
            file_name, self.MARKER_KEY, specimen_id))

        if not os.path.exists(file_name):
            try:
                self.api.save_reconstruction_markers(specimen_id, file_name)
            except LookupError as e:
                # a failed lookup is logged and reported as "no markers"
                logging.warning(e.args)
                return []

        return swc.read_marker_file(file_name)

    def build_manifest(self, file_name):
        """
        Construct a manifest for this Cache class and save it in a file.

        Parameters
        ----------

        file_name: string
            File location to save the manifest.
        """
        mb = ManifestBuilder()
        mb.set_version(self.MANIFEST_VERSION)
        mb.add_path('BASEDIR', '.')

        # (manifest key, path spec) pairs, all files relative to BASEDIR;
        # '%d' is the placeholder for the specimen id
        file_specs = (
            (self.CELLS_KEY, 'cells.json'),
            (self.EPHYS_DATA_KEY, 'specimen_%d/ephys.nwb'),
            (self.EPHYS_FEATURES_KEY, 'ephys_features.csv'),
            (self.MORPHOLOGY_FEATURES_KEY, 'morphology_features.csv'),
            (self.RECONSTRUCTION_KEY, 'specimen_%d/reconstruction.swc'),
            (self.MARKER_KEY, 'specimen_%d/reconstruction.marker'),
            (self.EPHYS_SWEEPS_KEY, 'specimen_%d/ephys_sweeps.json'),
        )
        for key, spec in file_specs:
            mb.add_path(key, spec, typename='file', parent_key='BASEDIR')

        mb.write_json_file(file_name)
class ReporterStatus:
    """ String constants accepted when filtering cells by reporter status. """

    POSITIVE, NEGATIVE = 'positive', 'negative'

    # NOTE(review): both names are bound to None, so they cannot be told
    # apart from each other or from "no filter" -- confirm this is intended.
    NA = INDETERMINATE = None