Source code for allensdk.api.queries.brain_observatory_api

# Allen Institute Software License - This software license is the 2-clause BSD
# license plus a third clause that prohibits redistribution for commercial
# purposes without further permission.
#
# Copyright 2017. Allen Institute. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Redistributions for commercial purposes are not permitted without the
# Allen Institute's written permission.
# For purposes of this license, commercial purposes is the incorporation of the
# Allen Institute's software into anything for which you will charge fees or
# other compensation. Contact terms@alleninstitute.org for commercial licensing
# opportunities.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#

import logging

import pandas as pd
from six import string_types

from allensdk.config.manifest import Manifest
import allensdk.brain_observatory.stimulus_info as stimulus_info

from .rma_template import RmaTemplate
from ..cache import cacheable, Cache
from .rma_pager import pageable


[docs]class BrainObservatoryApi(RmaTemplate): _log = logging.getLogger('allensdk.api.queries.brain_observatory_api') NWB_FILE_TYPE = 'NWBOphys' CELL_MAPPING_ID = 590985414 rma_templates = \ {"brain_observatory_queries": [ {'name': 'list_isi_experiments', 'description': 'see name', 'model': 'IsiExperiment', 'num_rows': 'all', 'count': False, 'criteria_params': [] }, {'name': 'isi_experiment_by_ids', 'description': 'see name', 'model': 'IsiExperiment', 'criteria': '[id$in{{ isi_experiment_ids }}]', 'include': 'experiment_container(ophys_experiments,targeted_structure)', 'num_rows': 'all', 'count': False, 'criteria_params': ['isi_experiment_ids'] }, {'name': 'ophys_experiment_by_ids', 'description': 'see name', 'model': 'OphysExperiment', 'criteria': '{% if ophys_experiment_ids is defined %}[id$in{{ ophys_experiment_ids }}]{%endif%}', 'include': 'experiment_container,well_known_files(well_known_file_type),targeted_structure,specimen(donor(age,transgenic_lines))', 'num_rows': 'all', 'count': False, 'criteria_params': ['ophys_experiment_ids'] }, {'name': 'ophys_experiment_data', 'description': 'see name', 'model': 'WellKnownFile', 'criteria': '[attachable_id$eq{{ ophys_experiment_id }}],well_known_file_type[name$eq%s]' % NWB_FILE_TYPE, 'num_rows': 'all', 'count': False, 'criteria_params': ['ophys_experiment_id'] }, {'name': 'column_definitions', 'description': 'see name', 'model': 'ApiColumnDefinition', 'criteria': '[api_class_name$eq{{ api_class_name }}]', 'num_rows': 'all', 'count': False, 'criteria_params': ['api_class_name'] }, {'name': 'column_definition_class_names', 'description': 'see name', 'model': 'ApiColumnDefinition', 'only': ['api_class_name'], 'num_rows': 'all', 'count': False, }, {'name': 'stimulus_mapping', 'description': 'see name', 'model': 'ApiCamStimulusMapping', 'criteria': '{% if stimulus_mapping_ids is defined %}[id$in{{ stimulus_mapping_ids }}]{%endif%}', 'num_rows': 'all', 'count': False, 'criteria_params': ['stimulus_mapping_ids'] }, {'name': 'experiment_container', 'description': 'see name', 'model': 'ExperimentContainer', 'criteria': '{% if experiment_container_ids is defined %}[id$in{{ experiment_container_ids }}]{%endif%}', 'include': 'ophys_experiments,isi_experiment,specimen(donor(conditions,age,transgenic_lines)),targeted_structure', 'num_rows': 'all', 'count': False, 'criteria_params': ['experiment_container_ids'] }, {'name': 'experiment_container_metric', 'description': 'see name', 'model': 'ApiCamExperimentContainerMetric', 'criteria': '{% if experiment_container_metric_ids is defined %}[id$in{{ experiment_container_metric_ids }}]{%endif%}', 'num_rows': 'all', 'count': False, 'criteria_params': ['experiment_container_metric_ids'] }, {'name': 'cell_metric', 'description': 'see name', 'model': 'ApiCamCellMetric', 'criteria': '{% if cell_specimen_ids is defined %}[cell_specimen_id$in{{ cell_specimen_ids }}]{%endif%}', 'criteria_params': ['cell_specimen_ids'] }, {'name': 'cell_specimen_id_mapping_table', 'description': 'see name', 'model': 'WellKnownFile', 'criteria': '[id$eq{{ mapping_table_id }}],well_known_file_type[name$eqOphysCellSpecimenIdMapping]', 'num_rows': 'all', 'count': False, 'criteria_params': ['mapping_table_id']} ]} _QUERY_TEMPLATES = { "=": '({0} == {1})', "<": '({0} < {1})', ">": '({0} > {1})', "<=": '({0} <= {1})', ">=": '({0} >= {1})', "between": '({0} >= {1}) and ({0} <= {2})', "in": '({0} == {1})', "is": '({0} == {1})' } def __init__(self, base_uri=None, datacube_uri=None): super(BrainObservatoryApi, self).__init__(base_uri, query_manifest=BrainObservatoryApi.rma_templates) self.datacube_uri = datacube_uri
[docs] @cacheable() def get_ophys_experiments(self, ophys_experiment_ids=None): ''' Get OPhys Experiments by id Parameters ---------- ophys_experiment_ids : integer or list of integers, optional only select specific experiments. Returns ------- dict : ophys experiment metadata ''' data = self.template_query('brain_observatory_queries', 'ophys_experiment_by_ids', ophys_experiment_ids=ophys_experiment_ids) return data
[docs] def get_isi_experiments(self, isi_experiment_ids=None): ''' Get ISI Experiments by id Parameters ---------- isi_experiment_ids : integer or list of integers, optional only select specific experiments. Returns ------- dict : isi experiment metadata ''' data = self.template_query('brain_observatory_queries', 'isi_experiment_by_ids', isi_experiment_ids=isi_experiment_ids) return data
[docs] def list_isi_experiments(self, isi_ids=None): '''List ISI experiments available through the Allen Institute API Parameters ---------- neuronal_model_ids : integer or list of integers, optional only select specific isi experiments. Returns ------- dict : neuronal model metadata ''' data = self.template_query('brain_observatory_queries', 'list_isi_experiments') return data
[docs] def list_column_definition_class_names(self): ''' Get column definitions Parameters ---------- Returns ------- list : api class name strings ''' data = self.template_query('brain_observatory_queries', 'column_definition_class_names') names = list(set([n['api_class_name'] for n in data])) return names
[docs] def get_column_definitions(self, api_class_name=None): ''' Get column definitions Parameters ---------- api_class_names : string or list of strings, optional only select specific column definition records. Returns ------- dict : column definition metadata ''' data = self.template_query('brain_observatory_queries', 'column_definitions', api_class_name=api_class_name) return data
[docs] @cacheable() def get_stimulus_mappings(self, stimulus_mapping_ids=None): ''' Get stimulus mappings by id Parameters ---------- stimulus_mapping_ids : integer or list of integers, optional only select specific stimulus mapping records. Returns ------- dict : stimulus mapping metadata ''' data = self.template_query('brain_observatory_queries', 'stimulus_mapping', stimulus_mapping_ids=stimulus_mapping_ids) return data
[docs] @cacheable() @pageable(num_rows=2000, total_rows='all') def get_cell_metrics(self, cell_specimen_ids=None, *args, **kwargs): ''' Get cell metrics by id Parameters ---------- cell_metrics_ids : integer or list of integers, optional only select specific cell metric records. Returns ------- dict : cell metric metadata ''' data = self.template_query('brain_observatory_queries', 'cell_metric', cell_specimen_ids=cell_specimen_ids, *args, **kwargs) return data
[docs] @cacheable() def get_experiment_containers(self, experiment_container_ids=None): ''' Get experiment container by id Parameters ---------- experiment_container_ids : integer or list of integers, optional only select specific experiment containers. Returns ------- dict : experiment container metadata ''' data = self.template_query('brain_observatory_queries', 'experiment_container', experiment_container_ids=experiment_container_ids) return data
[docs] def get_experiment_container_metrics(self, experiment_container_metric_ids=None): ''' Get experiment container metrics by id Parameters ---------- isi_experiment_ids : integer or list of integers, optional only select specific experiments. Returns ------- dict : isi experiment metadata ''' data = self.template_query('brain_observatory_queries', 'experiment_container_metric', experiment_container_metric_ids=experiment_container_metric_ids) return data
[docs] @cacheable(strategy='create', pathfinder=Cache.pathfinder(file_name_position=2, path_keyword='file_name')) def save_ophys_experiment_data(self, ophys_experiment_id, file_name): data = self.template_query('brain_observatory_queries', 'ophys_experiment_data', ophys_experiment_id=ophys_experiment_id) try: file_url = data[0]['download_link'] except Exception as _: raise Exception("ophys experiment %d has no data file" % ophys_experiment_id) self._log.warning( "Downloading ophys_experiment %d NWB. This can take some time." % ophys_experiment_id) self.retrieve_file_over_http(self.api_url + file_url, file_name)
[docs] def filter_experiment_containers(self, containers, ids=None, targeted_structures=None, imaging_depths=None, transgenic_lines=None, include_failed=False): if not include_failed: containers = [c for c in containers if not c.get('failed', False)] if ids is not None: containers = [c for c in containers if c['id'] in ids] if targeted_structures is not None: containers = [c for c in containers if c[ 'targeted_structure']['acronym'] in targeted_structures] if imaging_depths is not None: containers = [c for c in containers if c[ 'imaging_depth'] in imaging_depths] if transgenic_lines is not None: tls = [ tl.lower() for tl in transgenic_lines ] containers = [c for c in containers for tl in c['specimen'][ 'donor']['transgenic_lines'] if tl['name'].lower() in tls] return containers
[docs] def filter_ophys_experiments(self, experiments, ids=None, experiment_container_ids=None, targeted_structures=None, imaging_depths=None, transgenic_lines=None, stimuli=None, session_types=None, include_failed=False, require_eye_tracking=False): # re-using the code from above experiments = self.filter_experiment_containers(experiments, ids=ids, targeted_structures=targeted_structures, imaging_depths=imaging_depths, transgenic_lines=transgenic_lines) if require_eye_tracking: experiments = [e for e in experiments if e.get('fail_eye_tracking', None) is False] if not include_failed: experiments = [e for e in experiments if not e.get('experiment_container',{}).get('failed', False)] if experiment_container_ids is not None: experiments = [e for e in experiments if e[ 'experiment_container_id'] in experiment_container_ids] if session_types is not None: experiments = [e for e in experiments if e[ 'stimulus_name'] in session_types] if stimuli is not None: experiments = [e for e in experiments if len(set(stimuli) & set(stimulus_info.stimuli_in_session(e['stimulus_name']))) > 0] return experiments
[docs] def filter_cell_specimens(self, cell_specimens, ids=None, experiment_container_ids=None, include_failed=False, filters=None): """ Filter a list of cell specimen records returned from the get_cell_metrics method according some of their properties. Parameters ---------- cell_specimens: list of dicts List of records returned by the get_cell_metrics method. ids: list of integers Return only records for cells with cell specimen ids in this list experiment_container_ids: list of integers Return only records for cells that belong to experiment container ids in this list include_failed: bool Whether to include cells from failed experiment containers filters: list of dicts Custom query used to reproduce filter sets created in the Allen Brain Observatory web application. The general form is a list of dictionaries each of which describes a filtering operation based on a metric. For more information, see dataframe_query. """ if not include_failed: cell_specimens = [c for c in cell_specimens if not c.get( 'failed_experiment_container', False)] if ids is not None: cell_specimens = [c for c in cell_specimens if c[ 'cell_specimen_id'] in ids] if experiment_container_ids is not None: cell_specimens = [c for c in cell_specimens if c[ 'experiment_container_id'] in experiment_container_ids] if filters is not None: cell_specimens = self.dataframe_query(cell_specimens, filters, 'cell_specimen_id') return cell_specimens
[docs] def dataframe_query_string(self, filters): """ Convert a list of cell metric filter dictionaries into a Pandas query string. """ def _quote_string(v): if isinstance(v, string_types): return "'%s'" % (v) else: return str(v) def _filter_clause(op, field, value): if op == 'in': query_args = [field, str(value)] elif type(value) is list: query_args = [field] + list(map(_quote_string, value)) else: query_args = [field, str(value)] cluster_string = self._QUERY_TEMPLATES[op].\ format(*query_args) return cluster_string query_string = ' & '.join(_filter_clause(f['op'], f['field'], f['value']) for f in filters) return query_string
[docs] def dataframe_query(self, data, filters, primary_key): """ Given a list of dictionary records and a list of filter dictionaries, filter the records using Pandas and return the filtered set of records. Parameters ---------- data: list of dicts List of dictionaries filters: list of dicts Each dictionary describes a filtering operation on a field in the dictionary. The general form is { 'field': <field>, 'op': <operation>, 'value': <filter_value(s)> }. For example, you can apply a threshold on the "osi_dg" column with something like this: { 'field': 'osi_dg', 'op': '>', 'value': 1.0 }. See _QUERY_TEMPLATES for a full list of operators. """ queries = self.dataframe_query_string(filters) result_dataframe = pd.DataFrame(data) result_dataframe = result_dataframe.query(queries) result_keys = set(result_dataframe[primary_key]) result = [d for d in data if d[primary_key] in result_keys] return result
[docs] def get_cell_specimen_id_mapping(self, file_name, mapping_table_id=None): '''Download mapping table from old to new cell specimen IDs. The mapping table is a CSV file that maps cell specimen ids that have changed between processing runs of the Brain Observatory pipeline. Parameters ---------- file_name : string Filename to save locally. mapping_table_id : integer ID of the mapping table file. Defaults to the most recent mapping table. Returns ------- pandas.DataFrame Mapping table as a DataFrame. ''' if mapping_table_id is None: mapping_table_id = self.CELL_MAPPING_ID data = self.template_query('brain_observatory_queries', 'cell_specimen_id_mapping_table', mapping_table_id=mapping_table_id) try: file_url = data[0]['download_link'] except Exception as _: raise Exception("No OphysCellSpecimenIdMapping file found.") self.retrieve_file_over_http(self.api_url + file_url, file_name) return pd.read_csv(file_name)