Source code for allensdk.core.nwb_data_set

# Allen Institute Software License - This software license is the 2-clause BSD
# license plus a third clause that prohibits redistribution for commercial
# purposes without further permission.
#
# Copyright 2015-2016. Allen Institute. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Redistributions for commercial purposes are not permitted without the
# Allen Institute's written permission.
# For purposes of this license, commercial purposes is the incorporation of the
# Allen Institute's software into anything for which you will charge fees or
# other compensation. Contact terms@alleninstitute.org for commercial licensing
# opportunities.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
import h5py
import numpy as np


class NwbDataSet(object):
    """ A very simple interface for extracting electrophysiology data
    from an NWB file.
    """

    SPIKE_TIMES = "spike_times"
    DEPRECATED_SPIKE_TIMES = "aibs_spike_times"

    def __init__(self, file_name, spike_time_key=None):
        """ Initialize the NwbDataSet instance with a file name.

        Parameters
        ----------
        file_name: string
            NWB file name
        """
        self.file_name = file_name

        if spike_time_key is None:
            self.spike_time_key = NwbDataSet.SPIKE_TIMES
        else:
            self.spike_time_key = spike_time_key
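    # Construction is just bookkeeping; the file is opened lazily by each
    # accessor. A minimal usage sketch ('ephys.nwb' is a hypothetical path
    # to an existing NWB file):
    #
    #   data_set = NwbDataSet('ephys.nwb')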
    def get_sweep(self, sweep_number):
        """ Retrieve the stimulus, response, index_range, and sampling rate
        for a particular sweep. This method hides the NWB file's distinction
        between a "Sweep" and an "Experiment". An experiment is a subset of
        a sweep that excludes the initial test pulse. It also excludes any
        erroneous response data at the end of the sweep (usually for ramp
        sweeps, where recording was terminated mid-stimulus).

        Some sweeps do not have an experiment, so full data arrays are
        returned. Sweeps that have an experiment return full data arrays
        (including the test pulse) with any erroneous data trimmed from the
        back of the sweep.

        Parameters
        ----------
        sweep_number: int

        Returns
        -------
        dict
            A dictionary with 'stimulus', 'response', 'stimulus_unit',
            'index_range', and 'sampling_rate' elements. The index range is
            a 2-tuple where the first element indicates the end of the test
            pulse and the second index is the end of valid response data.
        """
        with h5py.File(self.file_name, 'r') as f:
            swp = f['epochs']['Sweep_%d' % sweep_number]

            # Fetch data from the file and convert it to the correct SI
            # unit. This operation depends on the file version: early
            # versions of the file have incorrect conversion information
            # embedded in the NWB file, and the data was already stored in
            # the appropriate SI unit. For those files, return uncorrected
            # data. For newer files (1.1 and later), apply the conversion
            # value.
            major, minor = self.get_pipeline_version()
            if (major == 1 and minor > 0) or major > 1:
                # stimulus
                stimulus_dataset = swp['stimulus']['timeseries']['data']
                conversion = float(stimulus_dataset.attrs["conversion"])
                stimulus = stimulus_dataset.value * conversion

                # acquisition
                response_dataset = swp['response']['timeseries']['data']
                conversion = float(response_dataset.attrs["conversion"])
                response = response_dataset.value * conversion
            else:
                # old file version
                stimulus_dataset = swp['stimulus']['timeseries']['data']
                stimulus = stimulus_dataset.value
                response = swp['response']['timeseries']['data'].value

            if 'unit' in stimulus_dataset.attrs:
                unit = stimulus_dataset.attrs["unit"].decode('UTF-8')

                unit_str = None
                if unit.startswith('A'):
                    unit_str = "Amps"
                elif unit.startswith('V'):
                    unit_str = "Volts"
                assert unit_str is not None, Exception(
                    "Stimulus time series unit not recognized")
            else:
                unit = None
                unit_str = 'Unknown'

            swp_idx_start = swp['stimulus']['idx_start'].value
            swp_length = swp['stimulus']['count'].value
            swp_idx_stop = swp_idx_start + swp_length - 1
            sweep_index_range = (swp_idx_start, swp_idx_stop)

            # if the sweep has an experiment, extract the experiment's
            # index range
            try:
                exp = f['epochs']['Experiment_%d' % sweep_number]
                exp_idx_start = exp['stimulus']['idx_start'].value
                exp_length = exp['stimulus']['count'].value
                exp_idx_stop = exp_idx_start + exp_length - 1
                experiment_index_range = (exp_idx_start, exp_idx_stop)
            except KeyError:
                # this sweep has no experiment; return the index range of
                # the entire sweep
                experiment_index_range = sweep_index_range

            assert sweep_index_range[0] == 0, Exception(
                "index range of the full sweep does not start at 0.")

            return {
                'stimulus': stimulus,
                'response': response,
                'stimulus_unit': unit_str,
                'index_range': experiment_index_range,
                'sampling_rate': 1.0 * swp['stimulus']['timeseries'][
                    'starting_time'].attrs['rate']
            }
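    # Usage sketch: slice out just the experiment portion of a sweep
    # ('ephys.nwb' and sweep 11 are hypothetical; index_range is inclusive
    # on both ends, hence the + 1):
    #
    #   data_set = NwbDataSet('ephys.nwb')
    #   sweep = data_set.get_sweep(11)
    #   i0, i1 = sweep['index_range']
    #   response = sweep['response'][i0:i1 + 1]
    #   t = np.arange(len(response)) / sweep['sampling_rate']  # seconds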
    def set_sweep(self, sweep_number, stimulus, response):
        """ Overwrite the stimulus or response of an NWB file. If the
        supplied arrays are shorter than stored arrays, they are padded
        with zeros to match the original data size.

        Parameters
        ----------
        sweep_number: int

        stimulus: np.array
            Overwrite the stimulus with this array. If None, stimulus is
            unchanged.

        response: np.array
            Overwrite the response with this array. If None, response is
            unchanged.
        """
        with h5py.File(self.file_name, 'r+') as f:
            swp = f['epochs']['Sweep_%d' % sweep_number]

            # this is the length of the entire sweep data, including test
            # pulse and whatever might be in front of it
            # TODO: remove deprecated 'idx_stop'
            if 'idx_stop' in swp['stimulus']:
                sweep_length = swp['stimulus']['idx_stop'].value + 1
            else:
                sweep_length = swp['stimulus']['count'].value

            if stimulus is not None:
                # if the data is shorter than the sweep, pad it with zeros
                missing_data = sweep_length - len(stimulus)
                if missing_data > 0:
                    stimulus = np.append(stimulus, np.zeros(missing_data))

                swp['stimulus']['timeseries']['data'][...] = stimulus

            if response is not None:
                # if the data is shorter than the sweep, pad it with zeros
                missing_data = sweep_length - len(response)
                if missing_data > 0:
                    response = np.append(response, np.zeros(missing_data))

                swp['response']['timeseries']['data'][...] = response
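    # Sketch: blank a response while leaving the stimulus untouched
    # (hypothetical file and sweep; get_sweep supplies a full-length array,
    # so no zero-padding is triggered):
    #
    #   data_set = NwbDataSet('ephys.nwb')
    #   sweep = data_set.get_sweep(11)
    #   data_set.set_sweep(11, None, np.zeros_like(sweep['response']))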
    def get_pipeline_version(self):
        """ Returns the AI pipeline version number, stored in the metadata
        field 'generated_by'. If that field is missing, version 0.0 is
        returned.

        Returns
        -------
        int tuple: (major, minor)
        """
        major = 0
        minor = 0
        try:
            with h5py.File(self.file_name, 'r') as f:
                if 'generated_by' in f["general"]:
                    info = f["general/generated_by"]

                    # generated_by stores an array of alternating keys and
                    # values: keys are in even indices, and each
                    # corresponding value is in the following odd index
                    version = None
                    for i in range(0, len(info), 2):
                        if info[i] == 'version':
                            version = info[i + 1]
                            break

                    if version is not None:
                        toks = version.split('.')
                        if len(toks) >= 2:
                            major = int(toks[0])
                            minor = int(toks[1])
        except Exception:
            major = 0
            minor = 0
        return major, minor
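    # Sketch of gating on the pipeline version, mirroring the check that
    # get_sweep applies before trusting the embedded conversion attributes:
    #
    #   major, minor = data_set.get_pipeline_version()
    #   apply_conversion = (major == 1 and minor > 0) or major > 1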
    def get_spike_times(self, sweep_number, key=None):
        """ Return any spike times stored in the NWB file for a sweep.

        Parameters
        ----------
        sweep_number: int
            index to access
        key: string
            label where the spike times are stored
            (default NwbDataSet.SPIKE_TIMES)

        Returns
        -------
        list
            list of spike times in seconds relative to the start of the
            sweep
        """
        if key is None:
            key = self.spike_time_key

        with h5py.File(self.file_name, 'r') as f:
            sweep_name = "Sweep_%d" % sweep_number

            datasets = ["analysis/%s/%s" % (key, sweep_name),
                        "analysis/%s/%s" % (self.DEPRECATED_SPIKE_TIMES,
                                            sweep_name)]

            for ds in datasets:
                if ds in f:
                    return f[ds].value
            return []
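    # Sketch: the lookup falls back to the deprecated 'aibs_spike_times'
    # group automatically, so a plain call usually suffices (sweep number
    # hypothetical):
    #
    #   spikes = data_set.get_spike_times(11)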
    def set_spike_times(self, sweep_number, spike_times, key=None):
        """ Set or overwrite the spike times for a sweep.

        Parameters
        ----------
        sweep_number: int
            index to access
        key: string
            label where the times are stored
            (default NwbDataSet.SPIKE_TIMES)
        spike_times: np.array
            array of spike times in seconds
        """
        if key is None:
            key = self.spike_time_key

        with h5py.File(self.file_name, 'r+') as f:
            # make sure the expected directory structure is in place
            if "analysis" not in f.keys():
                f.create_group("analysis")

            analysis_dir = f["analysis"]
            if key not in analysis_dir.keys():
                g = analysis_dir.create_group(key)

                # a mix-up in the specification for the validator resulted
                # in everything in 'analysis' requiring a custom label,
                # even though it's already known to be custom. don't argue,
                # just support the metadata redundancy
                g.attrs["neurodata_type"] = "Custom"

            spike_dir = analysis_dir[key]

            # see if the desired dataset already exists
            sweep_name = "Sweep_%d" % sweep_number
            if sweep_name in spike_dir.keys():
                # rewriting data -- delete the old dataset
                del spike_dir[sweep_name]

            spike_dir.create_dataset(
                sweep_name, data=spike_times, dtype='f8', maxshape=(None,))
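    # Sketch: write spike times and read them back (file, sweep number,
    # and values all hypothetical):
    #
    #   data_set.set_spike_times(11, np.array([0.112, 0.387, 0.641]))
    #   spikes = data_set.get_spike_times(11)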
    def get_sweep_numbers(self):
        """ Get all of the sweep numbers in the file, including test
        sweeps.
        """
        with h5py.File(self.file_name, 'r') as f:
            sweeps = [int(e.split('_')[1]) for e in f['epochs'].keys()
                      if e.startswith('Sweep_')]
            return sweeps
    def get_experiment_sweep_numbers(self):
        """ Get all of the sweep numbers for experiment epochs in the
        file, not including test sweeps.
        """
        with h5py.File(self.file_name, 'r') as f:
            sweeps = [int(e.split('_')[1]) for e in f['epochs'].keys()
                      if e.startswith('Experiment_')]
            return sweeps
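    # Sketch: sweeps lacking an 'Experiment_N' epoch (e.g. test sweeps)
    # fall out as the set difference of the two listings:
    #
    #   all_sweeps = set(data_set.get_sweep_numbers())
    #   expt_sweeps = set(data_set.get_experiment_sweep_numbers())
    #   non_experiment = sorted(all_sweeps - expt_sweeps)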
    def fill_sweep_responses(self, fill_value=0.0, sweep_numbers=None,
                             extend_experiment=False):
        """ Fill sweep response arrays with a single value.

        Parameters
        ----------
        fill_value: float
            Value used to fill sweep response array

        sweep_numbers: list
            List of integer sweep numbers to be filled (default all sweeps)

        extend_experiment: bool
            If True, extend experiment epoch length to the end of the
            sweep (undo any truncation)
        """
        with h5py.File(self.file_name, 'a') as f:
            if sweep_numbers is None:
                sweep_numbers = self.get_sweep_numbers()

            for sweep_number in sweep_numbers:
                epoch = "Sweep_%d" % sweep_number
                if epoch in f['epochs']:
                    f['epochs'][epoch]['response'][
                        'timeseries']['data'][...] = fill_value

                if extend_experiment:
                    epoch = "Experiment_%d" % sweep_number
                    if epoch in f['epochs']:
                        idx_start = \
                            f['epochs'][epoch]['stimulus']['idx_start'].value
                        count = f['epochs'][epoch]['stimulus'][
                            'timeseries']['data'].shape[0]

                        del f['epochs'][epoch]['stimulus']['count']
                        f['epochs'][epoch]['stimulus']['count'] = \
                            count - idx_start
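    # Sketch: zero out a couple of failed sweeps and restore their full
    # experiment windows (sweep numbers hypothetical):
    #
    #   data_set.fill_sweep_responses(0.0, sweep_numbers=[3, 4],
    #                                 extend_experiment=True)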
    def get_sweep_metadata(self, sweep_number):
        """ Retrieve the sweep-level metadata associated with each sweep.
        Includes information on stimulus parameters, such as the stimulus
        name and amplitude, as well as recording quality metadata, like
        access resistance and seal quality.

        Parameters
        ----------
        sweep_number: int

        Returns
        -------
        dict
            A dictionary with 'aibs_stimulus_amplitude_pa',
            'aibs_stimulus_name', 'gain', 'initial_access_resistance',
            and 'seal' elements. These specific fields are the ones
            encoded in the original AIBS in vitro .nwb files.
        """
        with h5py.File(self.file_name, 'r') as f:
            sweep_metadata = {}

            # the sweep-level metadata is stored in
            # stimulus/presentation/Sweep_XX in the .nwb file

            # indicates which metadata fields to return
            metadata_fields = ['aibs_stimulus_amplitude_pa',
                               'aibs_stimulus_name',
                               'gain', 'initial_access_resistance', 'seal']
            try:
                stim_details = f['stimulus']['presentation'][
                    'Sweep_%d' % sweep_number]
                for field in metadata_fields:
                    # check if the sweep contains this metadata field
                    if field in stim_details.keys():
                        sweep_metadata[field] = stim_details[field].value
            except KeyError:
                sweep_metadata = {}

            return sweep_metadata
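    # Sketch: fields absent from a sweep are simply omitted from the
    # returned dict, so read them defensively (sweep number hypothetical):
    #
    #   meta = data_set.get_sweep_metadata(11)
    #   stim_name = meta.get('aibs_stimulus_name', 'unknown')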