# Allen Institute Software License - This software license is the 2-clause BSD
# license plus a third clause that prohibits redistribution for commercial
# purposes without further permission.
#
# Copyright 2015-2016. Allen Institute. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Redistributions for commercial purposes are not permitted without the
# Allen Institute's written permission.
# For purposes of this license, commercial purposes is the incorporation of the
# Allen Institute's software into anything for which you will charge fees or
# other compensation. Contact terms@alleninstitute.org for commercial licensing
# opportunities.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
import h5py
import numpy as np
class NwbDataSet(object):
    """ A very simple interface for extracting electrophysiology data
from an NWB file.
"""
SPIKE_TIMES = "spike_times"
DEPRECATED_SPIKE_TIMES = "aibs_spike_times"
def __init__(self, file_name, spike_time_key=None):
""" Initialize the NwbDataSet instance with a file name
        Parameters
        ----------
        file_name: string
            NWB file name
        spike_time_key: string
            name of the dataset where spike times are stored
            (default NwbDataSet.SPIKE_TIMES)
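
        Examples
        --------
        A minimal sketch; 'example.nwb' is a hypothetical file name:

        >>> data_set = NwbDataSet('example.nwb')
        >>> data_set.spike_time_key
        'spike_times'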
"""
self.file_name = file_name
if spike_time_key is None:
self.spike_time_key = NwbDataSet.SPIKE_TIMES
else:
self.spike_time_key = spike_time_key
    def get_sweep(self, sweep_number):
        """ Retrieve the stimulus, response, index_range, and sampling rate
        for a particular sweep. This method hides the NWB file's distinction
        between a "Sweep" and an "Experiment". An experiment is a subset
        of a sweep that excludes the initial test pulse. It also excludes
        any erroneous response data at the end of the sweep (usually for
        ramp sweeps, where recording was terminated mid-stimulus).

        Sweeps that have no experiment return the full data arrays. Sweeps
        that do have an experiment also return the full data arrays
        (including the test pulse), but with any erroneous data trimmed
        from the back of the sweep.
Parameters
----------
sweep_number: int
Returns
-------
dict
A dictionary with 'stimulus', 'response', 'index_range', and
'sampling_rate' elements. The index range is a 2-tuple where
the first element indicates the end of the test pulse and the
second index is the end of valid response data.
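
        Examples
        --------
        A minimal sketch of slicing a sweep down to its valid experiment
        data; the file name and sweep number are hypothetical:

        >>> data_set = NwbDataSet('example.nwb')
        >>> sweep = data_set.get_sweep(13)
        >>> start, stop = sweep['index_range']
        >>> valid_response = sweep['response'][start:stop + 1]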
"""
with h5py.File(self.file_name, 'r') as f:
swp = f['epochs']['Sweep_%d' % sweep_number]
            # Fetch data from the file and convert to the correct SI unit.
            # This operation depends on the file version: early versions of
            # the file have incorrect conversion information embedded in the
            # NWB file, but the data was already stored in the appropriate
            # SI unit, so for those files the data is returned uncorrected.
            # For newer files (1.1 and later), apply the conversion value.
            major, minor = self.get_pipeline_version()
            if (major == 1 and minor > 0) or major > 1:
                # stimulus
                stimulus_dataset = swp['stimulus']['timeseries']['data']
                conversion = float(stimulus_dataset.attrs["conversion"])
                stimulus = stimulus_dataset[()] * conversion
                # acquisition
                response_dataset = swp['response']['timeseries']['data']
                conversion = float(response_dataset.attrs["conversion"])
                response = response_dataset[()] * conversion
            else:  # old file version
                stimulus_dataset = swp['stimulus']['timeseries']['data']
                stimulus = stimulus_dataset[()]
                response = swp['response']['timeseries']['data'][()]
            if 'unit' in stimulus_dataset.attrs:
                unit = stimulus_dataset.attrs["unit"]
                # attribute strings come back as bytes under some h5py
                # versions; normalize to str before inspecting
                if isinstance(unit, bytes):
                    unit = unit.decode('UTF-8')
                unit_str = None
                if unit.startswith('A'):
                    unit_str = "Amps"
                elif unit.startswith('V'):
                    unit_str = "Volts"
                assert unit_str is not None, \
                    "Stimulus time series unit not recognized"
            else:
                unit = None
                unit_str = 'Unknown'
            swp_idx_start = swp['stimulus']['idx_start'][()]
            swp_length = swp['stimulus']['count'][()]
            swp_idx_stop = swp_idx_start + swp_length - 1
            sweep_index_range = (swp_idx_start, swp_idx_stop)
# if the sweep has an experiment, extract the experiment's index
# range
            try:
                exp = f['epochs']['Experiment_%d' % sweep_number]
                exp_idx_start = exp['stimulus']['idx_start'][()]
                exp_length = exp['stimulus']['count'][()]
                exp_idx_stop = exp_idx_start + exp_length - 1
                experiment_index_range = (exp_idx_start, exp_idx_stop)
            except KeyError:
                # this sweep has no experiment; return the index range of
                # the entire sweep
                experiment_index_range = sweep_index_range
            assert sweep_index_range[0] == 0, \
                "Index range of the full sweep does not start at 0."
return {
'stimulus': stimulus,
'response': response,
                'stimulus_unit': unit_str,
'index_range': experiment_index_range,
'sampling_rate': 1.0 * swp['stimulus']['timeseries']['starting_time'].attrs['rate']
}
    def set_sweep(self, sweep_number, stimulus, response):
""" Overwrite the stimulus or response of an NWB file.
If the supplied arrays are shorter than stored arrays,
they are padded with zeros to match the original data
size.
Parameters
----------
sweep_number: int
stimulus: np.array
Overwrite the stimulus with this array. If None, stimulus is unchanged.
response: np.array
Overwrite the response with this array. If None, response is unchanged.
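
        Examples
        --------
        A minimal sketch; the file name, sweep number, and replacement
        response are hypothetical:

        >>> data_set = NwbDataSet('example.nwb')
        >>> data_set.set_sweep(13, stimulus=None, response=np.zeros(1000))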
"""
with h5py.File(self.file_name, 'r+') as f:
swp = f['epochs']['Sweep_%d' % sweep_number]
# this is the length of the entire sweep data, including test pulse and
# whatever might be in front of it
# TODO: remove deprecated 'idx_stop'
            if 'idx_stop' in swp['stimulus']:
                sweep_length = swp['stimulus']['idx_stop'][()] + 1
            else:
                sweep_length = swp['stimulus']['count'][()]
if stimulus is not None:
# if the data is shorter than the sweep, pad it with zeros
missing_data = sweep_length - len(stimulus)
if missing_data > 0:
stimulus = np.append(stimulus, np.zeros(missing_data))
swp['stimulus']['timeseries']['data'][...] = stimulus
if response is not None:
# if the data is shorter than the sweep, pad it with zeros
missing_data = sweep_length - len(response)
if missing_data > 0:
response = np.append(response, np.zeros(missing_data))
swp['response']['timeseries']['data'][...] = response
    def get_pipeline_version(self):
""" Returns the AI pipeline version number, stored in the
metadata field 'generated_by'. If that field is
missing, version 0.0 is returned.
Returns
-------
int tuple: (major, minor)
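
        Examples
        --------
        A minimal sketch mirroring the version gate used by get_sweep;
        the file name is hypothetical:

        >>> data_set = NwbDataSet('example.nwb')
        >>> major, minor = data_set.get_pipeline_version()
        >>> apply_conversion = (major == 1 and minor > 0) or major > 1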
"""
        major = 0
        minor = 0
        try:
            with h5py.File(self.file_name, 'r') as f:
                if 'generated_by' in f["general"]:
                    info = f["general/generated_by"]
                    # generated_by stores an array of alternating keys and
                    # values: keys sit at even indices and each
                    # corresponding value at the following odd index
                    version = None
                    for i in range(0, len(info), 2):
                        if info[i] == 'version':
                            version = info[i + 1]
                            break
                    if version is not None:
                        toks = version.split('.')
                        if len(toks) >= 2:
                            major = int(toks[0])
                            minor = int(toks[1])
        except Exception:
            major = 0
            minor = 0
        return major, minor
    def get_spike_times(self, sweep_number, key=None):
""" Return any spike times stored in the NWB file for a sweep.
Parameters
----------
sweep_number: int
index to access
key : string
label where the spike times are stored (default NwbDataSet.SPIKE_TIMES)
Returns
-------
list
list of spike times in seconds relative to the start of the sweep
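
        Examples
        --------
        A minimal sketch; the file name and sweep number are hypothetical:

        >>> data_set = NwbDataSet('example.nwb')
        >>> spike_times = data_set.get_spike_times(13)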
"""
if key is None:
key = self.spike_time_key
        with h5py.File(self.file_name, 'r') as f:
            # look under the requested key first, then fall back to the
            # deprecated spike-time location
            datasets = ["analysis/%s/Sweep_%d" % (key, sweep_number),
                        "analysis/%s/Sweep_%d" % (self.DEPRECATED_SPIKE_TIMES,
                                                  sweep_number)]
            for ds in datasets:
                if ds in f:
                    return f[ds][()]
            return []
    def set_spike_times(self, sweep_number, spike_times, key=None):
""" Set or overwrite the spikes times for a sweep.
Parameters
----------
        sweep_number : int
            index to access
        spike_times : np.array
            array of spike times in seconds
        key : string
            label where the times are stored
            (default NwbDataSet.SPIKE_TIMES)
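
        Examples
        --------
        A minimal sketch; the file name, sweep number, and spike times
        are made-up values:

        >>> data_set = NwbDataSet('example.nwb')
        >>> data_set.set_spike_times(13, np.array([0.1, 0.25, 0.31]))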
"""
if key is None:
key = self.spike_time_key
with h5py.File(self.file_name, 'r+') as f:
# make sure expected directory structure is in place
if "analysis" not in f.keys():
f.create_group("analysis")
analysis_dir = f["analysis"]
            if NwbDataSet.SPIKE_TIMES not in analysis_dir.keys():
                g = analysis_dir.create_group(NwbDataSet.SPIKE_TIMES)
                # a mix-up in the validator specification resulted in
                # everything in 'analysis' requiring a custom label, even
                # though it's already known to be custom. don't argue,
                # just support the metadata redundancy
                g.attrs["neurodata_type"] = "Custom"
spike_dir = analysis_dir[NwbDataSet.SPIKE_TIMES]
# see if desired dataset already exists
sweep_name = "Sweep_%d" % sweep_number
if sweep_name in spike_dir.keys():
# rewriting data -- delete old dataset
del spike_dir[sweep_name]
spike_dir.create_dataset(
sweep_name, data=spike_times, dtype='f8', maxshape=(None,))
    def get_sweep_numbers(self):
""" Get all of the sweep numbers in the file, including test sweeps. """
with h5py.File(self.file_name, 'r') as f:
sweeps = [int(e.split('_')[1])
for e in f['epochs'].keys() if e.startswith('Sweep_')]
return sweeps
    def get_experiment_sweep_numbers(self):
""" Get all of the sweep numbers for experiment epochs in the file, not including test sweeps. """
with h5py.File(self.file_name, 'r') as f:
sweeps = [int(e.split('_')[1])
for e in f['epochs'].keys() if e.startswith('Experiment_')]
return sweeps
    def fill_sweep_responses(self, fill_value=0.0, sweep_numbers=None, extend_experiment=False):
""" Fill sweep response arrays with a single value.
Parameters
----------
fill_value: float
Value used to fill sweep response array
sweep_numbers: list
List of integer sweep numbers to be filled (default all sweeps)
extend_experiment: bool
If True, extend experiment epoch length to the end of the sweep (undo any truncation)
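
        Examples
        --------
        A minimal sketch that blanks the responses of two hypothetical
        sweeps and restores their full experiment epochs:

        >>> data_set = NwbDataSet('example.nwb')
        >>> data_set.fill_sweep_responses(0.0, sweep_numbers=[13, 14],
        ...                               extend_experiment=True)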
"""
with h5py.File(self.file_name, 'a') as f:
if sweep_numbers is None:
sweep_numbers = self.get_sweep_numbers()
for sweep_number in sweep_numbers:
epoch = "Sweep_%d" % sweep_number
if epoch in f['epochs']:
f['epochs'][epoch]['response'][
'timeseries']['data'][...] = fill_value
                if extend_experiment:
                    epoch = "Experiment_%d" % sweep_number
                    if epoch in f['epochs']:
                        stim = f['epochs'][epoch]['stimulus']
                        idx_start = stim['idx_start'][()]
                        count = stim['timeseries']['data'].shape[0]
                        # rewrite the (possibly truncated) count so the
                        # experiment epoch runs to the end of the sweep
                        del stim['count']
                        stim['count'] = count - idx_start
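

if __name__ == '__main__':
    # A minimal, hypothetical usage sketch: iterate over every sweep in a
    # file and report its sampling rate. 'example.nwb' is a placeholder
    # name, not a file shipped with this module.
    data_set = NwbDataSet('example.nwb')
    for sweep_number in data_set.get_sweep_numbers():
        sweep = data_set.get_sweep(sweep_number)
        print(sweep_number, sweep['sampling_rate'])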