Source code for allensdk.brain_observatory.natural_scenes

# Allen Institute Software License - This software license is the 2-clause BSD
# license plus a third clause that prohibits redistribution for commercial
# purposes without further permission.
#
# Copyright 2016-2017. Allen Institute. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Redistributions for commercial purposes are not permitted without the
# Allen Institute's written permission.
# For purposes of this license, commercial purposes is the incorporation of the
# Allen Institute's software into anything for which you will charge fees or
# other compensation. Contact terms@alleninstitute.org for commercial licensing
# opportunities.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
import scipy.stats as st
import numpy as np
import pandas as pd
from .stimulus_analysis import StimulusAnalysis
import logging
import h5py
from . import observatory_plots as oplots
from . import circle_plots as cplots
from .brain_observatory_exceptions import MissingStimulusException

[docs]class NaturalScenes(StimulusAnalysis): """ Perform tuning analysis specific to natural scenes stimulus. Parameters ---------- data_set: BrainObservatoryNwbDataSet object """ _log = logging.getLogger('allensdk.brain_observatory.natural_scenes') def __init__(self, data_set, **kwargs): super(NaturalScenes, self).__init__(data_set, **kwargs) self._number_scenes = StimulusAnalysis._PRELOAD self._sweeplength = StimulusAnalysis._PRELOAD self._interlength = StimulusAnalysis._PRELOAD self._extralength = StimulusAnalysis._PRELOAD @property def number_scenes(self): if self._number_scenes is StimulusAnalysis._PRELOAD: self.populate_stimulus_table() return self._number_scenes @property def sweeplength(self): if self._sweeplength is StimulusAnalysis._PRELOAD: self.populate_stimulus_table() return self._sweeplength @property def interlength(self): if self._interlength is StimulusAnalysis._PRELOAD: self.populate_stimulus_table() return self._interlength @property def extralength(self): if self._extralength is StimulusAnalysis._PRELOAD: self.populate_stimulus_table() return self._extralength
[docs] def populate_stimulus_table(self): self._stim_table = self.data_set.get_stimulus_table('natural_scenes') self._number_scenes = len(np.unique(self._stim_table.frame)) self._sweeplength = self._stim_table.end.iloc[ 1] - self._stim_table.start.iloc[1] self._interlength = 4 * self._sweeplength self._extralength = self._sweeplength
[docs] def get_response(self): ''' Computes the mean response for each cell to each stimulus condition. Return is a (# scenes, # cells, 3) np.ndarray. The final dimension contains the mean response to the condition (index 0), standard error of the mean of the response to the condition (index 1), and the number of trials with a significant (p < 0.05) response to that condition (index 2). Returns ------- Numpy array storing mean responses. ''' NaturalScenes._log.info("Calculating mean responses") response = np.empty((self.number_scenes, self.numbercells + 1, 3)) def ptest(x): return len(np.where(x < (0.05 / (self.number_scenes - 1)))[0]) for ns in range(self.number_scenes): subset_response = self.mean_sweep_response[ self.stim_table.frame == (ns - 1)] subset_pval = self.pval[self.stim_table.frame == (ns - 1)] response[ns, :, 0] = subset_response.mean(axis=0) response[ns, :, 1] = subset_response.std( axis=0) / np.sqrt(len(subset_response)) response[ns, :, 2] = subset_pval.apply(ptest, axis=0) return response
[docs] def get_peak(self): ''' Computes metrics about peak response condition for each cell. Returns ------- Pandas data frame with the following fields ('_ns' suffix is for natural scene): * scene_ns (scene number) * reliability_ns * peak_dff_ns (peak dF/F) * ptest_ns * p_run_ns * run_modulation_ns * time_to_peak_ns ''' NaturalScenes._log.info('Calculating peak response properties') peak = pd.DataFrame(index=range(self.numbercells), columns=('scene_ns', 'reliability_ns', 'peak_dff_ns', 'ptest_ns', 'p_run_ns', 'run_modulation_ns', 'time_to_peak_ns', 'cell_specimen_id','image_selectivity_ns')) cids = self.data_set.get_cell_specimen_ids() for nc in range(self.numbercells): nsp = np.argmax(self.response[1:, nc, 0]) peak.cell_specimen_id.iloc[nc] = cids[nc] peak.scene_ns[nc] = nsp # peak.response_reliability_ns[nc] = self.response[ # nsp + 1, nc, 2] / 0.50 # assume 50 trials peak.peak_dff_ns[nc] = self.response[nsp + 1, nc, 0] # subset = self.mean_sweep_response[self.stim_table.frame == nsp] # subset_stat = subset[subset.dx < 2] # subset_run = subset[subset.dx >= 2] # if (len(subset_run) > 5) & (len(subset_stat) > 5): # (_, peak.p_run_ns[nc]) = st.ks_2samp( # subset_run[str(nc)], subset_stat[str(nc)]) # peak.run_modulation_ns[nc] = subset_run[ # str(nc)].mean() / subset_stat[str(nc)].mean() # else: # peak.p_run_ns[nc] = np.NaN # peak.run_modulation_ns[nc] = np.NaN groups = [] for im in range(self.number_scenes): subset = self.mean_sweep_response[ self.stim_table.frame == (im - 1)] groups.append(subset[str(nc)].values) (_, peak.ptest_ns[nc]) = st.f_oneway(*groups) test = self.sweep_response[ self.stim_table.frame == nsp][str(nc)].mean() peak.time_to_peak_ns[nc] = ( np.argmax(test) - self.interlength) / self.acquisition_rate #running modulation subset = self.mean_sweep_response[self.stim_table.frame==nsp] subset_run = subset[subset.dx>=1] subset_stat = subset[subset.dx<1] if (len(subset_run)>4) & (len(subset_stat)>4): (_,peak.p_run_ns.iloc[nc]) = st.ttest_ind(subset_run[str(nc)], subset_stat[str(nc)], equal_var=False) if subset_run[str(nc)].mean()>subset_stat[str(nc)].mean(): peak.run_modulation_ns.iloc[nc] = (subset_run[str(nc)].mean() - subset_stat[str(nc)].mean())/np.abs(subset_run[str(nc)].mean()) elif subset_run[str(nc)].mean()<subset_stat[str(nc)].mean(): peak.run_modulation_ns.iloc[nc] = -1*((subset_stat[str(nc)].mean() - subset_run[str(nc)].mean())/np.abs(subset_stat[str(nc)].mean())) else: peak.p_run_ns.iloc[nc] = np.NaN peak.run_modulation_ns.iloc[nc] = np.NaN #reliability subset = self.sweep_response[self.stim_table.frame==nsp] corr_matrix = np.empty((len(subset),len(subset))) for i in range(len(subset)): for j in range(len(subset)): r,p = st.pearsonr(subset[str(nc)].iloc[i][28:42], subset[str(nc)].iloc[j][28:42]) corr_matrix[i,j] = r mask = np.ones((len(subset), len(subset))) for i in range(len(subset)): for j in range(len(subset)): if i>=j: mask[i,j] = np.NaN corr_matrix *= mask peak.reliability_ns.iloc[nc] = np.nanmean(corr_matrix) #image selectivity fmin = self.response[1:,nc,0].min() fmax = self.response[1:,nc,0].max() rtj = np.empty((1000,1)) for j in range(1000): thresh = fmin + j*((fmax-fmin)/1000.) theta = np.empty((118,1)) for im in range(118): if self.response[im+1,nc,0] > thresh: #im+1 to only look at images, not blanksweep theta[im] = 1 else: theta[im] = 0 rtj[j] = theta.mean() biga = rtj.mean() bigs = 1 - (2*biga) peak.image_selectivity_ns.iloc[nc] = bigs return peak
[docs] def plot_time_to_peak(self, p_value_max=oplots.P_VALUE_MAX, color_map=oplots.STIMULUS_COLOR_MAP): stimulus_table = self.data_set.get_stimulus_table('natural_scenes') resps = [] for index, row in self.peak.iterrows(): mean_response = self.sweep_response.ix[stimulus_table.frame==row.scene_ns][str(index)].mean() resps.append((mean_response - mean_response.mean() / mean_response.std())) mean_responses = np.array(resps) sorted_table = self.peak[self.peak.ptest_ns < p_value_max].sort_values('time_to_peak_ns') cell_order = sorted_table.index # time to peak is relative to stimulus start in seconds ttps = sorted_table.time_to_peak_ns.values + self.interlength / self.acquisition_rate msrs_sorted = mean_responses[cell_order,:] oplots.plot_time_to_peak(msrs_sorted, ttps, 0, (2*self.interlength + self.sweeplength) / self.acquisition_rate, (self.interlength) / self.acquisition_rate, (self.interlength + self.sweeplength) / self.acquisition_rate, color_map)
[docs] def open_corona_plot(self, cell_specimen_id=None, cell_index=None): cell_index = self.row_from_cell_id(cell_specimen_id, cell_index) df = self.mean_sweep_response[str(cell_index)] data = df.values st = self.data_set.get_stimulus_table('natural_scenes') mask = st[st.frame >= 0].index cmin = self.response[0,cell_index,0] cmax = data.mean() + data.std()*3 cp = cplots.CoronaPlotter() cp.plot(st.frame.ix[mask].values, data=df.ix[mask].values, clim=[cmin, cmax]) cp.show_arrow()
[docs] def reshape_response_array(self): ''' :return: response array in cells x stim x repetition for noise correlations ''' mean_sweep_response = self.mean_sweep_response.values[:, :self.numbercells] stim_table = self.stim_table frames = np.unique(stim_table.frame.values) reps = [len(np.where(stim_table.frame.values == frame)[0]) for frame in frames] Nreps = min(reps) # just in case there are different numbers of repetitions response_new = np.zeros((self.numbercells, self.number_scenes), dtype='object') for i, frame in enumerate(frames): ind = np.where(stim_table.frame.values == frame)[0][:Nreps] for c in range(self.numbercells): response_new[c, i] = mean_sweep_response[ind, c] return response_new
[docs] def get_signal_correlation(self, corr='spearman'): logging.debug("Calculating signal correlations") response = self.response[:, :, 0].T response = response[:self.numbercells, :] N, Nstim = response.shape signal_corr = np.zeros((N, N)) signal_p = np.empty((N, N)) if corr == 'pearson': for i in range(N): for j in range(i, N): # matrix is symmetric signal_corr[i, j], signal_p[i, j] = st.pearsonr(response[i], response[j]) elif corr == 'spearman': for i in range(N): for j in range(i, N): # matrix is symmetric signal_corr[i, j], signal_p[i, j] = st.spearmanr(response[i], response[j]) else: raise Exception('correlation should be pearson or spearman') signal_corr = np.triu(signal_corr) + np.triu(signal_corr, 1).T # fill in lower triangle signal_p = np.triu(signal_p) + np.triu(signal_p, 1).T # fill in lower triangle return signal_corr, signal_p
[docs] def get_representational_similarity(self, corr='spearman'): logging.debug("Calculating representational similarity") response = self.response[:, :, 0] response = response[:, :self.numbercells] Nstim, N = response.shape rep_sim = np.zeros((Nstim, Nstim)) rep_sim_p = np.empty((Nstim, Nstim)) if corr == 'pearson': for i in range(Nstim): for j in range(i, Nstim): # matrix is symmetric rep_sim[i, j], rep_sim_p[i, j] = st.pearsonr(response[i], response[j]) elif corr == 'spearman': for i in range(Nstim): for j in range(i, Nstim): # matrix is symmetric rep_sim[i, j], rep_sim_p[i, j] = st.spearmanr(response[i], response[j]) else: raise Exception('correlation should be pearson or spearman') rep_sim = np.triu(rep_sim) + np.triu(rep_sim, 1).T # fill in lower triangle rep_sim_p = np.triu(rep_sim_p) + np.triu(rep_sim_p, 1).T # fill in lower triangle return rep_sim, rep_sim_p
[docs] def get_noise_correlation(self, corr='spearman'): logging.debug("Calculating noise correlations") response = self.reshape_response_array() noise_corr = np.zeros((self.numbercells, self.numbercells, self.number_scenes)) noise_corr_p = np.zeros((self.numbercells, self.numbercells, self.number_scenes)) if corr == 'pearson': for k in range(self.number_scenes): for i in range(self.numbercells): for j in range(i, self.numbercells): noise_corr[i, j, k], noise_corr_p[i, j, k] = st.pearsonr(response[i, k], response[j, k]) noise_corr[:, :, k] = np.triu(noise_corr[:, :, k]) + np.triu(noise_corr[:, :, k], 1).T noise_corr_p[:, :, k] = np.triu(noise_corr_p[:, :, k]) + np.triu(noise_corr_p[:, :, k], 1).T elif corr == 'spearman': for k in range(self.number_scenes): for i in range(self.numbercells): for j in range(i, self.numbercells): noise_corr[i, j, k], noise_corr_p[i, j, k] = st.spearmanr(response[i, k], response[j, k]) noise_corr[:, :, k] = np.triu(noise_corr[:, :, k]) + np.triu(noise_corr[:, :, k], 1).T noise_corr_p[:, :, k] = np.triu(noise_corr_p[:, :, k]) + np.triu(noise_corr_p[:, :, k], 1).T else: raise Exception('correlation should be pearson or spearman') return noise_corr, noise_corr_p
[docs] @staticmethod def from_analysis_file(data_set, analysis_file): ns = NaturalScenes(data_set) ns.populate_stimulus_table() try: ns._sweep_response = pd.read_hdf(analysis_file, "analysis/sweep_response_ns") ns._mean_sweep_response = pd.read_hdf(analysis_file, "analysis/mean_sweep_response_ns") ns._peak = pd.read_hdf(analysis_file, "analysis/peak") with h5py.File(analysis_file, "r") as f: ns._response = f["analysis/response_ns"].value ns._binned_dx_sp = f["analysis/binned_dx_sp"].value ns._binned_cells_sp = f["analysis/binned_cells_sp"].value ns._binned_dx_vis = f["analysis/binned_dx_vis"].value ns._binned_cells_vis = f["analysis/binned_cells_vis"].value if "analysis/noise_corr_ns" in f: ns.noise_correlation = f["analysis/noise_corr_ns"].value if "analysis/signal_corr_ns" in f: ns.signal_correlation = f["analysis/signal_corr_ns"].value if "analysis/rep_similarity_ns" in f: ns.representational_similarity = f["analysis/rep_similarity_ns"].value except Exception as e: raise MissingStimulusException(e.args) return ns