# Allen Institute Software License - This software license is the 2-clause BSD
# license plus a third clause that prohibits redistribution for commercial
# purposes without further permission.
#
# Copyright 2016-2017. Allen Institute. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Redistributions for commercial purposes are not permitted without the
# Allen Institute's written permission.
# For purposes of this license, commercial purposes is the incorporation of the
# Allen Institute's software into anything for which you will charge fees or
# other compensation. Contact terms@alleninstitute.org for commercial licensing
# opportunities.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
import scipy.stats as st
import numpy as np
import pandas as pd
from .stimulus_analysis import StimulusAnalysis
import logging
import h5py
from . import observatory_plots as oplots
from . import circle_plots as cplots
from .brain_observatory_exceptions import MissingStimulusException
class NaturalScenes(StimulusAnalysis):
    """ Perform tuning analysis specific to natural scenes stimulus.

    Stimulus-table-derived quantities (number of scenes and the sweep /
    inter-sweep / extra lengths) are loaded lazily: they start out as the
    ``StimulusAnalysis._PRELOAD`` sentinel and are filled in by
    ``populate_stimulus_table`` the first time one of them is read.

    Parameters
    ----------
    data_set: BrainObservatoryNwbDataSet object
    """

    _log = logging.getLogger('allensdk.brain_observatory.natural_scenes')

    # Number of evenly spaced thresholds swept between a cell's minimum and
    # maximum image response when computing image selectivity.
    _SELECTIVITY_STEPS = 1000

    # Samples of each sweep trace that are correlated across trials for the
    # reliability metric.
    # NOTE(review): presumably the stimulus window for the default sweep
    # timing; confirm before using with a different acquisition rate.
    _RELIABILITY_WINDOW = slice(28, 42)

    def __init__(self, data_set, **kwargs):
        super(NaturalScenes, self).__init__(data_set, **kwargs)

        self._number_scenes = StimulusAnalysis._PRELOAD
        self._sweeplength = StimulusAnalysis._PRELOAD
        self._interlength = StimulusAnalysis._PRELOAD
        self._extralength = StimulusAnalysis._PRELOAD

    @property
    def number_scenes(self):
        """Number of distinct ``frame`` values in the stimulus table."""
        if self._number_scenes is StimulusAnalysis._PRELOAD:
            self.populate_stimulus_table()

        return self._number_scenes

    @property
    def sweeplength(self):
        """Duration of one sweep (``end - start`` of the second table row)."""
        if self._sweeplength is StimulusAnalysis._PRELOAD:
            self.populate_stimulus_table()

        return self._sweeplength

    @property
    def interlength(self):
        """Inter-sweep length, defined as four times ``sweeplength``."""
        if self._interlength is StimulusAnalysis._PRELOAD:
            self.populate_stimulus_table()

        return self._interlength

    @property
    def extralength(self):
        """Extra length, defined as equal to ``sweeplength``."""
        if self._extralength is StimulusAnalysis._PRELOAD:
            self.populate_stimulus_table()

        return self._extralength

    def populate_stimulus_table(self):
        """Load the natural scenes stimulus table and derive sweep timing.

        Sets ``_stim_table``, ``_number_scenes``, ``_sweeplength``,
        ``_interlength`` and ``_extralength``. The sweep length is measured on
        the second row of the table, so the table must have at least two rows.
        """
        self._stim_table = self.data_set.get_stimulus_table('natural_scenes')
        self._number_scenes = len(np.unique(self._stim_table.frame))
        self._sweeplength = (self._stim_table.end.iloc[1]
                             - self._stim_table.start.iloc[1])
        self._interlength = 4 * self._sweeplength
        self._extralength = self._sweeplength

    def get_response(self):
        """ Computes the mean response for each cell to each stimulus condition.

        Row ``k`` of the result holds the sweeps whose ``frame`` equals
        ``k - 1``, so row 0 is the blank sweep (frame -1) and rows 1.. are the
        images. The second axis has ``numbercells + 1`` entries, one per
        column of ``mean_sweep_response``.

        The final dimension contains the mean response to the condition
        (index 0), the standard error of the mean of the response to the
        condition (index 1), and the number of trials whose p-value is below
        the Bonferroni-corrected threshold ``0.05 / (number_scenes - 1)``
        (index 2).

        Returns
        -------
        Numpy array of shape (# scenes, # cells + 1, 3) storing mean responses.
        """
        NaturalScenes._log.info("Calculating mean responses")

        response = np.empty((self.number_scenes, self.numbercells + 1, 3))
        p_threshold = 0.05 / (self.number_scenes - 1)

        def ptest(x):
            # number of sweeps with a significant response
            return np.count_nonzero(x < p_threshold)

        frames = self.stim_table.frame
        for ns in range(self.number_scenes):
            is_scene = frames == (ns - 1)
            subset_response = self.mean_sweep_response[is_scene]
            subset_pval = self.pval[is_scene]

            response[ns, :, 0] = subset_response.mean(axis=0)
            response[ns, :, 1] = (subset_response.std(axis=0)
                                  / np.sqrt(len(subset_response)))
            response[ns, :, 2] = subset_pval.apply(ptest, axis=0)

        return response

    @staticmethod
    def _running_modulation(subset, cell):
        """Compare a cell's responses on running vs. stationary sweeps.

        Parameters
        ----------
        subset: rows of ``mean_sweep_response`` for one scene; must contain a
            ``dx`` column (sweeps with ``dx >= 1`` are treated as running).
        cell: column label of the cell.

        Returns
        -------
        (p_value, modulation). Both are NaN unless each group has more than
        four sweeps. The p-value comes from Welch's t-test. The modulation is
        the difference of group means divided by the magnitude of the larger
        mean, negative when the stationary mean is larger, and NaN when the
        two means are exactly equal.
        """
        running = subset[subset.dx >= 1][cell]
        stationary = subset[subset.dx < 1][cell]

        if len(running) <= 4 or len(stationary) <= 4:
            return np.nan, np.nan

        _, p_run = st.ttest_ind(running, stationary, equal_var=False)

        run_mean = running.mean()
        stat_mean = stationary.mean()
        modulation = np.nan
        if run_mean > stat_mean:
            modulation = (run_mean - stat_mean) / np.abs(run_mean)
        elif run_mean < stat_mean:
            modulation = -1 * ((stat_mean - run_mean) / np.abs(stat_mean))

        return p_run, modulation

    @staticmethod
    def _trial_reliability(traces):
        """Mean Pearson correlation over all distinct pairs of trial traces.

        NaN correlations are ignored; the result is NaN when there are fewer
        than two traces.
        """
        n_trials = len(traces)
        pair_r = [st.pearsonr(traces[i], traces[j])[0]
                  for i in range(n_trials)
                  for j in range(i + 1, n_trials)]
        if not pair_r:
            return np.nan
        return np.nanmean(pair_r)

    @staticmethod
    def _image_selectivity(image_responses, steps):
        """Selectivity index ``1 - 2 * A`` of one cell across images.

        ``A`` is the average, over ``steps`` thresholds evenly spaced from the
        minimum response up to (but excluding) the maximum, of the fraction of
        images whose response exceeds the threshold.
        """
        fmin = image_responses.min()
        fmax = image_responses.max()
        thresholds = fmin + np.arange(steps) * ((fmax - fmin) / float(steps))
        above = image_responses[np.newaxis, :] > thresholds[:, np.newaxis]
        fraction_above = above.mean(axis=1)
        return 1 - (2 * fraction_above.mean())

    def get_peak(self):
        """ Computes metrics about peak response condition for each cell.

        The peak scene is the image (blank sweep excluded) with the largest
        mean response. The ANOVA p-value (``ptest_ns``) is computed across all
        conditions including the blank sweep.

        Returns
        -------
        Pandas data frame with the following fields ('_ns' suffix is for
        natural scene):
            * scene_ns (scene number)
            * reliability_ns
            * peak_dff_ns (peak dF/F)
            * ptest_ns
            * p_run_ns
            * run_modulation_ns
            * time_to_peak_ns
            * cell_specimen_id
            * image_selectivity_ns
        """
        NaturalScenes._log.info('Calculating peak response properties')

        peak = pd.DataFrame(index=range(self.numbercells),
                            columns=('scene_ns', 'reliability_ns',
                                     'peak_dff_ns', 'ptest_ns', 'p_run_ns',
                                     'run_modulation_ns', 'time_to_peak_ns',
                                     'cell_specimen_id',
                                     'image_selectivity_ns'))
        cids = self.data_set.get_cell_specimen_ids()

        # The per-condition rows do not depend on the cell, so split the sweep
        # table once. Entry k holds the sweeps with frame == k - 1.
        frames = self.stim_table.frame
        scene_subsets = [self.mean_sweep_response[frames == (im - 1)]
                         for im in range(self.number_scenes)]

        for nc in range(self.numbercells):
            cell = str(nc)

            # response row 0 is the blank sweep, so image i lives in row i + 1
            image_responses = self.response[1:, nc, 0]
            nsp = np.argmax(image_responses)

            # .at writes straight into the frame; attribute-chained writes
            # (peak.col[nc] = ...) are not guaranteed to modify it.
            peak.at[nc, 'cell_specimen_id'] = cids[nc]
            peak.at[nc, 'scene_ns'] = nsp
            peak.at[nc, 'peak_dff_ns'] = self.response[nsp + 1, nc, 0]

            # one-way ANOVA across all conditions
            groups = [subset[cell].values for subset in scene_subsets]
            _, p_anova = st.f_oneway(*groups)
            peak.at[nc, 'ptest_ns'] = p_anova

            # time to peak of the trial-averaged trace, relative to the
            # inter-sweep offset
            peak_sweeps = self.sweep_response[frames == nsp][cell]
            mean_trace = peak_sweeps.mean()
            peak.at[nc, 'time_to_peak_ns'] = (
                (np.argmax(mean_trace) - self.interlength)
                / self.acquisition_rate)

            # running modulation
            p_run, run_modulation = self._running_modulation(
                scene_subsets[nsp + 1], cell)
            peak.at[nc, 'p_run_ns'] = p_run
            peak.at[nc, 'run_modulation_ns'] = run_modulation

            # reliability
            traces = [trace[self._RELIABILITY_WINDOW] for trace in peak_sweeps]
            peak.at[nc, 'reliability_ns'] = self._trial_reliability(traces)

            # image selectivity
            peak.at[nc, 'image_selectivity_ns'] = self._image_selectivity(
                image_responses, self._SELECTIVITY_STEPS)

        return peak

    def plot_time_to_peak(self,
                          p_value_max=oplots.P_VALUE_MAX,
                          color_map=oplots.STIMULUS_COLOR_MAP):
        """Plot z-scored mean responses of responsive cells, ordered by time to peak.

        Parameters
        ----------
        p_value_max: only cells with ``ptest_ns`` below this value are plotted.
        color_map: passed through to ``observatory_plots.plot_time_to_peak``.
        """
        stimulus_table = self.data_set.get_stimulus_table('natural_scenes')

        resps = []
        for index, row in self.peak.iterrows():
            is_peak_scene = stimulus_table.frame == row.scene_ns
            mean_response = self.sweep_response.loc[is_peak_scene][str(index)].mean()
            # z-score the trace: subtract the mean first, then divide by std
            resps.append((mean_response - mean_response.mean()) / mean_response.std())

        mean_responses = np.array(resps)

        sorted_table = self.peak[self.peak.ptest_ns < p_value_max].sort_values('time_to_peak_ns')
        cell_order = sorted_table.index

        # time to peak is relative to stimulus start in seconds
        ttps = sorted_table.time_to_peak_ns.values + self.interlength / self.acquisition_rate
        msrs_sorted = mean_responses[cell_order, :]

        oplots.plot_time_to_peak(msrs_sorted, ttps,
                                 0, (2 * self.interlength + self.sweeplength) / self.acquisition_rate,
                                 (self.interlength) / self.acquisition_rate,
                                 (self.interlength + self.sweeplength) / self.acquisition_rate,
                                 color_map)

    def open_corona_plot(self, cell_specimen_id=None, cell_index=None):
        """Draw a corona plot of one cell's per-sweep mean responses.

        The cell is resolved through ``row_from_cell_id``. Blank sweeps
        (frame < 0) are excluded from the plotted data. The color range runs
        from the cell's mean blank-sweep response to three standard deviations
        above the mean of all its sweep responses.
        """
        cell_index = self.row_from_cell_id(cell_specimen_id, cell_index)

        df = self.mean_sweep_response[str(cell_index)]
        data = df.values

        # named stim_table (not `st`) to avoid shadowing the scipy.stats alias
        stim_table = self.data_set.get_stimulus_table('natural_scenes')
        mask = stim_table[stim_table.frame >= 0].index

        cmin = self.response[0, cell_index, 0]
        cmax = data.mean() + data.std() * 3

        cp = cplots.CoronaPlotter()
        cp.plot(stim_table.frame.loc[mask].values,
                data=df.loc[mask].values,
                clim=[cmin, cmax])
        cp.show_arrow()

    def reshape_response_array(self):
        """Regroup per-sweep mean responses by cell and stimulus condition.

        Every condition is truncated to the smallest repetition count found in
        the stimulus table so that all entries have the same length.

        Returns
        -------
        Object array of shape (# cells, # scenes); each element is the 1-D
        array of that cell's responses to the repetitions of that condition
        (used for noise correlations).
        """
        mean_sweep_response = self.mean_sweep_response.values[:, :self.numbercells]

        frame_values = self.stim_table.frame.values
        frames = np.unique(frame_values)

        reps = [np.count_nonzero(frame_values == frame) for frame in frames]
        Nreps = min(reps)  # just in case there are different numbers of repetitions

        response_new = np.zeros((self.numbercells, self.number_scenes), dtype='object')
        for i, frame in enumerate(frames):
            ind = np.where(frame_values == frame)[0][:Nreps]
            for c in range(self.numbercells):
                response_new[c, i] = mean_sweep_response[ind, c]

        return response_new

    @staticmethod
    def _correlation_function(corr):
        """Map a correlation name ('pearson' or 'spearman') to its scipy function.

        Raises
        ------
        ValueError: if ``corr`` is neither 'pearson' nor 'spearman'.
        """
        if corr == 'pearson':
            return st.pearsonr
        if corr == 'spearman':
            return st.spearmanr
        raise ValueError('correlation should be pearson or spearman')

    @staticmethod
    def _pairwise_correlation(rows, corr_func):
        """Correlate every pair of rows, returning symmetric (r, p) matrices.

        Only the upper triangle (diagonal included) is computed; the result is
        mirrored into the lower triangle because the matrix is symmetric.
        """
        n_rows = len(rows)
        r_matrix = np.zeros((n_rows, n_rows))
        p_matrix = np.zeros((n_rows, n_rows))

        for i in range(n_rows):
            for j in range(i, n_rows):
                r, p = corr_func(rows[i], rows[j])
                r_matrix[i, j] = r_matrix[j, i] = r
                p_matrix[i, j] = p_matrix[j, i] = p

        return r_matrix, p_matrix

    def get_signal_correlation(self, corr='spearman'):
        """Correlate cells' mean responses across stimulus conditions.

        Parameters
        ----------
        corr: 'pearson' or 'spearman'.

        Returns
        -------
        (signal_corr, signal_p): two (# cells, # cells) symmetric arrays of
        correlation coefficients and their p-values.

        Raises
        ------
        ValueError: if ``corr`` is not a supported correlation name.
        """
        NaturalScenes._log.debug("Calculating signal correlations")

        corr_func = self._correlation_function(corr)

        # cells x conditions, dropping the extra non-cell column
        response = self.response[:, :, 0].T
        response = response[:self.numbercells, :]

        return self._pairwise_correlation(response, corr_func)

    def get_representational_similarity(self, corr='spearman'):
        """Correlate stimulus conditions' population response vectors.

        Parameters
        ----------
        corr: 'pearson' or 'spearman'.

        Returns
        -------
        (rep_sim, rep_sim_p): two (# scenes, # scenes) symmetric arrays of
        correlation coefficients and their p-values.

        Raises
        ------
        ValueError: if ``corr`` is not a supported correlation name.
        """
        NaturalScenes._log.debug("Calculating representational similarity")

        corr_func = self._correlation_function(corr)

        # conditions x cells, dropping the extra non-cell column
        response = self.response[:, :, 0]
        response = response[:, :self.numbercells]

        return self._pairwise_correlation(response, corr_func)

    def get_noise_correlation(self, corr='spearman'):
        """Correlate cells' trial-to-trial responses within each condition.

        Parameters
        ----------
        corr: 'pearson' or 'spearman'.

        Returns
        -------
        (noise_corr, noise_corr_p): two (# cells, # cells, # scenes) arrays;
        each ``[:, :, k]`` slice is the symmetric matrix for condition ``k``.

        Raises
        ------
        ValueError: if ``corr`` is not a supported correlation name.
        """
        NaturalScenes._log.debug("Calculating noise correlations")

        corr_func = self._correlation_function(corr)
        response = self.reshape_response_array()

        noise_corr = np.zeros((self.numbercells, self.numbercells, self.number_scenes))
        noise_corr_p = np.zeros((self.numbercells, self.numbercells, self.number_scenes))

        for k in range(self.number_scenes):
            trials_per_cell = [response[i, k] for i in range(self.numbercells)]
            noise_corr[:, :, k], noise_corr_p[:, :, k] = \
                self._pairwise_correlation(trials_per_cell, corr_func)

        return noise_corr, noise_corr_p

    @staticmethod
    def from_analysis_file(data_set, analysis_file):
        """Build a NaturalScenes object from previously saved analysis results.

        Parameters
        ----------
        data_set: BrainObservatoryNwbDataSet object
        analysis_file: path to the HDF5 analysis file.

        Returns
        -------
        NaturalScenes instance with its cached results populated from the file.
        The noise correlation, signal correlation and representational
        similarity are only set when present in the file.

        Raises
        ------
        MissingStimulusException: if anything goes wrong while reading the
            analysis file (the original exception's args are passed along).
        """
        ns = NaturalScenes(data_set)
        ns.populate_stimulus_table()

        try:
            ns._sweep_response = pd.read_hdf(analysis_file, "analysis/sweep_response_ns")
            ns._mean_sweep_response = pd.read_hdf(analysis_file, "analysis/mean_sweep_response_ns")
            ns._peak = pd.read_hdf(analysis_file, "analysis/peak")

            with h5py.File(analysis_file, "r") as f:
                # dataset[()] reads the whole dataset; Dataset.value was
                # removed in h5py 3.0
                ns._response = f["analysis/response_ns"][()]
                ns._binned_dx_sp = f["analysis/binned_dx_sp"][()]
                ns._binned_cells_sp = f["analysis/binned_cells_sp"][()]
                ns._binned_dx_vis = f["analysis/binned_dx_vis"][()]
                ns._binned_cells_vis = f["analysis/binned_cells_vis"][()]

                if "analysis/noise_corr_ns" in f:
                    ns.noise_correlation = f["analysis/noise_corr_ns"][()]
                if "analysis/signal_corr_ns" in f:
                    ns.signal_correlation = f["analysis/signal_corr_ns"][()]
                if "analysis/rep_similarity_ns" in f:
                    ns.representational_similarity = f["analysis/rep_similarity_ns"][()]

        except Exception as e:
            raise MissingStimulusException(e.args)

        return ns