# Allen Institute Software License - This software license is the 2-clause BSD
# license plus a third clause that prohibits redistribution for commercial
# purposes without further permission.
#
# Copyright 2015-2016. Allen Institute. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Redistributions for commercial purposes are not permitted without the
# Allen Institute's written permission.
# For purposes of this license, commercial purposes is the incorporation of the
# Allen Institute's software into anything for which you will charge fees or
# other compensation. Contact terms@alleninstitute.org for commercial licensing
# opportunities.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
from collections import defaultdict
from six import string_types
import numpy as np
import pandas as pd
from allensdk.deprecated import class_deprecated
[docs]@class_deprecated('Use StructureTree instead.')
class Ontology(object):
'''
.. note:: Deprecated from 0.12.5
`Ontology` has been replaced by `StructureTree`.
'''
def __init__(self, df):
self.df = df
child_ids = defaultdict(set)
descendant_ids = defaultdict(set)
for _, s in df.iterrows():
sid = s.name
parent_id = s['parent_structure_id']
if np.isfinite(parent_id):
parent_id = int(parent_id)
child_ids[parent_id].add(sid)
parent_id_list = map(int, s['structure_id_path'].split('/')[1:-1])
for parent_id in parent_id_list:
descendant_ids[parent_id].add(sid)
self.child_ids = dict(child_ids)
self.descendant_ids = dict(descendant_ids)
def __getitem__(self, structures):
"""
Return a subset of structures by id or acronym. Duplicate values are ignored.
Parameters
----------
structures: tuple
Elements can be pandas.Series objects, which are expected to be structure ids.
Elements can be strings, which are expected to be acronyms.
All other elements must be cast-able to int, which are treated as structure ids.
Returns
-------
pandas.DataFrame
A subset of rows from the complete ontology that match filtering criteria.
"""
# __getitem__ always has a single argument. If called with a single argument
# (e.g. ontology[315]), that item is passed straight through. If called with
# multiple arguments (e.g. ontology[315,997]), that gets passed through as a
# tuple. This normalizes the arguments so that everything is iterable.
if not isinstance(structures, tuple) and not isinstance(structures, list) and not isinstance(structures, set):
structures = structures,
# this is the final set of structure ids used to filter
structure_ids = set()
string_strs = []
for s in structures:
if isinstance(s, pd.Series):
# if it's a pandas series, assume it's a series of structure
# ids
structure_ids.update(s.tolist())
elif isinstance(s, string_types):
# if it's a string, assume it's an acronym
string_strs.append(s)
else:
# if it's anything else, cast it to an integer and treat it
# like a structure id
structure_ids.add(int(s))
# convert the string arguments to rows
if len(string_strs):
# pull out the rows that match these acronyms
string_strs = self.df[self.df['acronym'].isin(string_strs)]
# if there are no other structure ids, just return this dataframe
if len(structure_ids) == 0:
return string_strs
# otherwise pull out the ids and add them to the set
structure_ids.update(string_strs.id.tolist())
return self.df.loc[structure_ids].dropna(axis=0, how='all')
[docs] def get_descendant_ids(self, structure_ids):
"""
Find the set of the ids of structures that are descendants of one or more structures.
The returned set will include the input structure ids.
Parameters
----------
structure_ids: iterable
Any iterable type that contains structure ids that can be cast to integers.
Returns
-------
set
Set of descendant structure ids.
"""
if len(structure_ids) == 0:
return self.descendant_ids
else:
descendants = set()
for structure_id in structure_ids:
descendants.update(self.descendant_ids.get(
int(structure_id), set()))
return descendants
[docs] def get_child_ids(self, structure_ids):
"""
Find the set of ids that are immediate children of one or more structures.
Parameters
----------
structure_ids: iterable
Any iterable type that contains structure ids that can be cast to integers.
Returns
-------
set
Set of child structure ids
"""
if len(structure_ids) == 0:
return self.child_ids
else:
children = set()
for structure_id in structure_ids:
children.update(self.child_ids.get(int(structure_id), set()))
return children
[docs] def get_descendants(self, structure_ids):
"""
Find the set of structures that are descendants of one or more structures.
The returned set will include the input structures.
Parameters
----------
structure_ids: iterable
Any iterable type that contains structure ids that can be cast to integers.
Returns
-------
pandas.DataFrame
Set of descendant structures.
"""
descendant_ids = self.get_descendant_ids(structure_ids)
return self[descendant_ids]
[docs] def get_children(self, structure_ids):
"""
Find the set of structures that are immediate children of one or more structures.
Parameters
----------
structure_ids: iterable
Any iterable type that contains structure ids that can be cast to integers.
Returns
-------
pandas.DataFrame
Set of child structures
"""
child_ids = self.get_child_ids(structure_ids)
return self[child_ids]
[docs] def structure_descends_from(self, child_id, parent_id):
"""
Return whether one structure id is a descendant of another structure id.
"""
child = self[child_id]
if child is not None:
parent_str = '/%d/' % parent_id
return child['structure_id_path'].values[0].find(parent_str) >= 0
return False