# Source code for allensdk.core.ontology

# Allen Institute Software License - This software license is the 2-clause BSD
# license plus a third clause that prohibits redistribution for commercial
# purposes without further permission.
#
# Copyright 2015-2016. Allen Institute. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Redistributions for commercial purposes are not permitted without the
# Allen Institute's written permission.
# For purposes of this license, commercial purposes is the incorporation of the
# Allen Institute's software into anything for which you will charge fees or
# other compensation. Contact terms@alleninstitute.org for commercial licensing
# opportunities.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
from collections import defaultdict
from six import string_types
import numpy as np
import pandas as pd

from allensdk.deprecated import class_deprecated


@class_deprecated('Use StructureTree instead.')
class Ontology(object):
    '''
    .. note:: Deprecated from 0.12.5
        `Ontology` has been replaced by `StructureTree`.

    Wraps a structure table and precomputes parent/child and
    ancestor/descendant lookups from it.

    The constructor reads the columns ``parent_structure_id`` and
    ``structure_id_path`` and uses each row's index label as its structure
    id.  ``__getitem__`` additionally reads the ``acronym`` and ``id``
    columns.
    '''

    def __init__(self, df):
        """ Build the child and descendant lookup tables.

        Parameters
        ----------
        df: pandas.DataFrame
            One row per structure, indexed by structure id.
        """
        self.df = df

        child_ids = defaultdict(set)
        descendant_ids = defaultdict(set)

        for _, s in df.iterrows():
            # the row label is used as the structure id
            sid = s.name

            # rows without a finite parent id (e.g. NaN) have no parent to
            # register under
            parent_id = s['parent_structure_id']
            if np.isfinite(parent_id):
                child_ids[int(parent_id)].add(sid)

            # the path is a '/'-delimited list of ids; [1:-1] drops the
            # empty strings produced by the leading and trailing '/'.
            # Every id on the path gets this structure as a descendant.
            path_ids = map(int, s['structure_id_path'].split('/')[1:-1])
            for ancestor_id in path_ids:
                descendant_ids[ancestor_id].add(sid)

        # freeze into plain dicts so later lookups of unknown ids do not
        # silently create empty entries
        self.child_ids = dict(child_ids)
        self.descendant_ids = dict(descendant_ids)

    def __getitem__(self, structures):
        """ Return a subset of structures by id or acronym.  Duplicate values
        are ignored.

        Parameters
        ----------
        structures: tuple
            Elements can be pandas.Series objects, which are expected to be
            structure ids.  Elements can be strings, which are expected to be
            acronyms.  All other elements must be cast-able to int, which are
            treated as structure ids.

        Returns
        -------
        pandas.DataFrame
            A subset of rows from the complete ontology that match filtering
            criteria.  Ids and acronyms that do not occur in the ontology are
            ignored.  Rows are returned in the order of the underlying table.
        """

        # __getitem__ always has a single argument.  If called with a single
        # argument (e.g. ontology[315]), that item is passed straight through.
        # If called with multiple arguments (e.g. ontology[315,997]), that
        # gets passed through as a tuple.  This normalizes the arguments so
        # that everything is iterable.
        if not isinstance(structures, (tuple, list, set)):
            structures = (structures,)

        # this is the final set of structure ids used to filter
        structure_ids = set()
        acronyms = []

        for s in structures:
            if isinstance(s, pd.Series):
                # if it's a pandas series, assume it's a series of structure
                # ids
                structure_ids.update(s.tolist())
            elif isinstance(s, string_types):
                # if it's a string, assume it's an acronym
                acronyms.append(s)
            else:
                # if it's anything else, cast it to an integer and treat it
                # like a structure id
                structure_ids.add(int(s))

        # convert the string arguments to rows
        if acronyms:
            # pull out the rows that match these acronyms
            acronym_rows = self.df[self.df['acronym'].isin(acronyms)]

            # if there are no other structure ids, just return this dataframe
            if not structure_ids:
                return acronym_rows

            # otherwise pull out the ids and add them to the set
            structure_ids.update(acronym_rows.id.tolist())

        # Select with a boolean mask rather than .loc[set]: current pandas
        # rejects set indexers and raises KeyError for labels that are not
        # in the index, whereas unknown ids are meant to be dropped here.
        matches = self.df.index.isin(list(structure_ids))
        return self.df[matches].dropna(axis=0, how='all')

    def get_descendant_ids(self, structure_ids):
        """ Find the set of the ids of structures that are descendants of
        one or more structures.  A structure's own id is part of its
        ``structure_id_path``, so the returned set includes the input
        structure ids that exist in the ontology.

        Parameters
        ----------
        structure_ids: iterable
            Structure ids that can be cast to integers.  Must support
            ``len()``.

        Returns
        -------
        set
            Set of descendant structure ids.  If `structure_ids` is empty,
            the complete ``{structure id: set of descendant ids}`` dict is
            returned instead.
        """
        if len(structure_ids) == 0:
            return self.descendant_ids

        descendants = set()
        for structure_id in structure_ids:
            # unknown ids contribute nothing
            descendants.update(
                self.descendant_ids.get(int(structure_id), set()))
        return descendants

    def get_child_ids(self, structure_ids):
        """ Find the set of ids that are immediate children of one or more
        structures.

        Parameters
        ----------
        structure_ids: iterable
            Structure ids that can be cast to integers.  Must support
            ``len()``.

        Returns
        -------
        set
            Set of child structure ids.  If `structure_ids` is empty, the
            complete ``{structure id: set of child ids}`` dict is returned
            instead.
        """
        if len(structure_ids) == 0:
            return self.child_ids

        children = set()
        for structure_id in structure_ids:
            # unknown ids and leaf structures contribute nothing
            children.update(self.child_ids.get(int(structure_id), set()))
        return children

    def get_descendants(self, structure_ids):
        """ Find the set of structures that are descendants of one or more
        structures.  The returned set will include the input structures.

        Parameters
        ----------
        structure_ids: iterable
            Structure ids that can be cast to integers.  Must be non-empty:
            for empty input `get_descendant_ids` returns a dict, which
            `__getitem__` cannot interpret as structure ids.

        Returns
        -------
        pandas.DataFrame
            Set of descendant structures.
        """
        descendant_ids = self.get_descendant_ids(structure_ids)
        return self[descendant_ids]

    def get_children(self, structure_ids):
        """ Find the set of structures that are immediate children of one or
        more structures.

        Parameters
        ----------
        structure_ids: iterable
            Structure ids that can be cast to integers.  Must be non-empty:
            for empty input `get_child_ids` returns a dict, which
            `__getitem__` cannot interpret as structure ids.

        Returns
        -------
        pandas.DataFrame
            Set of child structures
        """
        child_ids = self.get_child_ids(structure_ids)
        return self[child_ids]

    def structure_descends_from(self, child_id, parent_id):
        """ Return whether one structure id is a descendant of another
        structure id.

        The test is whether ``'/<parent_id>/'`` occurs in the child's
        ``structure_id_path``.

        Parameters
        ----------
        child_id: int or string
            Structure id or acronym of the candidate descendant.
        parent_id: int
            Structure id of the candidate ancestor.

        Returns
        -------
        bool
            True if `parent_id` is on the child's id path.  False if it is
            not, or if `child_id` does not match any structure.
        """
        child = self[child_id]

        # __getitem__ returns an empty frame (never None) for an unknown
        # structure, so emptiness is what has to be checked before indexing
        # into the values.
        if child is None or len(child) == 0:
            return False

        parent_str = '/%d/' % parent_id
        return parent_str in child['structure_id_path'].values[0]