# Allen Institute Software License - This software license is the 2-clause BSD
# license plus a third clause that prohibits redistribution for commercial
# purposes without further permission.
#
# Copyright 2017. Allen Institute. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Redistributions for commercial purposes are not permitted without the
# Allen Institute's written permission.
# For purposes of this license, commercial purposes is the incorporation of the
# Allen Institute's software into anything for which you will charge fees or
# other compensation. Contact terms@alleninstitute.org for commercial licensing
# opportunities.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
import functools
import six
import h5py
[docs]def decode_bytes(bytes_dataset, encoding='UTF-8'):
''' Convert the elements of a dataset of bytes to str
'''
return [ item.decode(encoding) for item in bytes_dataset[:].flat ]
[docs]def load_datasets_by_relnames(relnames, h5_file, start_node):
''' A convenience function for finding and loading into memory one or more
datasets from an h5 file
'''
matcher_cbs = {
relname: functools.partial(h5_object_matcher_relname_in, [relname])
for relname in relnames
}
matches = keyed_locate_h5_objects(matcher_cbs, h5_file, start_node=start_node)
return { key: value[:] for key, value in six.iteritems(matches) }
[docs]def h5_object_matcher_relname_in(relnames, h5_object_name, h5_object):
''' Asks if an h5 object's relative name (the final section of its absolute name)
is contained within a provided array
Parameters
----------
relnames : array-like
Relative names against which to match
h5_object_name : str
Full name (path from origin) of h5 object
h5_object : h5py.Group, h5py.Dataset
Check this object's relative name
Returns
-------
bool :
whether the match succeeded
h5_object : h5py.group, h5py.Dataset
the argued object
'''
return h5_object_name.split('/')[-1] in relnames, h5_object
[docs]def keyed_locate_h5_objects(matcher_cbs, h5_file, start_node=None):
''' Traverse an h5 file and build up a dictionary mapping supplied keys to
located objects
'''
matches = {}
def matcher(obj_name, obj):
for key, matcher_cb in six.iteritems(matcher_cbs):
match, _ = matcher_cb(obj_name, obj)
if match:
matches[key] = obj
traverse_h5_file(matcher, h5_file, start_node)
return matches
[docs]def locate_h5_objects(matcher_cb, h5_file, start_node=None):
''' Traverse an h5 file and return objects matching supplied criteria
'''
matches = []
def matcher(h5_object_name, h5_object):
match, _ = matcher_cb(h5_object_name, h5_object)
if match:
matches.append(h5_object)
traverse_h5_file(matcher, h5_file, start_node)
return matches
[docs]def traverse_h5_file(callback, h5_file, start_node=None):
''' Traverse an h5 file and apply a callback to each node
'''
if start_node is None:
start_node = h5_file['/']
elif isinstance(start_node, str):
start_node = h5_file[start_node]
start_node.visititems(callback)