# Allen Institute Software License - This software license is the 2-clause BSD
# license plus a third clause that prohibits redistribution for commercial
# purposes without further permission.
#
# Copyright 2014-2016. Allen Institute. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Redistributions for commercial purposes are not permitted without the
# Allen Institute's written permission.
# For purposes of this license, commercial purposes is the incorporation of the
# Allen Institute's software into anything for which you will charge fees or
# other compensation. Contact terms@alleninstitute.org for commercial licensing
# opportunities.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
import os
import sys
import re
import logging
import errno
import pandas as pd
[docs]class ManifestVersionError(Exception):
def __init__(self, message, version, found_version):
super(ManifestVersionError, self).__init__(message)
self.found_version = found_version
self.version = version
[docs]class Manifest(object):
"""Manages the location of external files
referenced in an Allen SDK configuration """
DIR = 'dir'
FILE = 'file'
DIRNAME = 'dir_name'
VERSION = 'manifest_version'
log = logging.getLogger(__name__)
def __init__(self, config=None, relative_base_dir='.', version=None):
self.path_info = {}
self.relative_base_dir = relative_base_dir
if config is not None:
self.load_config(config, version=version)
[docs] def load_config(self, config, version=None):
''' Load paths into the manifest from an Allen SDK config section.
Parameters
----------
config : Config
Manifest section of an Allen SDK config.
'''
found_version = None
for path_info in config:
path_type = path_info['type']
path_format = None
if 'format' in path_info:
path_format = path_info['format']
if path_type == 'file':
try:
parent_key = path_info['parent_key']
except:
parent_key = None
self.add_file(path_info['key'],
path_info['spec'],
parent_key,
path_format)
elif path_type == 'dir':
try:
parent_key = path_info['parent_key']
except:
parent_key = None
spec = path_info['spec']
absolute = False
if spec[0] == '/':
absolute = True
self.add_path(path_info['key'],
path_info['spec'],
path_type,
absolute,
path_format,
parent_key)
elif path_type == self.VERSION:
found_version = path_info['value']
else:
Manifest.log.warning("Unknown path type in manifest: %s" %
(path_type))
if found_version != version:
raise ManifestVersionError("", version, found_version)
self.version = version
[docs] def add_path(self, key, path, path_type=DIR,
absolute=True, path_format=None, parent_key=None):
'''Insert a new entry.
Parameters
----------
key : string
Identifier for referencing the entry.
path : string
Specification for a path using %s, %d style substitution.
path_type : string enumeration
'dir' (default) or 'file'
absolute : boolean
Is the spec relative to the process current directory.
path_format : string, optional
Indicate a known file type for further parsing.
parent_key : string
Refer to another entry.
'''
if parent_key:
path_args = []
try:
parent_path = self.path_info[parent_key]['spec']
path_args.append(parent_path)
except:
Manifest.log.error(
"cannot resolve directory key %s" % (parent_key))
raise
path_args.extend(path.split('/'))
path = os.path.join(*path_args)
# TODO: relative paths need to be considered better
if absolute is True:
path = os.path.abspath(path)
else:
path = os.path.abspath(os.path.join(self.relative_base_dir, path))
if path_type == Manifest.DIRNAME:
path = os.path.dirname(path)
self.path_info[key] = {'type': path_type,
'spec': path}
if path_type == Manifest.FILE and path_format is not None:
self.path_info[key]['format'] = path_format
[docs] def add_paths(self, path_info):
''' add information about paths stored in the manifest.
Parameters
path_info : dict
Information about the new paths
'''
for path_key, path_data in path_info.items():
path_format = None
if 'format' in path_data:
path_format = path_data['format']
Manifest.log.info("Adding path. type: %s, format: %s, spec: %s" %
(path_data['type'],
path_data['spec'],
path_format))
entry = {'type': path_data['type'],
'spec': path_data['spec']
}
if path_format is not None:
entry['format'] = path_format
self.path_info[path_key] = entry
[docs] def add_file(self,
file_key,
file_name,
dir_key=None,
path_format=None):
'''Insert a new file entry.
Parameters
----------
file_key : string
Reference to the entry.
file_name : string
Subtitutions of the %s, %d style allowed.
dir_key : string
Reference to the parent directory entry.
path_format : string, optional
File type for further parsing.
'''
path_args = []
if dir_key:
try:
dir_path = self.path_info[dir_key]['spec']
path_args.append(dir_path)
except:
Manifest.log.error(
"cannot resolve directory key %s" % (dir_key))
raise
elif not file_name.startswith('/'):
path_args.append(os.curdir)
else:
path_args.append(os.path.sep)
path_args.extend(file_name.split('/'))
file_path = os.path.join(*path_args)
self.path_info[file_key] = {'type': Manifest.FILE,
'spec': file_path}
if path_format:
self.path_info[file_key]['format'] = path_format
[docs] def get_path(self, path_key, *args):
'''Retrieve an entry with substitutions.
Parameters
----------
path_key : string
Refer to the entry to retrieve.
args : any types, optional
arguments to be substituted into the path spec for %s, %d, etc.
Returns
-------
string
Path with parent structure and substitutions applied.
'''
path_spec = self.path_info[path_key]['spec']
if args is not None and len(args) != 0:
path = path_spec % args
else:
path = path_spec
return path
[docs] @classmethod
def safe_make_parent_dirs(cls, file_name):
''' Create a parent directories for file.
Parameters
----------
file_name : string
'''
dirname = os.path.dirname(file_name)
# do nothing if there are no parent directories
if not dirname:
return
Manifest.safe_mkdir(dirname)
[docs] @classmethod
def safe_mkdir(cls, directory):
'''Create path if not already there.
Parameters
----------
directory : string
create it if it doesn't exist
'''
try:
os.makedirs(directory)
except OSError as e:
if ((sys.platform == "darwin") and (e.errno == errno.EISDIR) and \
(e.filename == "/")):
# undocumented behavior of mkdir on OSX where for / it raises
# EISDIR and not EEXIST
# https://bugs.python.org/issue24231 (old but still holds true)
pass
elif sys.platform == "win32" and e.errno == errno.EACCES:
root_path = os.path.abspath(os.sep)
if e.filename == root_path or \
e.filename == root_path.replace("\\", "/"):
# When attempting to os.makedirs the root drive letter on
# Windows, EACCES is raised, not EEXIST
pass
else:
raise
elif e.errno == errno.EEXIST:
pass
else:
raise
[docs] def create_dir(self, path_key):
'''Make a directory for an entry.
Parameters
----------
path_key : string
Reference to the entry.
'''
dir_path = self.get_path(path_key)
Manifest.safe_mkdir(dir_path)
[docs] def check_dir(self, path_key, do_exit=False):
'''Verify a directories existence or optionally exit.
Parameters
----------
path_key : string
Reference to the entry.
do_exit : boolean
What to do if the directory is not present.
'''
dir_path = self.get_path(path_key)
if not os.path.exists(dir_path):
Manifest.log.fatal('Directory %s does not exist; exiting.' %
(dir_path))
if do_exit is True:
quit()
[docs] def resolve_paths(self, description_dict, suffix='_key'):
'''Walk input items and expand those that refer to a manifest entry.
Parameters
----------
description_dict : dict
Any entries with key names ending in suffix will be expanded.
suffix : string
Indicates the entries to be expanded.
'''
key_pattern = re.compile('(.*)%s$' % (suffix))
for description_key, manifest_key in description_dict.items():
m = key_pattern.match(description_key)
if m:
real_key = m.group(1) # i.e. job_dir_key -> job_dir
filename = self.get_path(manifest_key)
description_dict[real_key] = filename
del description_dict[description_key]
[docs] def as_dataframe(self):
return pd.DataFrame.from_dict(self.path_info,
orient='index')