# Allen Institute Software License - This software license is the 2-clause BSD
# license plus a third clause that prohibits redistribution for commercial
# purposes without further permission.
#
# Copyright 2017. Allen Institute. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Redistributions for commercial purposes are not permitted without the
# Allen Institute's written permission.
# For purposes of this license, commercial purposes is the incorporation of the
# Allen Institute's software into anything for which you will charge fees or
# other compensation. Contact terms@alleninstitute.org for commercial licensing
# opportunities.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
from contextlib import closing
import logging
import os
import errno
import warnings
import io
import zipfile
import requests
import pandas as pd
from requests_toolbelt import exceptions
from requests_toolbelt.downloadutils import stream
import allensdk.core.json_utilities as json_utilities
[docs]class Api(object):
_log = logging.getLogger('allensdk.api.api')
_file_download_log = logging.getLogger('allensdk.api.api.retrieve_file_over_http')
default_api_url = 'http://api.brain-map.org'
download_url = 'http://download.alleninstitute.org'
def __init__(self, api_base_url_string=None):
if api_base_url_string is None:
api_base_url_string = Api.default_api_url
self.set_api_urls(api_base_url_string)
self.default_working_directory = os.getcwd()
[docs] def set_api_urls(self, api_base_url_string):
'''Set the internal RMA and well known file download endpoint urls
based on a api server endpoint.
Parameters
----------
api_base_url_string : string
url of the api to point to
'''
self.api_url = api_base_url_string
# http://help.brain-map.org/display/api/Downloading+a+WellKnownFile
self.well_known_file_endpoint = api_base_url_string + \
'/api/v2/well_known_file_download'
# http://help.brain-map.org/display/api/Downloading+3-D+Expression+Grid+Data
self.grid_data_endpoint = api_base_url_string + '/grid_data'
# http://help.brain-map.org/display/api/Downloading+and+Displaying+SVG
self.svg_endpoint = api_base_url_string + '/api/v2/svg'
self.svg_download_endpoint = api_base_url_string + '/api/v2/svg_download'
# http://help.brain-map.org/display/api/Downloading+an+Ontology%27s+Structure+Graph
self.structure_graph_endpoint = api_base_url_string + \
'/api/v2/structure_graph_download'
# http://help.brain-map.org/display/api/Searching+a+Specimen+or+Structure+Tree
self.tree_search_endpoint = api_base_url_string + '/api/v2/tree_search'
# http://help.brain-map.org/display/api/Searching+Annotated+SectionDataSets
self.annotated_section_data_sets_endpoint = api_base_url_string + \
'/api/v2/annotated_section_data_sets'
self.compound_annotated_section_data_sets_endpoint = api_base_url_string + \
'/api/v2/compound_annotated_section_data_sets'
# http://help.brain-map.org/display/api/Image-to-Image+Synchronization#Image-to-ImageSynchronization-ImagetoImage
self.image_to_atlas_endpoint = api_base_url_string + '/api/v2/image_to_atlas'
self.image_to_image_endpoint = api_base_url_string + '/api/v2/image_to_image'
self.image_to_image_2d_endpoint = api_base_url_string + '/api/v2/image_to_image_2d'
self.reference_to_image_endpoint = api_base_url_string + '/api/v2/reference_to_image'
self.image_to_reference_endpoint = api_base_url_string + '/api/v2/image_to_reference'
self.structure_to_image_endpoint = api_base_url_string + '/api/v2/structure_to_image'
# http://help.brain-map.org/display/mouseconnectivity/API
self.section_image_download_endpoint = api_base_url_string + \
'/api/v2/section_image_download'
self.atlas_image_download_endpoint = api_base_url_string + \
'/api/v2/atlas_image_download'
self.projection_image_download_endpoint = api_base_url_string + \
'/api/v2/projection_image_download'
self.image_download_endpoint = api_base_url_string + \
'/api/v2/image_download'
self.informatics_archive_endpoint = Api.download_url + '/informatics-archive'
self.rma_endpoint = api_base_url_string + '/api/v2/data'
[docs] def set_default_working_directory(self, working_directory):
'''Set the working directory where files will be saved.
Parameters
----------
working_directory : string
the absolute path string of the working directory.
'''
self.default_working_directory = working_directory
[docs] def read_data(self, parsed_json):
'''Return the message data from the parsed query.
Parameters
----------
parsed_json : dict
A python structure corresponding to the JSON data returned from the API.
Notes
-----
See `API Response Formats - Response Envelope <http://help.brain-map.org/display/api/API+Response+Formats#APIResponseFormats-ResponseEnvelope>`_
for additional documentation.
'''
return parsed_json['msg']
[docs] def json_msg_query(self, url, dataframe=False):
''' Common case where the url is fully constructed
and the response data is stored in the 'msg' field.
Parameters
----------
url : string
Where to get the data in json form
dataframe : boolean
True converts to a pandas dataframe, False (default) doesn't
Returns
-------
dict or DataFrame
returned data; type depends on dataframe option
'''
data = self.do_query(lambda *a, **k: url,
self.read_data)
if dataframe is True:
warnings.warn("dataframe argument is deprecated", DeprecationWarning)
data = pd.DataFrame(data)
return data
[docs] def do_query(self, url_builder_fn, json_traversal_fn, *args, **kwargs):
'''Bundle an query url construction function
with a corresponding response json traversal function.
Parameters
----------
url_builder_fn : function
A function that takes parameters and returns an rma url.
json_traversal_fn : function
A function that takes a json-parsed python data structure and returns data from it.
post : boolean, optional kwarg
True does an HTTP POST, False (default) does a GET
args : arguments
Arguments to be passed to the url builder function.
kwargs : keyword arguments
Keyword arguments to be passed to the rma builder function.
Returns
-------
any type
The data extracted from the json response.
Examples
--------
`A simple Api subclass example
<data_api_client.html#creating-new-api-query-classes>`_.
'''
api_url = url_builder_fn(*args, **kwargs)
post = kwargs.get('post', False)
json_parsed_data = self.retrieve_parsed_json_over_http(api_url, post)
return json_traversal_fn(json_parsed_data)
[docs] def do_rma_query(self, rma_builder_fn, json_traversal_fn, *args, **kwargs):
'''Bundle an RMA query url construction function
with a corresponding response json traversal function.
..note:: Deprecated in AllenSDK 0.9.2
`do_rma_query` will be removed in AllenSDK 1.0, it is replaced by
`do_query` because the latter is more general.
Parameters
----------
rma_builder_fn : function
A function that takes parameters and returns an rma url.
json_traversal_fn : function
A function that takes a json-parsed python data structure and returns data from it.
args : arguments
Arguments to be passed to the rma builder function.
kwargs : keyword arguments
Keyword arguments to be passed to the rma builder function.
Returns
-------
any type
The data extracted from the json response.
Examples
--------
`A simple Api subclass example
<data_api_client.html#creating-new-api-query-classes>`_.
'''
return self.do_query(rma_builder_fn, json_traversal_fn, *args, **kwargs)
[docs] def load_api_schema(self):
'''Download the RMA schema from the current RMA endpoint
Returns
-------
dict
the parsed json schema message
Notes
-----
This information and other
`Allen Brain Atlas Data Portal Data Model <http://help.brain-map.org/display/api/Data+Model>`_
documentation is also available as a
`Class Hierarchy <http://api.brain-map.org/class_hierarchy>`_
and `Class List <http://api.brain-map.org/class_hierarchy>`_.
'''
schema_url = self.rma_endpoint + '/enumerate.json'
json_parsed_schema_data = self.retrieve_parsed_json_over_http(
schema_url)
return json_parsed_schema_data
[docs] def construct_well_known_file_download_url(self, well_known_file_id):
'''Join data api endpoint and id.
Parameters
----------
well_known_file_id : integer or string representing an integer
well known file id
Returns
-------
string
the well-known-file download url for the current api api server
See Also
--------
retrieve_file_over_http: Can be used to retrieve the file from the url.
'''
return self.well_known_file_endpoint + '/' + str(well_known_file_id)
[docs] def cleanup_truncated_file(self, file_path):
'''Helper for removing files.
Parameters
----------
file_path : string
Absolute path including the file name to remove.'''
try:
os.remove(file_path)
except OSError as e:
if e.errno != errno.ENOENT:
raise
[docs] def retrieve_file_over_http(self, url, file_path, zipped=False):
'''Get a file from the data api and save it.
Parameters
----------
url : string
Url[1]_ from which to get the file.
file_path : string
Absolute path including the file name to save.
zipped : bool, optional
If true, assume that the response is a zipped directory and attempt
to extract contained files into the directory containing file_path.
Default is False.
See Also
--------
construct_well_known_file_download_url: Can be used to construct the url.
References
----------
.. [1] Allen Brain Atlas Data Portal: `Downloading a WellKnownFile <http://help.brain-map.org/display/api/Downloading+a+WellKnownFile>`_.
'''
self._file_download_log.info("Downloading URL: %s", url)
try:
if zipped:
stream_zip_directory_over_http(url, os.path.dirname(file_path))
else:
stream_file_over_http(url, file_path)
except exceptions.StreamingError as e:
self._file_download_log.error("Couldn't retrieve file %s from %s (streaming)." % (file_path,url))
self.cleanup_truncated_file(file_path)
raise
except requests.exceptions.ConnectionError as e:
self._file_download_log.error("Couldn't retrieve file %s from %s (connection)." % (file_path,url))
self.cleanup_truncated_file(file_path)
raise
except requests.exceptions.ReadTimeout as e:
self._file_download_log.error("Couldn't retrieve file %s from %s (timeout)." % (file_path,url))
self.cleanup_truncated_file(file_path)
raise
except requests.exceptions.RequestException as e:
self._file_download_log.error("Couldn't retrieve file %s from %s (request)." % (file_path,url))
self.cleanup_truncated_file(file_path)
raise
except Exception as e:
self._file_download_log.error("Couldn't retrieve file %s from %s" % (file_path, url))
self.cleanup_truncated_file(file_path)
raise
[docs] def retrieve_parsed_json_over_http(self, url, post=False):
'''Get the document and put it in a Python data structure
Parameters
----------
url : string
Full API query url.
post : boolean
True does an HTTP POST, False (default) encodes the URL and does a GET
Returns
-------
dict
Result document as parsed by the JSON library.
'''
self._log.info("Downloading URL: %s", url)
if post is False:
data = json_utilities.read_url_get(
requests.utils.quote(url,
';/?:@&=+$,'))
else:
data = json_utilities.read_url_post(url)
return data
[docs] def retrieve_xml_over_http(self, url):
'''Get the document and put it in a Python data structure
Parameters
----------
url : string
Full API query url.
Returns
-------
string
Unparsed xml string.
'''
self._log.info("Downloading URL: %s", url)
response = requests.get(url)
return response.content
[docs]def stream_zip_directory_over_http(url, directory, members=None, timeout=(9.05, 31.1)):
''' Supply an http get request and stream the response to a file.
Parameters
----------
url : str
Send the request to this url
directory : str
Extract the response to this directory
members : list of str, optional
Extract only these files
timeout : float or tuple of float, optional
Specify a timeout for the request. If a tuple, specify seperate connect
and read timeouts.
'''
buf = io.BytesIO()
with closing( requests.get(url, stream=True, timeout=timeout) ) as request:
stream.stream_response_to_file( request, buf )
zipper = zipfile.ZipFile(buf)
zipper.extractall(path=directory, members=members)
zipper.close()
[docs]def stream_file_over_http(url, file_path, timeout=(9.05, 31.1)):
''' Supply an http get request and stream the response to a file.
Parameters
----------
url : str
Send the request to this url
file_path : str
Stream the response to this path
timeout : float or tuple of float, optional
Specify a timeout for the request. If a tuple, specify seperate connect
and read timeouts.
'''
with closing(requests.get(url, stream=True, timeout=timeout)) as response:
response.raise_for_status()
with open(file_path, 'wb') as fil:
stream.stream_response_to_file(response, path=fil)