Source code for allensdk.api.api

# Allen Institute Software License - This software license is the 2-clause BSD
# license plus a third clause that prohibits redistribution for commercial
# purposes without further permission.
#
# Copyright 2017. Allen Institute. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Redistributions for commercial purposes are not permitted without the
# Allen Institute's written permission.
# For purposes of this license, commercial purposes is the incorporation of the
# Allen Institute's software into anything for which you will charge fees or
# other compensation. Contact terms@alleninstitute.org for commercial licensing
# opportunities.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#

from contextlib import closing
import logging
import os
import errno
import warnings
import io
import zipfile

import requests
import pandas as pd
from requests_toolbelt import exceptions
from requests_toolbelt.downloadutils import stream

import allensdk.core.json_utilities as json_utilities


class Api(object):
    '''Base client for the Allen Brain Atlas data API.

    Stores the endpoint urls derived from an api server url and offers
    helpers for running JSON/XML queries and for downloading files.
    '''
    _log = logging.getLogger('allensdk.api.api')
    _file_download_log = logging.getLogger(
        'allensdk.api.api.retrieve_file_over_http')
    default_api_url = 'http://api.brain-map.org'
    download_url = 'http://download.alleninstitute.org'

    def __init__(self, api_base_url_string=None):
        '''Point the client at an api server.

        Parameters
        ----------
        api_base_url_string : string, optional
            url of the api server. Defaults to `Api.default_api_url`.
        '''
        if api_base_url_string is None:
            api_base_url_string = Api.default_api_url

        self.set_api_urls(api_base_url_string)
        self.default_working_directory = os.getcwd()

    def set_api_urls(self, api_base_url_string):
        '''Set the internal RMA and well known file download endpoint urls
        based on a api server endpoint.

        Parameters
        ----------
        api_base_url_string : string
            url of the api to point to

        Notes
        -----
        Trailing slashes on `api_base_url_string` are ignored when the
        endpoint urls are built, so 'http://host' and 'http://host/' yield
        the same endpoints. `api_url` itself stores the value as given.
        '''
        self.api_url = api_base_url_string

        # Every endpoint path below starts with '/'; strip trailing slashes
        # so a base url such as 'http://host/' does not produce '//'.
        base = api_base_url_string.rstrip('/')

        # http://help.brain-map.org/display/api/Downloading+a+WellKnownFile
        self.well_known_file_endpoint = \
            base + '/api/v2/well_known_file_download'

        # http://help.brain-map.org/display/api/Downloading+3-D+Expression+Grid+Data
        self.grid_data_endpoint = base + '/grid_data'

        # http://help.brain-map.org/display/api/Downloading+and+Displaying+SVG
        self.svg_endpoint = base + '/api/v2/svg'
        self.svg_download_endpoint = base + '/api/v2/svg_download'

        # http://help.brain-map.org/display/api/Downloading+an+Ontology%27s+Structure+Graph
        self.structure_graph_endpoint = \
            base + '/api/v2/structure_graph_download'

        # http://help.brain-map.org/display/api/Searching+a+Specimen+or+Structure+Tree
        self.tree_search_endpoint = base + '/api/v2/tree_search'

        # http://help.brain-map.org/display/api/Searching+Annotated+SectionDataSets
        self.annotated_section_data_sets_endpoint = \
            base + '/api/v2/annotated_section_data_sets'
        self.compound_annotated_section_data_sets_endpoint = \
            base + '/api/v2/compound_annotated_section_data_sets'

        # http://help.brain-map.org/display/api/Image-to-Image+Synchronization#Image-to-ImageSynchronization-ImagetoImage
        self.image_to_atlas_endpoint = base + '/api/v2/image_to_atlas'
        self.image_to_image_endpoint = base + '/api/v2/image_to_image'
        self.image_to_image_2d_endpoint = base + '/api/v2/image_to_image_2d'
        self.reference_to_image_endpoint = \
            base + '/api/v2/reference_to_image'
        self.image_to_reference_endpoint = \
            base + '/api/v2/image_to_reference'
        self.structure_to_image_endpoint = \
            base + '/api/v2/structure_to_image'

        # http://help.brain-map.org/display/mouseconnectivity/API
        self.section_image_download_endpoint = \
            base + '/api/v2/section_image_download'
        self.atlas_image_download_endpoint = \
            base + '/api/v2/atlas_image_download'
        self.projection_image_download_endpoint = \
            base + '/api/v2/projection_image_download'
        self.image_download_endpoint = base + '/api/v2/image_download'

        # The archive lives on the download server, not the api server.
        self.informatics_archive_endpoint = \
            Api.download_url + '/informatics-archive'

        self.rma_endpoint = base + '/api/v2/data'

    def set_default_working_directory(self, working_directory):
        '''Set the working directory where files will be saved.

        Parameters
        ----------
        working_directory : string
            the absolute path string of the working directory.
        '''
        self.default_working_directory = working_directory

    def read_data(self, parsed_json):
        '''Return the message data from the parsed query.

        Parameters
        ----------
        parsed_json : dict
            A python structure corresponding to the JSON data returned
            from the API.

        Returns
        -------
        any type
            The value stored under the 'msg' key.

        Notes
        -----
        See `API Response Formats - Response Envelope
        <http://help.brain-map.org/display/api/API+Response+Formats#APIResponseFormats-ResponseEnvelope>`_
        for additional documentation.
        '''
        return parsed_json['msg']

    def json_msg_query(self, url, dataframe=False):
        ''' Common case where the url is fully constructed
        and the response data is stored in the 'msg' field.

        Parameters
        ----------
        url : string
            Where to get the data in json form
        dataframe : boolean
            True converts to a pandas dataframe, False (default) doesn't.
            Deprecated; a DeprecationWarning is issued when it is set.

        Returns
        -------
        dict or DataFrame
            returned data; type depends on dataframe option
        '''
        # The url is already complete, so the "builder" just returns it.
        data = self.do_query(lambda *a, **k: url, self.read_data)

        if dataframe:
            # stacklevel=2 attributes the warning to the calling code.
            warnings.warn("dataframe argument is deprecated",
                          DeprecationWarning,
                          stacklevel=2)
            data = pd.DataFrame(data)

        return data

    def do_query(self, url_builder_fn, json_traversal_fn, *args, **kwargs):
        '''Bundle an query url construction function
        with a corresponding response json traversal function.

        Parameters
        ----------
        url_builder_fn : function
            A function that takes parameters and returns an rma url.
        json_traversal_fn : function
            A function that takes a json-parsed python data structure
            and returns data from it.
        post : boolean, optional kwarg
            True does an HTTP POST, False (default) does a GET
        args : arguments
            Arguments to be passed to the url builder function.
        kwargs : keyword arguments
            Keyword arguments to be passed to the rma builder function.
            This includes `post` when it is supplied, so the builder must
            accept it.

        Returns
        -------
        any type
            The data extracted from the json response.

        Examples
        --------
        `A simple Api subclass example
        <data_api_client.html#creating-new-api-query-classes>`_.
        '''
        api_url = url_builder_fn(*args, **kwargs)

        post = kwargs.get('post', False)

        json_parsed_data = self.retrieve_parsed_json_over_http(api_url, post)

        return json_traversal_fn(json_parsed_data)

    def do_rma_query(self, rma_builder_fn, json_traversal_fn,
                     *args, **kwargs):
        '''Bundle an RMA query url construction function
        with a corresponding response json traversal function.

        .. note:: Deprecated in AllenSDK 0.9.2
            `do_rma_query` will be removed in AllenSDK 1.0, it is replaced
            by `do_query` because the latter is more general.

        Parameters
        ----------
        rma_builder_fn : function
            A function that takes parameters and returns an rma url.
        json_traversal_fn : function
            A function that takes a json-parsed python data structure
            and returns data from it.
        args : arguments
            Arguments to be passed to the rma builder function.
        kwargs : keyword arguments
            Keyword arguments to be passed to the rma builder function.

        Returns
        -------
        any type
            The data extracted from the json response.

        Examples
        --------
        `A simple Api subclass example
        <data_api_client.html#creating-new-api-query-classes>`_.
        '''
        return self.do_query(rma_builder_fn, json_traversal_fn,
                             *args, **kwargs)

    def load_api_schema(self):
        '''Download the RMA schema from the current RMA endpoint

        Returns
        -------
        dict
            the parsed json schema message

        Notes
        -----
        This information and other
        `Allen Brain Atlas Data Portal Data Model
        <http://help.brain-map.org/display/api/Data+Model>`_
        documentation is also available as a
        `Class Hierarchy <http://api.brain-map.org/class_hierarchy>`_
        and `Class List <http://api.brain-map.org/class_hierarchy>`_.
        '''
        schema_url = self.rma_endpoint + '/enumerate.json'
        json_parsed_schema_data = self.retrieve_parsed_json_over_http(
            schema_url)

        return json_parsed_schema_data

    def construct_well_known_file_download_url(self, well_known_file_id):
        '''Join data api endpoint and id.

        Parameters
        ----------
        well_known_file_id : integer or string representing an integer
            well known file id

        Returns
        -------
        string
            the well-known-file download url for the current api api server

        See Also
        --------
        retrieve_file_over_http: Can be used to retrieve the file from the url.
        '''
        return self.well_known_file_endpoint + '/' + str(well_known_file_id)

    def cleanup_truncated_file(self, file_path):
        '''Helper for removing files.

        A file that does not exist is not an error; any other failure to
        remove it is re-raised.

        Parameters
        ----------
        file_path : string
            Absolute path including the file name to remove.
        '''
        try:
            os.remove(file_path)
        except OSError as e:
            # ENOENT: nothing was written, so there is nothing to clean up.
            if e.errno != errno.ENOENT:
                raise

    @staticmethod
    def _describe_download_failure(error):
        '''Return the log-message suffix naming the kind of download failure.

        The checks run from most to least specific: ConnectionError and
        ReadTimeout must be tested before RequestException.
        '''
        if isinstance(error, exceptions.StreamingError):
            return ' (streaming).'
        if isinstance(error, requests.exceptions.ConnectionError):
            return ' (connection).'
        if isinstance(error, requests.exceptions.ReadTimeout):
            return ' (timeout).'
        if isinstance(error, requests.exceptions.RequestException):
            return ' (request).'
        return ''

    def retrieve_file_over_http(self, url, file_path, zipped=False):
        '''Get a file from the data api and save it.

        If the download fails for any reason the failure is logged, the
        file at `file_path` is removed (if present) and the original
        exception is re-raised.

        Parameters
        ----------
        url : string
            Url[1]_ from which to get the file.
        file_path : string
            Absolute path including the file name to save.
        zipped : bool, optional
            If true, assume that the response is a zipped directory and
            attempt to extract contained files into the directory
            containing file_path. Default is False.

        See Also
        --------
        construct_well_known_file_download_url: Can be used to construct the url.

        References
        ----------
        .. [1] Allen Brain Atlas Data Portal: `Downloading a WellKnownFile
            <http://help.brain-map.org/display/api/Downloading+a+WellKnownFile>`_.
        '''
        self._file_download_log.info("Downloading URL: %s", url)

        try:
            if zipped:
                stream_zip_directory_over_http(url,
                                               os.path.dirname(file_path))
            else:
                stream_file_over_http(url, file_path)
        except Exception as e:
            self._file_download_log.error(
                "Couldn't retrieve file %s from %s%s",
                file_path, url, self._describe_download_failure(e))
            # Never leave a partially written file behind.
            self.cleanup_truncated_file(file_path)
            raise

    def retrieve_parsed_json_over_http(self, url, post=False):
        '''Get the document and put it in a Python data structure

        Parameters
        ----------
        url : string
            Full API query url.
        post : boolean
            True does an HTTP POST,
            False (default) encodes the URL and does a GET

        Returns
        -------
        dict
            Result document as parsed by the JSON library.
        '''
        self._log.info("Downloading URL: %s", url)

        if not post:
            # Percent-encode the url, leaving its reserved characters intact.
            data = json_utilities.read_url_get(
                requests.utils.quote(url, ';/?:@&=+$,'))
        else:
            data = json_utilities.read_url_post(url)

        return data

    def retrieve_xml_over_http(self, url, timeout=None):
        '''Get the document and return it without parsing.

        The HTTP status of the response is not checked; the body is
        returned as received.

        Parameters
        ----------
        url : string
            Full API query url.
        timeout : float or tuple of float, optional
            Passed to `requests.get`. The default (None) applies no
            timeout.

        Returns
        -------
        string
            Unparsed xml string.
        '''
        self._log.info("Downloading URL: %s", url)

        response = requests.get(url, timeout=timeout)

        return response.content
def stream_zip_directory_over_http(url, directory, members=None, timeout=(9.05, 31.1)):
    ''' Supply an http get request, buffer the zipped response in memory
    and extract it into a directory.

    Parameters
    ----------
    url : str
        Send the request to this url
    directory : str
        Extract the response to this directory
    members : list of str, optional
        Extract only these files
    timeout : float or tuple of float, optional
        Specify a timeout for the request. If a tuple, specify separate
        connect and read timeouts.

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with an error status.
    '''

    # The whole archive is held in memory before it is extracted.
    buf = io.BytesIO()

    with closing(requests.get(url, stream=True, timeout=timeout)) as response:
        # Fail on an HTTP error here rather than handing an error page to
        # ZipFile; this matches stream_file_over_http.
        response.raise_for_status()
        stream.stream_response_to_file(response, buf)

    # The context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(buf) as zipper:
        zipper.extractall(path=directory, members=members)
def stream_file_over_http(url, file_path, timeout=(9.05, 31.1)):
    ''' Issue an http get request and write the streamed response body
    to a file.

    The response status is checked before the output file is opened.

    Parameters
    ----------
    url : str
        Send the request to this url
    file_path : str
        Stream the response to this path
    timeout : float or tuple of float, optional
        Specify a timeout for the request. If a tuple, specify separate
        connect and read timeouts.
    '''

    http_response = requests.get(url, stream=True, timeout=timeout)

    with closing(http_response):
        http_response.raise_for_status()

        with open(file_path, 'wb') as out_file:
            stream.stream_response_to_file(http_response, path=out_file)