Source code for acdh_arche_pyutils.client

import rdflib
import requests
import yaml
import os
from acdh_arche_pyutils.utils import (
    camel_to_snake,
    create_query_sting,
    id_from_uri
)


class ArcheApiClient:
    """Main class to interact with the ARCHE API.

    On construction the client fetches the endpoint's ``describe`` document
    and exposes its string-valued schema entries as snake_case attributes
    (the methods below rely on ``self.label``, ``self.parent`` and
    ``self.top_collection`` having been set this way).
    """

    def __init__(self, arche_endpoint: str, out_dir: str = '.'):
        """
        initializes the class

        :param arche_endpoint: The ARCHE endpoint e.g.
            `https://arche-dev.acdh-dev.oeaw.ac.at/api/`
        :type arche_endpoint: str
        :param out_dir: a path to serialize data to, defaults to '.'
        :type out_dir: str

        :return: An ArcheApiClient instance
        :rtype: class:`acdh_arche_pyutils.client.ArcheApiClient`
        """
        super().__init__()
        self.endpoint = arche_endpoint
        self.out_dir = out_dir
        # Tolerate an endpoint passed without its trailing slash; otherwise
        # the URL would end in e.g. ".../apidescribe".
        separator = '' if arche_endpoint.endswith('/') else '/'
        self.describe_url = f"{arche_endpoint}{separator}describe"
        print(f'Fetching description for endpoint: {self.endpoint}')
        self.info = requests.get(self.describe_url)
        # NOTE(review): the response body comes from a remote server;
        # yaml.safe_load would be the safer choice if the describe document
        # never needs the extra tags FullLoader supports -- confirm.
        self.description = yaml.load(self.info.text, Loader=yaml.FullLoader)
        self.rest = self.description['rest']
        self.schema = self.description['schema']
        self.base_url = self.rest['urlBase']
        self.path_base = self.rest['pathBase']
        self.fetched_endpoint = f"{self.base_url}{self.path_base}"
        # Expose every string-valued schema entry as a snake_case attribute,
        # first the top-level ones, then those nested under 'classes'.
        for key, value in self.schema.items():
            if isinstance(value, str):
                setattr(self, camel_to_snake(key), value)
        for key, value in self.schema['classes'].items():
            if isinstance(value, str):
                setattr(self, camel_to_snake(key), value)

    @staticmethod
    def _rdflib_format(format: str) -> str:
        """Map a file-extension style format name to an rdflib serializer name.

        'ttl' (and its long form 'turtle') select Turtle; every other value
        falls back to RDF/XML.
        """
        if format in ('ttl', 'turtle'):
            return 'turtle'
        return 'xml'

    def top_col_ids(self):
        """returns a list of tuples of all TopCollection

        Each tuple is ``(subject, object)`` of the triples in the search
        result whose predicate is ``self.label``, both converted to `str`.

        :return: A list of `(str, str)` tuples
        :rtype: list
        """
        query_params = {
            "property[0]": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type",
            "value[0]": self.top_collection,
            "readMode": 'ids'
        }
        query_string = create_query_sting(query_params)
        r = requests.get(f"{self.fetched_endpoint}search?{query_string}")
        g = rdflib.Graph().parse(data=r.text, format='ttl')
        label_predicate = rdflib.URIRef(self.label)
        return [
            (str(subj), str(obj))
            for subj, obj in g.subject_objects(predicate=label_predicate)
        ]

    def get_resource(self, res_uri: str):
        """
        fetches the given resource and its ancestors/parents

        :param res_uri: an ARCHE URI
        :type res_uri: str

        :return: A `rdflib.Graph` object
        :rtype: rdflib.Graph
        """
        query_params = {
            "readMode": "relatives",
            "parents[0]": self.parent,
        }
        query_string = create_query_sting(query_params)
        url = f"{res_uri}/metadata?{query_string}"
        print(f"fetching data for URI: {res_uri}, calling endpoint \n {url}")
        r = requests.get(url)
        g = rdflib.Graph().parse(data=r.text, format='ttl')
        return g

    def write_resource_to_file(self, res_uri: str, format: str = 'ttl') -> str:
        """
        writes a resource (and its parents/children) to file on disk

        :param res_uri: An ARCHE-URI
        :type res_uri: str
        :param format: The serialisation format, defaults to 'ttl' -> turtle;
            use 'xml' for RDF/XML. The value is also used verbatim as the
            file extension. Any value other than 'ttl'/'turtle' is
            serialized as RDF/XML.
        :type format: str

        :return: The location of the file
        :rtype: str
        """
        file_name = f"{id_from_uri(res_uri)}.{format}"
        save_path = os.path.join(self.out_dir, file_name)
        serializer = self._rdflib_format(format)
        # os.makedirs('') raises FileNotFoundError, which happens when
        # out_dir is empty and the file lands in the current directory.
        target_dir = os.path.dirname(save_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        g = self.get_resource(res_uri)
        g.serialize(save_path, format=serializer, encoding='utf8')
        return save_path