From 1d3d2f97c1d60c6feab0ff1df0c04a56bfa9702e Mon Sep 17 00:00:00 2001 From: Natanael Arndt Date: Tue, 25 Sep 2018 15:57:59 +0200 Subject: [PATCH 01/21] Add test for starting on empty repository and restarting the store --- tests/test_app.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/test_app.py b/tests/test_app.py index 5dfa7f47..04427dee 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -1703,6 +1703,44 @@ def testInsertDataIntoEmptyRepository(self): "p": {'type': 'uri', 'value': 'http://ex.org/b'}, "o": {'type': 'uri', 'value': 'http://ex.org/c'}}) + def testInsertDataIntoEmptyRepositoryStopRestart(self): + """Test inserting data starting with an empty directory, restarting quit and selecting it. + + 1. Prepare an empty directory + 2. Start Quit + 3. execute INSERT DATA query + 4. Restart Quit + 4. execute SELECT query + """ + # Prepate a git Repository + with TemporaryDirectory() as repo: + + # Start Quit + args = quitApp.parseArgs(['-t', repo, '-cm', 'graphfiles']) + objects = quitApp.initialize(args) + config = objects['config'] + app = create_app(config).test_client() + + # execute INSERT DATA query + update = "INSERT DATA {graph { .}}" + app.post('/sparql', data=dict(update=update)) + + # Restart Quit + re_app = create_app(config).test_client() + + # execute SELECT query + select = "SELECT * WHERE {graph {?s ?p ?o .}} ORDER BY ?s ?p ?o" + select_resp = re_app.post('/sparql', data=dict(query=select), headers=dict(accept="application/sparql-results+json")) + + obj = json.loads(select_resp.data.decode("utf-8")) + + self.assertEqual(len(obj["results"]["bindings"]), 1) + + self.assertDictEqual(obj["results"]["bindings"][0], { + "s": {'type': 'uri', 'value': 'http://ex.org/a'}, + "p": {'type': 'uri', 'value': 'http://ex.org/b'}, + "o": {'type': 'uri', 'value': 'http://ex.org/c'}}) + def testInsertDataAndSelectFromEmptyGraph(self): """Test inserting data and selecting it, starting with an empty graph. 
From ccb2496ba352be83df62fc0b005c3d4becf92ff6 Mon Sep 17 00:00:00 2001 From: Norman Radtke Date: Mon, 3 Sep 2018 14:10:44 +0200 Subject: [PATCH 02/21] Refactor config Remove configmode Split store and graph config Use git repo instead of file system Use graphfiles or configfile that were found in repository --- quit/conf.py | 488 ++++++++++++++++++++------------------------- quit/core.py | 1 + tests/helpers.py | 56 +++++- tests/test_conf.py | 379 +++++++++++------------------------ 4 files changed, 390 insertions(+), 534 deletions(-) diff --git a/quit/conf.py b/quit/conf.py index 6b7f9468..73336705 100644 --- a/quit/conf.py +++ b/quit/conf.py @@ -1,8 +1,9 @@ import logging import os +from pygit2 import Repository from os import walk -from os.path import join, isfile +from os.path import join, isfile, relpath from quit.exceptions import MissingConfigurationError, InvalidConfigurationError from quit.exceptions import UnknownConfigurationError from quit.helpers import isAbsoluteUri @@ -25,14 +26,15 @@ class Feature: class QuitConfiguration: - """A class that keeps track of the relation between named graphs and files.""" + quit = Namespace('http://quit.aksw.org/vocab/') +class QuitStoreConfiguration(QuitConfiguration): + """A class that provides information about settings, filesystem and git.""" def __init__( self, - configmode=None, configfile='config.ttl', features=None, - repository=None, + upstream=None, targetdir=None, namespace=None ): @@ -48,33 +50,24 @@ def __init__( self.features = features self.configchanged = False self.sysconf = Graph() - self.graphconf = None - self.origin = None - self.graphs = {} - self.files = {} + self.upstream = None self.namespace = None - self.quit = Namespace('http://quit.aksw.org/vocab/') self.nsMngrSysconf = NamespaceManager(self.sysconf) self.nsMngrSysconf.bind('', 'http://quit.aksw.org/vocab/', override=False) - self.nsMngrGraphconf = NamespaceManager(self.sysconf) - self.nsMngrGraphconf.bind('', 'http://quit.aksw.org/vocab/', 
override=False) try: self.__initstoreconfig( namespace=namespace, - repository=repository, + upstream=upstream, targetdir=targetdir, - configfile=configfile, - configmode=configmode + configfile=configfile ) except InvalidConfigurationError as e: logger.error(e) raise e - return - - def __initstoreconfig(self, namespace, repository, targetdir, configfile, configmode): + def __initstoreconfig(self, namespace, upstream, targetdir, configfile): """Initialize store settings.""" if isAbsoluteUri(namespace): self.namespace = namespace @@ -93,63 +86,165 @@ def __initstoreconfig(self, namespace, repository, targetdir, configfile, config except PermissionError: raise InvalidConfigurationError( "Configuration file could not be parsed. Permission denied. {}".format( - configfile - ) - ) + configfile)) except Exception as e: - raise UnknownConfigurationError( - "UnknownConfigurationError: {}".format(e) - ) + raise UnknownConfigurationError("UnknownConfigurationError: {}".format(e)) self.configfile = configfile else: if not targetdir: raise InvalidConfigurationError('No target directory for git repo given') - if configmode: - self.setConfigMode(configmode) - if targetdir: self.setRepoPath(targetdir) - if repository: - self.setGitOrigin(repository) + if upstream: + self.setGitUpstream(upstream) + + return + + def hasFeature(self, flags): + return flags == (self.features & flags) + + def getBindings(self): + ns = Namespace('http://quit.aksw.org/vocab/') + q = """SELECT DISTINCT ?prefix ?namespace WHERE {{ + {{ + ?ns a <{binding}> ; + <{predicate_prefix}> ?prefix ; + <{predicate_namespace}> ?namespace . + }} + }}""".format( + binding=ns['Binding'], predicate_prefix=ns['prefix'], + predicate_namespace=ns['namespace'] + ) + + result = self.sysconf.query(q) + return [(row['prefix'], row['namespace']) for row in result] + + def getDefaultBranch(self): + """Get the default branch on the Git repository from configuration. + + Returns: + A string containing the branch name. 
+ """ + nsQuit = 'http://quit.aksw.org/vocab/' + storeuri = URIRef('http://my.quit.conf/store') + property = URIRef(nsQuit + 'defaultBranch') + + for s, p, o in self.sysconf.triples((None, property, None)): + return str(o) + + return "master" + + def getGlobalFile(self): + """Get the graph file which should be used for unassigned graphs. + + Returns + The filename of the graph file where unassigned graphs should be stored. + + """ + nsQuit = 'http://quit.aksw.org/vocab/' + storeuri = URIRef('http://my.quit.conf/store') + property = URIRef(nsQuit + 'globalFile') + + for s, p, o in self.sysconf.triples((None, property, None)): + return str(o) + + def getRepoPath(self): + """Get the path of Git repository from configuration. + + Returns: + A string containig the path of the git repo. + """ + nsQuit = 'http://quit.aksw.org/vocab/' + storeuri = URIRef('http://my.quit.conf/store') + property = URIRef(nsQuit + 'pathOfGitRepo') + + for s, p, o in self.sysconf.triples((None, property, None)): + return str(o) + + def getUpstream(self): + """Get the URI of Git remote from configuration.""" + nsQuit = 'http://quit.aksw.org/vocab/' + storeuri = URIRef('http://my.quit.conf/store') + property = self.quit.upstream + + for s, p, o in self.sysconf.triples((storeuri, property, None)): + return str(o) + + def setUpstream(self, origin): + self.sysconf.remove((None, self.quit.origin, None)) + self.sysconf.add((self.quit.Store, self.quit.upstream, Literal(origin))) return - def initgraphconfig(self): + def setRepoPath(self, path): + self.sysconf.remove((None, self.quit.pathOfGitRepo, None)) + self.sysconf.add((self.quit.Store, self.quit.pathOfGitRepo, Literal(path))) + + return + + +class QuitGraphConfiguration(QuitConfiguration): + """A class that keeps track of the relation between named graphs and files.""" + + def __init__(self, repository): + """The init method. + + This method checks if the config file is given and reads the config file. 
+ If the config file is missing, it will be generated after analyzing the + file structure. + """ + logger = logging.getLogger('quit.conf.QuitConfiguration') + logger.debug('Initializing configuration object.') + + self.repository = repository + self.configfile = None + self.mode = None + self.graphconf = None + self.graphs = {} + self.files = {} + + def initgraphconfig(self, rev): """Initialize graph settings. Public method to initalize graph settings. This method will be run only once. """ if self.graphconf is None: - self.__initgraphconfig() - - def __initgraphconfig(self, repository=None, targetdir=None): - """Initialize graph settings.""" - self.graphconf = Graph() - configmode = self.getConfigMode() - logger.debug("Graph Config mode is: {}".format(configmode)) - - if configmode == 'localconfig': - self.__initgraphsfromconf(self.configfile) - elif configmode == 'repoconfig': - remConfigFile = join(self.getRepoPath(), 'config.ttl') - self.__initgraphsfromconf(remConfigFile) - elif configmode == 'graphfiles': - self.__initgraphsfromdir(self.getRepoPath()) + self.graphconf = Graph() + self.nsMngrGraphconf = NamespaceManager(self.graphconf) + self.nsMngrGraphconf.bind('', 'http://quit.aksw.org/vocab/', override=False) + + rdf_files, config_files = self.get_files_from_repository(rev) + + if len(rdf_files) == 0 and len(config_files) == 0: + raise InvalidConfigurationError( + "Did not find graphfiles or a QuitStore configuration file.") + elif len(rdf_files) > 0 and len(config_files) > 0: + raise InvalidConfigurationError( + "Conflict. Found graphfiles and QuitStore configuration file.") + elif len(rdf_files) > 0: + self.mode = 'graphfiles' + self.__init_graph_conf_with_blobs(rdf_files, rev) + elif len(config_files) == 1: + self.mode = 'configuration' + self.__init_graph_conf_from_configuration(config_files[0], rev) else: - raise InvalidConfigurationError('This mode is not supported.', self.configmode) - return + raise InvalidConfigurationError( + "Conflict. 
Found more than one QuitStore configuration file.") - def __initgraphsfromdir(self, repodir): - """Init a repository by analyzing all existing files.""" - graphs = self.getgraphsfromdir(repodir) - repopath = self.getRepoPath() + try: + self.__read_graph_conf() + except InvalidConfigurationError as e: + raise e - for file, format in graphs.items(): - absgraphfile = os.path.join(repopath, file + '.graph') - graphuri = self.__readGraphIriFile(absgraphfile) + def __init_graph_conf_with_blobs(self, files, rev): + """Init a repository by analyzing all existing files.""" + for file, values in files.items(): + format = values[0] + graphFileId = values[1] + graphuri = self.__getUriFromGraphfileBlob(graphFileId) if graphuri and format == 'nquads': self.addgraph(file=file, graphuri=graphuri, format=format) @@ -157,11 +252,9 @@ def __initgraphsfromdir(self, repodir): tmpgraph = ConjunctiveGraph(identifier='default') try: - tmpgraph.parse(source=os.path.join(repopath, file), format=format) + tmpgraph.parse(source=os.path.join(file), format=format) except Exception: - logger.error( - "Could not parse graphfile {}. File skipped.".format(file) - ) + logger.error("Could not parse file {}. File skipped.".format(file)) continue namedgraphs = tmpgraph.contexts() @@ -176,41 +269,33 @@ def __initgraphsfromdir(self, repodir): self.addgraph(file=file, graphuri=graphuri, format=format) elif len(founduris) > 1: logger.info("No named graph found. {} skipped.".format(file)) - elif len(founduris) < 1: logger.info( - "More than one named graphs found. Can't decide. {} skipped.".format( - file - ) - ) - + "More than one named graphs found. Can't decide. {} skipped.".format(file)) elif format == 'nt': if graphuri: self.addgraph(file=file, graphuri=graphuri, format=format) else: logger.warning('No *.graph file found. 
' + file + ' skipped.') + def __init_graph_conf_from_configuration(self, configfileId): + """Init graphs with setting from config.ttl.""" try: - self.__setgraphsfromconf() - except InvalidConfigurationError as e: - raise e + configfile = self.repository.get(configfileId) + except: + raise InvalidConfigurationError( + "Blob for configfile with id {} not found in repository {}".format(configfileId, e)) - def __initgraphsfromconf(self, configfile): - """Init graphs with setting from config.ttl.""" - if not isfile(configfile): - raise MissingConfigurationError("Configfile is missing {}".format(configfile)) + content = configfile.read_raw() try: - self.graphconf.parse(configfile, format='turtle') + self.graphconf.parse(data=content, format='turtle') except Exception as e: raise InvalidConfigurationError( - "Configfile could not be parsed {} {}".format(configfile, e) + "Configfile could not be parsed {} {}".format(configfileId, e) ) - # Get Graphs - self.__setgraphsfromconf() - - def __readGraphIriFile(self, graphfile): + def __getUriFromGraphfileBlob(self, id): """Search for a graph uri in graph file and return it. 
Args: @@ -219,86 +304,55 @@ def __readGraphIriFile(self, graphfile): Returns: graphuri: String with the graph URI """ - try: - with open(graphfile, 'r') as f: - graphuri = f.readline().strip() - except FileNotFoundError: - logger.debug("File not found {}".format(graphfile)) - return - - try: - urlparse(graphuri) - logger.debug("Graph URI {} found in {}".format(graphuri, graphfile)) - except Exception: - graphuri = None - logger.debug("No graph URI found in {}".format(graphfile)) - - return graphuri - - def __setgraphsfromconf(self): + blob = self.repository.get(id) + content = blob.read_raw().decode().strip() + uri = urlparse(content.strip()) + # try: + # with open(graphfile, 'r') as f: + # graphuri = f.readline().strip() + # except FileNotFoundError: + # logger.debug("File not found {}".format(graphfile)) + # return + # + # try: + # urlparse(graphuri) + # logger.debug("Graph URI {} found in {}".format(graphuri, graphfile)) + # except Exception: + # graphuri = None + # logger.debug("No graph URI found in {}".format(graphfile)) + + return content + + def __read_graph_conf(self): """Set all URIs and file paths of graphs that are configured in config.ttl.""" nsQuit = 'http://quit.aksw.org/vocab/' - query = 'SELECT DISTINCT ?graphuri ?filename WHERE { ' + query = 'SELECT DISTINCT ?graphuri ?filename ?format WHERE { ' query += ' ?graph a <' + nsQuit + 'Graph> . ' query += ' ?graph <' + nsQuit + 'graphUri> ?graphuri . ' query += ' ?graph <' + nsQuit + 'graphFile> ?filename . 
' + query += ' OPTIONAL { ?graph <' + nsQuit + 'hasFormat> ?format .} ' query += '}' result = self.graphconf.query(query) - repopath = self.getRepoPath() - for row in result: filename = str(row['filename']) - format = guess_format(filename) + if row['format'] is None: + format = guess_format(filename) + else: + format = str(row['format']) if format not in ['nt', 'nquads']: break - graphuri = str(row['graphuri']) - - graphFile = join(repopath, filename) - - if isfile(graphFile): - # everything is fine - pass - else: - try: - open(graphFile, 'a+').close() - except PermissionError: - raise InvalidConfigurationError( - "Permission denied. Can't create file {} in repo {}".format( - graphFile, - self.getRepoPath() - ) - ) - except FileNotFoundError: - raise InvalidConfigurationError( - "File not found. Can't create file {} in repo {}".format( - graphFile, - self.getRepoPath() - ) - ) - except Exception as e: - raise UnknownConfigurationError( - "Can't create file {} in repo {}. Error: {}".format( - graphFile, - self.getRepoPath(), - e - ) - ) - - graphuri = URIRef(graphuri) + graphuri = URIRef(str(row['graphuri'])) # we store which named graph is serialized in which file self.graphs[graphuri] = filename # and furthermore we assume that one file can contain data of more # than one named graph and so we store for each file a set of graphs - if filename in self.files: + if filename in self.files.keys(): self.files[filename]['graphs'].append(graphuri) else: - self.files[filename] = { - 'serialization': format, - 'graphs': [graphuri] - } + self.files[filename] = {'serialization': format, 'graphs': [graphuri]} return @@ -316,81 +370,13 @@ def removegraph(self, graphuri): return - def getConfigMode(self): - """Get the mode how Quit-Store detects RDF files and named graphs. - - Returns: - A string containig the mode. 
- """ - nsQuit = 'http://quit.aksw.org/vocab/' - property = URIRef(nsQuit + 'configMode') - - for s, p, o in self.sysconf.triples((None, property, None)): - return str(o) - - return 'graphfiles' - - def getRepoPath(self): - """Get the path of Git repository from configuration. - - Returns: - A string containig the path of the git repo. - """ - nsQuit = 'http://quit.aksw.org/vocab/' - storeuri = URIRef('http://my.quit.conf/store') - property = URIRef(nsQuit + 'pathOfGitRepo') - - for s, p, o in self.sysconf.triples((None, property, None)): - return str(o) - - def getDefaultBranch(self): - """Get the default branch on the Git repository from configuration. - - Returns: - A string containing the branch name. - """ - nsQuit = 'http://quit.aksw.org/vocab/' - storeuri = URIRef('http://my.quit.conf/store') - property = URIRef(nsQuit + 'defaultBranch') - - for s, p, o in self.sysconf.triples((None, property, None)): - return str(o) - - return "master" - - def getGlobalFile(self): - """Get the graph file which should be used for unassigned graphs. - - Returns: - The filename of the graph file where unassigned graphs should be stored. - """ - nsQuit = 'http://quit.aksw.org/vocab/' - storeuri = URIRef('http://my.quit.conf/store') - property = URIRef(nsQuit + 'globalFile') - - for s, p, o in self.sysconf.triples((None, property, None)): - return str(o) - - def getOrigin(self): - """Get the URI of Git remote from configuration.""" - nsQuit = 'http://quit.aksw.org/vocab/' - storeuri = URIRef('http://my.quit.conf/store') - property = URIRef(nsQuit + 'origin') - - for s, p, o in self.sysconf.triples((storeuri, property, None)): - return str(o) - def getgraphs(self): """Get all graphs known to conf. Returns: A list containig all graph uris as string, """ - graphs = [] - for graph in self.graphs: - graphs.append(graph) - - return graphs + return self.graphs def getfiles(self): """Get all files known to conf. 
@@ -398,11 +384,7 @@ def getfiles(self): Returns: A list containig all files as string, """ - files = [] - for file in self.files: - files.append(file) - - return files + return self.files def getfileforgraphuri(self, graphuri): """Get the file for a given graph uri. @@ -415,9 +397,9 @@ def getfileforgraphuri(self, graphuri): """ if isinstance(graphuri, str): graphuri = URIRef(graphuri) - for uri, filename in self.graphs.items(): - if uri == graphuri: - return filename + + if graphuri in self.graphs.keys(): + return self.graphs[graphuri] return @@ -439,7 +421,7 @@ def getserializationoffile(self, file): Returns: A string containing the RDF serialization of file """ - if file in self.files: + if file in self.files.keys(): return self.files[file]['serialization'] return @@ -458,61 +440,31 @@ def getgraphuriforfile(self, file): return [] - def getgraphsfromdir(self, path=None): - """Get the files that are part of the repository (tracked or not). + def get_files_from_repository(self, rev): + """Get rdf files and QuitStore configuration files from git repository. Returns: - A list of filepathes. + A dictionary filepathes and format and a list of configuration files. 
""" - if path is None: - path = self.getRepoPath() - - exclude = set(['.git']) - + configfiles = [] graphfiles = {} - for dirpath, dirs, files in walk(path): - dirs[:] = [d for d in dirs if d not in exclude] - for filename in files: - - format = guess_format(join(dirpath, filename)) - if format is not None: - graphfiles[filename] = format - - return graphfiles - - def hasFeature(self, flags): - return flags == (self.features & flags) - - def setConfigMode(self, mode): - self.sysconf.remove((None, self.quit.configMode, None)) - self.sysconf.add((self.quit.Store, self.quit.configMode, Literal(mode))) - - return - - def setGitOrigin(self, origin): - self.sysconf.remove((None, self.quit.origin, None)) - self.sysconf.add((self.quit.Store, self.quit.origin, Literal(origin))) - - return - - def setRepoPath(self, path): - self.sysconf.remove((None, self.quit.pathOfGitRepo, None)) - self.sysconf.add((self.quit.Store, self.quit.pathOfGitRepo, Literal(path))) - - return - - def getBindings(self): - ns = Namespace('http://quit.aksw.org/vocab/') - q = """SELECT DISTINCT ?prefix ?namespace WHERE {{ - {{ - ?ns a <{binding}> ; - <{predicate_prefix}> ?prefix ; - <{predicate_namespace}> ?namespace . 
- }} - }}""".format( - binding=ns['Binding'], predicate_prefix=ns['prefix'], - predicate_namespace=ns['namespace'] - ) - - result = self.sysconf.query(q) - return [(row['prefix'], row['namespace']) for row in result] + commit = self.repository.revparse_single(rev) + graph_file_blobs = {} + + # Collect grahfiles + for entry in commit.tree: + if entry.type == 'blob' and entry.name.endswith('.graph'): + graph_file_blobs[entry.name] = entry.id + + # Collect RDF files and configfiles + for entry in commit.tree: + if entry.type == 'blob': + format = guess_format(entry.name) + if entry.name.endswith('quit.ttl') or entry.name.endswith('config.ttl'): + configfiles.append(entry.id) + elif format is not None and format in ['nquads', 'nt']: + if str(entry.name) + '.graph' in graph_file_blobs.keys(): + graphFileBlobId = graph_file_blobs[entry.name + '.graph'] + graphfiles[str(entry.name)] = (format, graphFileBlobId) + + return graphfiles, configfiles diff --git a/quit/core.py b/quit/core.py index 69951b91..615f9038 100644 --- a/quit/core.py +++ b/quit/core.py @@ -98,6 +98,7 @@ def __init__(self, config, repository, store): self.store = store self._commits = Cache() self._blobs = Cache() + self._configs = Cache() def _exists(self, cid): uri = QUIT['commit-' + cid] diff --git a/tests/helpers.py b/tests/helpers.py index 1e87a3a6..294e2620 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -2,6 +2,7 @@ from pygit2 import init_repository, clone_repository, Signature from os import path, walk from os.path import join +from rdflib import Graph def createCommit(repository, message=None): @@ -95,8 +96,43 @@ def withGraph(self, graphUri, graphContent=None): return tmpRepo - def withGraphs(self, graphUriContentDict): + def noConfigInformations(self, graphContent=''): + """Give a TemporaryRepository() initialized with a graph with the given content (and one commit).""" + tmpRepo = TemporaryRepository() + + # Add a graph.nq and a graph.nq.graph file + with 
open(path.join(tmpRepo.repo.workdir, "graph.nq"), "w") as graphFile: + if graphContent: + graphFile.write(graphContent) + + # Add and Commit the empty graph + index = tmpRepo.repo.index + index.read() + index.add("graph.nq") + index.write() + + # Create commit + tree = index.write_tree() + message = "init" + tmpRepo.repo.create_commit('HEAD', self.author, self.comitter, message, tree, []) + + return tmpRepo + + def withGraphs(self, graphUriContentDict, mode='graphfiles'): """Give a TemporaryRepository() initialized with a dictionary of graphUris and content (nq).""" + uristring = '' + configFileContent = """@base . + @prefix conf: . + + conf:store a ; + "git://github.com/aksw/QuitStore.git" ; + "{}" . + {}""" + + graphResource = """conf:graph{} a ; + <{}> ; + "{}" .""" + tmpRepo = TemporaryRepository() index = tmpRepo.repo.index index.read() @@ -108,14 +144,24 @@ def withGraphs(self, graphUriContentDict): if graphContent: graphFile.write(graphContent) - # Set Graph URI to http://example.org/ - with open(path.join(tmpRepo.repo.workdir, filename + ".graph"), "w") as graphFile: - graphFile.write(graphUri) + if mode == 'graphfiles': + # Set Graph URI to http://example.org/ + with open(path.join(tmpRepo.repo.workdir, filename + ".graph"), "w") as graphFile: + graphFile.write(graphUri) + index.add(filename + '.graph') + elif mode == 'configfile': + uristring += graphResource.format(i, graphUri, filename) # Add and Commit the empty graph index.add(filename) - index.add(filename + '.graph') i += 1 + if mode == 'configfile': + graph = Graph() + with open(path.join(tmpRepo.repo.workdir, "config.ttl"), "w") as configFile: + rdf_content = configFileContent.format(tmpRepo.repo.workdir, uristring) + graph.parse(format='turtle', data=rdf_content) + configFile.write(graph.serialize(format='turtle').decode()) + index.add('config.ttl') index.write() diff --git a/tests/test_conf.py b/tests/test_conf.py index 53e57fa8..db78c331 100644 --- a/tests/test_conf.py +++ b/tests/test_conf.py 
@@ -6,280 +6,137 @@ from os.path import join, isdir from pygit2 import init_repository, Repository, clone_repository from pygit2 import GIT_SORT_TOPOLOGICAL, GIT_SORT_REVERSE, Signature -from quit.conf import QuitConfiguration +from quit.conf import QuitStoreConfiguration, QuitGraphConfiguration from quit.exceptions import MissingConfigurationError, InvalidConfigurationError from quit.exceptions import MissingFileError from distutils.dir_util import copy_tree, remove_tree +from helpers import TemporaryRepository, TemporaryRepositoryFactory from tempfile import TemporaryDirectory, NamedTemporaryFile import rdflib -class TestConfiguration(unittest.TestCase): - - def setUp(self): - self.ns = 'http://quit.instance/' - self.testData = './tests/samples/configuration_test' - self.local = './tests/samples/local' - self.remote = '.tests/samples/remote' - copy_tree(self.testData, self.local) - copy_tree(self.testData, self.remote) - self.localConfigFile = join(self.local, 'config.ttl') - self.remoteConfigFile = join(self.local, 'config.ttl') - tempRepoLine = ' "' + self.local + '" .' - - with open(self.localConfigFile) as f: - content = f.readlines() - - remove(self.localConfigFile) - - with open(self.localConfigFile, 'w+') as f: - for line in content: - if line.startswith(' Date: Thu, 13 Sep 2018 16:24:34 +0200 Subject: [PATCH 03/21] Apply usage of graphconfig Make use of QuitGraphConfiguration in core.py. Remove calls to methods of QuitGraphConfiguration on startup since no instance of QuitGraphConfiguration is initialized at this moment. TODO: save OIDs in QuitGraphConfiguration to reduce iterations over commit entries. 
--- quit/application.py | 14 +--- quit/conf.py | 181 ++++++++++++++++++++++++-------------------- quit/core.py | 30 ++++++-- quit/web/app.py | 3 - 4 files changed, 127 insertions(+), 101 deletions(-) diff --git a/quit/application.py b/quit/application.py index cde40437..fbc433c9 100644 --- a/quit/application.py +++ b/quit/application.py @@ -1,7 +1,7 @@ import argparse import sys import os -from quit.conf import Feature, QuitConfiguration +from quit.conf import Feature, QuitStoreConfiguration from quit.exceptions import InvalidConfigurationError import rdflib.plugins.sparql from rdflib.plugins.sparql.algebra import SequencePath @@ -98,11 +98,10 @@ def sequencePathCompareGt(self, other): 'quit.plugins.serializers.results.htmlresults', 'HTMLResultSerializer') try: - config = QuitConfiguration( + config = QuitStoreConfiguration( configfile=args.configfile, targetdir=args.targetdir, - repository=args.repourl, - configmode=args.configmode, + upstream=args.repourl, features=args.features, namespace=args.namespace, ) @@ -111,14 +110,9 @@ def sequencePathCompareGt(self, other): sys.exit('Exiting quit') # since repo is handled, we can add graphs to config - config.initgraphconfig() - logger.info('QuitStore successfully running.') - logger.info('Known graphs: ' + str(config.getgraphs())) - logger.info('Known files: ' + str(config.getfiles())) + logger.info('QuitStore Configuration initialized.') logger.debug('Path of Gitrepo: ' + config.getRepoPath()) - logger.debug('Config mode: ' + str(config.getConfigMode())) - logger.debug('All RDF files found in Gitepo:' + str(config.getgraphsfromdir())) return {'config': config} diff --git a/quit/conf.py b/quit/conf.py index 73336705..beab095a 100644 --- a/quit/conf.py +++ b/quit/conf.py @@ -28,6 +28,7 @@ class Feature: class QuitConfiguration: quit = Namespace('http://quit.aksw.org/vocab/') + class QuitStoreConfiguration(QuitConfiguration): """A class that provides information about settings, filesystem and git.""" def __init__( @@ 
-216,38 +217,36 @@ def initgraphconfig(self, rev): self.nsMngrGraphconf = NamespaceManager(self.graphconf) self.nsMngrGraphconf.bind('', 'http://quit.aksw.org/vocab/', override=False) - rdf_files, config_files = self.get_files_from_repository(rev) + graph_files, config_files, rdf_files = self.get_blobs_from_repository(rev) - if len(rdf_files) == 0 and len(config_files) == 0: + if len(graph_files) == 0 and len(config_files) == 0: raise InvalidConfigurationError( "Did not find graphfiles or a QuitStore configuration file.") - elif len(rdf_files) > 0 and len(config_files) > 0: + elif len(graph_files) > 0 and len(config_files) > 0: raise InvalidConfigurationError( "Conflict. Found graphfiles and QuitStore configuration file.") - elif len(rdf_files) > 0: + elif len(graph_files) > 0: self.mode = 'graphfiles' - self.__init_graph_conf_with_blobs(rdf_files, rev) + self.__init_graph_conf_with_blobs(graph_files, rev) elif len(config_files) == 1: self.mode = 'configuration' - self.__init_graph_conf_from_configuration(config_files[0], rev) + self.__init_graph_conf_from_configuration(config_files[0], rdf_files) else: raise InvalidConfigurationError( "Conflict. 
Found more than one QuitStore configuration file.") - try: - self.__read_graph_conf() - except InvalidConfigurationError as e: - raise e - def __init_graph_conf_with_blobs(self, files, rev): """Init a repository by analyzing all existing files.""" for file, values in files.items(): format = values[0] graphFileId = values[1] - graphuri = self.__getUriFromGraphfileBlob(graphFileId) + graphuri = URIRef(self.__get_uri_from_graphfile_blob(graphFileId)) if graphuri and format == 'nquads': self.addgraph(file=file, graphuri=graphuri, format=format) + self.graphs[graphuri] = file + self.files[file] = { + 'serialization': format, 'graphs': [graphuri], 'oid': files[file][1]} elif graphuri is None and format == 'nquads': tmpgraph = ConjunctiveGraph(identifier='default') @@ -267,6 +266,9 @@ def __init_graph_conf_with_blobs(self, files, rev): if len(founduris) == 1: self.addgraph(file=file, graphuri=graphuri, format=format) + self.graphs[graphuri] = file + self.files[file] = { + 'serialization': format, 'graphs': [graphuri], 'oid': files[file][1]} elif len(founduris) > 1: logger.info("No named graph found. {} skipped.".format(file)) elif len(founduris) < 1: @@ -275,14 +277,17 @@ def __init_graph_conf_with_blobs(self, files, rev): elif format == 'nt': if graphuri: self.addgraph(file=file, graphuri=graphuri, format=format) + self.graphs[graphuri] = file + self.files[file] = { + 'serialization': format, 'graphs': [graphuri], 'oid': files[file][1]} else: logger.warning('No *.graph file found. 
' + file + ' skipped.') - def __init_graph_conf_from_configuration(self, configfileId): + def __init_graph_conf_from_configuration(self, configfileId, known_blobs): """Init graphs with setting from config.ttl.""" try: configfile = self.repository.get(configfileId) - except: + except Exception as e: raise InvalidConfigurationError( "Blob for configfile with id {} not found in repository {}".format(configfileId, e)) @@ -294,37 +299,6 @@ def __init_graph_conf_from_configuration(self, configfileId): raise InvalidConfigurationError( "Configfile could not be parsed {} {}".format(configfileId, e) ) - - def __getUriFromGraphfileBlob(self, id): - """Search for a graph uri in graph file and return it. - - Args: - graphfile: String containing the path of a graph file - - Returns: - graphuri: String with the graph URI - """ - blob = self.repository.get(id) - content = blob.read_raw().decode().strip() - uri = urlparse(content.strip()) - # try: - # with open(graphfile, 'r') as f: - # graphuri = f.readline().strip() - # except FileNotFoundError: - # logger.debug("File not found {}".format(graphfile)) - # return - # - # try: - # urlparse(graphuri) - # logger.debug("Graph URI {} found in {}".format(graphuri, graphfile)) - # except Exception: - # graphuri = None - # logger.debug("No graph URI found in {}".format(graphfile)) - - return content - - def __read_graph_conf(self): - """Set all URIs and file paths of graphs that are configured in config.ttl.""" nsQuit = 'http://quit.aksw.org/vocab/' query = 'SELECT DISTINCT ?graphuri ?filename ?format WHERE { ' query += ' ?graph a <' + nsQuit + 'Graph> . 
' @@ -342,19 +316,43 @@ def __read_graph_conf(self): format = str(row['format']) if format not in ['nt', 'nquads']: break + if filename not in known_blobs.keys(): + break graphuri = URIRef(str(row['graphuri'])) # we store which named graph is serialized in which file self.graphs[graphuri] = filename - # and furthermore we assume that one file can contain data of more - # than one named graph and so we store for each file a set of graphs - if filename in self.files.keys(): - self.files[filename]['graphs'].append(graphuri) - else: - self.files[filename] = {'serialization': format, 'graphs': [graphuri]} + self.files[filename] = { + 'serialization': format, 'graphs': [graphuri], 'oid': known_blobs[filename]} - return + def __get_uri_from_graphfile_blob(self, oid): + """Search for a graph uri in graph file and return it. + + Args + ---- + oid: String oid of a graph file + + Returns + ------- + graphuri: String with the graph URI + + """ + try: + blob = self.repository.get(oid) + except ValueError: + logger.debug("Object with OID { } not found in repository.".format(oid)) + return + + content = blob.read_raw().decode().strip() + + try: + urlparse(content) + except Exception: + logger.debug("No graph URI found in blob with OID {}.".format(oid)) + return + + return content def addgraph(self, graphuri, file, format=None): self.graphconf.add((self.quit[quote(graphuri)], RDF.type, self.quit.Graph)) @@ -373,27 +371,34 @@ def removegraph(self, graphuri): def getgraphs(self): """Get all graphs known to conf. - Returns: + Returns + ------- A list containig all graph uris as string, + """ return self.graphs def getfiles(self): """Get all files known to conf. - Returns: + Returns + ------- A list containig all files as string, + """ return self.files def getfileforgraphuri(self, graphuri): """Get the file for a given graph uri. 
- Args: + Args + ---- graphuri: A String of the named graph - Returns: + Returns + ------- A string of the path to the file asociated with named graph + """ if isinstance(graphuri, str): graphuri = URIRef(graphuri) @@ -406,20 +411,24 @@ def getfileforgraphuri(self, graphuri): def getgraphurifilemap(self): """Get the dictionary of graphuris and their files. - Returns: - A dictionary of graphuris and information about their files. - """ + Returns + ------- + A dictionary of graphuris and information about their representation in repository. + """ return self.graphs def getserializationoffile(self, file): """Get the file for a given graph uri. - Args: + Args + ---- file: A String of a file path - Returns: + Returns + ------- A string containing the RDF serialization of file + """ if file in self.files.keys(): return self.files[file]['serialization'] @@ -429,42 +438,52 @@ def getserializationoffile(self, file): def getgraphuriforfile(self, file): """Get the file for a given graph uri. - Args: + Args + ---- file: A String of a file path - Returns: + Returns + ------- A set containing strings of graph uris asociated to that file + """ if file in self.files: return self.files[file]['graphs'] return [] - def get_files_from_repository(self, rev): - """Get rdf files and QuitStore configuration files from git repository. + def get_blobs_from_repository(self, rev): + """Analyze all blobs of a revision. + + Returns + ------- + A triple (dictionary, list, dictionary) + dict: containg names of rdf-files plus their format and oid of graph file. + list: containing names of config files. + dict: containing names rdf files plus format and oid. - Returns: - A dictionary filepathes and format and a list of configuration files. 
""" - configfiles = [] - graphfiles = {} + config_files = [] + graph_files = {} commit = self.repository.revparse_single(rev) graph_file_blobs = {} + rdf_file_blobs = {} - # Collect grahfiles - for entry in commit.tree: - if entry.type == 'blob' and entry.name.endswith('.graph'): - graph_file_blobs[entry.name] = entry.id - - # Collect RDF files and configfiles + # Collect graph files, rdf files and config files for entry in commit.tree: if entry.type == 'blob': format = guess_format(entry.name) - if entry.name.endswith('quit.ttl') or entry.name.endswith('config.ttl'): - configfiles.append(entry.id) + if format is None and entry.name.endswith('.graph'): + graph_file_blobs[entry.name] = entry.id elif format is not None and format in ['nquads', 'nt']: - if str(entry.name) + '.graph' in graph_file_blobs.keys(): - graphFileBlobId = graph_file_blobs[entry.name + '.graph'] - graphfiles[str(entry.name)] = (format, graphFileBlobId) + rdf_file_blobs[entry.name] = (entry.id, format) + elif format is not None and entry.name == 'config.ttl': + config_files.append(str(entry.id)) + + # collect pairs of rdf files and graph files + for filename in rdf_file_blobs.keys(): + if filename + '.graph' in graph_file_blobs.keys(): + graph_file_blob_id = graph_file_blobs[filename + '.graph'] + graph_files[filename] = (rdf_file_blobs[filename][1], str(graph_file_blob_id)) - return graphfiles, configfiles + return graph_files, config_files, rdf_file_blobs diff --git a/quit/core.py b/quit/core.py index 615f9038..34fb2995 100644 --- a/quit/core.py +++ b/quit/core.py @@ -10,7 +10,7 @@ from rdflib import Graph, ConjunctiveGraph, BNode, Literal from rdflib.plugins.serializers.nquads import _nq_row as _nq -from quit.conf import Feature +from quit.conf import Feature, QuitGraphConfiguration from quit.namespace import RDFS, FOAF, XSD, PROV, QUIT, is_a from quit.graphs import RewriteGraph, InMemoryAggregatedGraph from quit.utils import graphdiff, git_timestamp @@ -98,7 +98,7 @@ def __init__(self, 
config, repository, store): self.store = store self._commits = Cache() self._blobs = Cache() - self._configs = Cache() + self._graphconfigs = Cache() def _exists(self, cid): uri = QUIT['commit-' + cid] @@ -291,7 +291,12 @@ def changeset(self, commit): g.addN((s, p, o, op_uri) for s, p, o in triples) # Entities - map = self.config.getgraphurifilemap() + if commit.id not in self._graphconfigs: + graphconf = QuitGraphConfiguration(self.repository._repository) + graphconf.initgraphconfig(commit.id) + self._graphconfigs.set(commit.id, graphconf) + + map = self._graphconfigs.get(commit.id).getgraphurifilemap() for entity in commit.node().entries(recursive=True): # todo check if file was changed @@ -300,7 +305,7 @@ def changeset(self, commit): if entity.name not in map.values(): continue - graphUris = self.config.getgraphuriforfile(entity.name) + graphUris = self._graphconfigs.get(commit.id).getgraphuriforfile(entity.name) graphsFromConfig = set((Graph(identifier=i) for i in graphUris)) blob = (entity.name, entity.oid) @@ -358,10 +363,16 @@ def getFilesForCommit(self, commit): On Cache miss this method also updates the commits cache. """ - uriFileMap = self.config.getgraphurifilemap() if commit.id not in self._commits: + if commit.id not in self._graphconfigs: + graphconf = QuitGraphConfiguration(self.repository._repository) + graphconf.initgraphconfig(commit.id) + self._graphconfigs.set(commit.id, graphconf) + + uriFileMap = self._graphconfigs.get(commit.id).getgraphurifilemap() blobs = set() + for entity in commit.node().entries(recursive=True): if entity.is_file: if entity.name not in uriFileMap.values(): @@ -377,13 +388,18 @@ def getFileReferenceAndContext(self, blob, commit): On Cache miss this method also updates teh commits cache. 
""" - uriFileMap = self.config.getgraphurifilemap() + if commit.id not in self._graphconfigs: + graphconf = QuitGraphConfiguration(self.repository._repository) + graphconf.initgraphconfig(commit.id) + self._graphconfigs.set(commit.id, graphconf) + + uriFileMap = self._graphconfigs.get(commit.id).getgraphurifilemap() if blob not in self._blobs: (name, oid) = blob content = commit.node(path=name).content # content = self.repository._repository[oid].data - graphUris = self.config.getgraphuriforfile(name) + graphUris = self._graphconfigs.get(commit.id).getgraphuriforfile(name) graphsFromConfig = set((Graph(identifier=i) for i in graphUris)) tmp = ConjunctiveGraph() tmp.parse(data=content, format='nquads') diff --git a/quit/web/app.py b/quit/web/app.py index bc3689ed..95587128 100644 --- a/quit/web/app.py +++ b/quit/web/app.py @@ -107,9 +107,6 @@ def register_app(app, config): content = quit.store.store.serialize(format='trig').decode() logger.debug("Initialize store with following content: {}".format(content)) - logger.debug("Initialize store with following graphs: {}".format( - quit.config.getgraphurifilemap()) - ) app.config['quit'] = quit app.config['blame'] = Blame(quit) From 188e48f2c8fae323f578025eaa7841ef584a3c47 Mon Sep 17 00:00:00 2001 From: Norman Radtke Date: Tue, 18 Sep 2018 16:14:16 +0200 Subject: [PATCH 04/21] Apply changsets to new graphs (Save current work) Add new graphs to repository. Add configfiles to commit (config.ttl or .graph files) Move _apply method to helpers. Add to methods to commit() method to make it easier to understand how commit() works. TODO: Check if/update all blobs are handled via git objects (not via os/index/file system). Test for new files. 
--- quit/conf.py | 8 +-- quit/core.py | 134 ++++++++++++++++++++++++++++-------------------- quit/helpers.py | 16 ++++++ 3 files changed, 99 insertions(+), 59 deletions(-) diff --git a/quit/conf.py b/quit/conf.py index beab095a..837e1bcf 100644 --- a/quit/conf.py +++ b/quit/conf.py @@ -220,8 +220,7 @@ def initgraphconfig(self, rev): graph_files, config_files, rdf_files = self.get_blobs_from_repository(rev) if len(graph_files) == 0 and len(config_files) == 0: - raise InvalidConfigurationError( - "Did not find graphfiles or a QuitStore configuration file.") + self.mode = 'graphfiles' elif len(graph_files) > 0 and len(config_files) > 0: raise InvalidConfigurationError( "Conflict. Found graphfiles and QuitStore configuration file.") @@ -465,9 +464,12 @@ def get_blobs_from_repository(self, rev): """ config_files = [] graph_files = {} - commit = self.repository.revparse_single(rev) graph_file_blobs = {} rdf_file_blobs = {} + try: + commit = self.repository.revparse_single(rev) + except Exception: + return graph_files, config_files, rdf_file_blobs # Collect graph files, rdf files and config files for entry in commit.tree: diff --git a/quit/core.py b/quit/core.py index 34fb2995..66aa810c 100644 --- a/quit/core.py +++ b/quit/core.py @@ -2,20 +2,24 @@ import logging +from copy import copy + from pygit2 import GIT_MERGE_ANALYSIS_UP_TO_DATE from pygit2 import GIT_MERGE_ANALYSIS_FASTFORWARD from pygit2 import GIT_MERGE_ANALYSIS_NORMAL from pygit2 import GIT_SORT_REVERSE, GIT_RESET_HARD, GIT_STATUS_CURRENT from rdflib import Graph, ConjunctiveGraph, BNode, Literal -from rdflib.plugins.serializers.nquads import _nq_row as _nq from quit.conf import Feature, QuitGraphConfiguration +from quit.helpers import applyChangeset from quit.namespace import RDFS, FOAF, XSD, PROV, QUIT, is_a from quit.graphs import RewriteGraph, InMemoryAggregatedGraph from quit.utils import graphdiff, git_timestamp from quit.cache import Cache, FileReference +from urllib.parse import quote_plus + import 
subprocess logger = logging.getLogger('quit.core') @@ -159,7 +163,6 @@ def instance(self, commit_id=None, force=False): Returns: Instance of VirtualGraph representing the respective dataset """ - default_graphs = [] if commit_id: @@ -292,9 +295,7 @@ def changeset(self, commit): # Entities if commit.id not in self._graphconfigs: - graphconf = QuitGraphConfiguration(self.repository._repository) - graphconf.initgraphconfig(commit.id) - self._graphconfigs.set(commit.id, graphconf) + self.updateGraphConfig(commit.id) map = self._graphconfigs.get(commit.id).getgraphurifilemap() @@ -364,11 +365,12 @@ def getFilesForCommit(self, commit): On Cache miss this method also updates the commits cache. """ + if commit is None: + return set() + if commit.id not in self._commits: if commit.id not in self._graphconfigs: - graphconf = QuitGraphConfiguration(self.repository._repository) - graphconf.initgraphconfig(commit.id) - self._graphconfigs.set(commit.id, graphconf) + self.updateGraphConfig(commit.id) uriFileMap = self._graphconfigs.get(commit.id).getgraphurifilemap() blobs = set() @@ -384,14 +386,12 @@ def getFilesForCommit(self, commit): return self._commits.get(commit.id) def getFileReferenceAndContext(self, blob, commit): - """Get the FielReference and Context for a given blob (name, oid) of a commit. + """Get the FileReference and Context for a given blob (name, oid) of a commit. On Cache miss this method also updates teh commits cache. 
""" if commit.id not in self._graphconfigs: - graphconf = QuitGraphConfiguration(self.repository._repository) - graphconf.initgraphconfig(commit.id) - self._graphconfigs.set(commit.id, graphconf) + self.updateGraphConfig(commit.id) uriFileMap = self._graphconfigs.get(commit.id).getgraphurifilemap() @@ -406,8 +406,7 @@ def getFileReferenceAndContext(self, blob, commit): contexts = set((context for context in tmp.contexts(None) if context.identifier in uriFileMap)) | graphsFromConfig quitWorkingData = (FileReference(name, content), contexts) - self._blobs.set( - blob, quitWorkingData) + self._blobs.set(blob, quitWorkingData) return quitWorkingData return self._blobs.get(blob) @@ -445,19 +444,44 @@ def build_message(message, kwargs): out.append('{}: "{}"'.format(k, v.replace('"', "\\\""))) return "\n".join(out) - def _apply(f, changeset, identifier): - """Update the FileReference (graph uri) of a file with help of the changeset.""" - for (op, triples) in changeset: - if op == 'additions': - for triple in triples: - # the internal _nq serializer appends '\n' - line = _nq(triple, identifier).rstrip() - f.add(line) - elif op == 'removals': - for triple in triples: - # the internal _nq serializer appends '\n' - line = _nq(triple, identifier).rstrip() - f.remove(line) + def _applyKnownGraphs(delta, blobs): + blobs_new = set() + for blob in blobs: + (fileName, oid) = blob + try: + file_reference, contexts = self.getFileReferenceAndContext(blob, parent_commit) + for context in contexts: + for entry in delta: + changeset = entry.get(context.identifier, None) + + if changeset: + applyChangeset(file_reference, changeset, context.identifier) + del(entry[context.identifier]) + + index.add(file_reference.path, file_reference.content) + + self._blobs.remove(blob) + blob = fileName, index.stash[file_reference.path][0] + self._blobs.set(blob, (file_reference, contexts)) + blobs_new.add(blob) + except KeyError: + pass + return blobs_new + + def _applyUnknownGraphs(delta): + 
new_contexts = {} + for entry in delta: + for identifier, changeset in entry.items(): + if isinstance(identifier, BNode) or identifier == 'default': + continue # TODO + + fileName = quote_plus(identifier + '.nq') + if identifier not in new_contexts.keys(): + new_contexts[identifier] = FileReference(fileName, '') + + fileReference = new_contexts[identifier] + applyChangeset(fileReference, changeset, identifier) + return new_contexts if not delta: return @@ -477,40 +501,32 @@ def _apply(f, changeset, identifier): pass index = self.repository.index(parent_commit_id) - for blob in blobs: - (fileName, oid) = blob - try: - file_reference, contexts = self.getFileReferenceAndContext(blob, parent_commit) - for context in contexts: - for entry in delta: - changeset = entry.get(context.identifier, None) + if parent_commit_id not in self._graphconfigs: + self.updateGraphConfig(parent_commit_id) - if changeset: - _apply(file_reference, changeset, context.identifier) - del(entry[context.identifier]) + graphconfig = self._graphconfigs.get(parent_commit_id) - index.add(file_reference.path, file_reference.content) + blobs_new = _applyKnownGraphs(delta, blobs) + new_contexts = _applyUnknownGraphs(delta) + new_config = copy(graphconfig) - self._blobs.remove(blob) - blob = fileName, index.stash[file_reference.path][0] - self._blobs.set(blob, (file_reference, contexts)) - blobs_new.add(blob) - except KeyError: - pass + for identifier, fileReference in new_contexts.items(): + # Add new blobs to repo + index.add(fileReference.path, fileReference.content) + if graphconfig.mode == 'graphfiles': + index.add(fileReference.path + '.graph', identifier) - unassigned = set() - f_name = self.config.getGlobalFile() or 'unassigned.nq' - f_new = FileReference(f_name, "") - for entry in delta: - for identifier, changeset in entry.items(): - unassigned.add(graph.store.get_context(identifier)) - _apply(f_new, changeset, graph.store.identifier) + # Update config + new_config.addgraph(identifier, 
fileReference.path, 'nquads') - index.add(f_new.path, f_new.content) - - blob = f_name, index.stash[f_new.path][0] - self._blobs.set(blob, (f_new, unassigned)) - blobs_new.add(blob) + # Update Cache and add new contexts to store + blob = fileReference.path, index.stash[fileReference.path][0] + context = set() + context.add(graph.store.get_context(identifier)) + self._blobs.set(blob, (fileReference, context)) + blobs_new.add(blob) + if graphconfig.mode == 'configuration': + index.add('config.ttl', new_config.graphconf.serialize(format='turtle').decode()) message = build_message(message, kwargs) author = self.repository._repository.default_signature @@ -545,3 +561,9 @@ def garbagecollection(self): except Exception as e: logger.debug('Git garbage collection failed to spawn.') logger.debug(e) + + def updateGraphConfig(self, commitId): + """Update the graph configuration for a given commit id.""" + graphconf = QuitGraphConfiguration(self.repository._repository) + graphconf.initgraphconfig(commitId) + self._graphconfigs.set(commitId, graphconf) diff --git a/quit/helpers.py b/quit/helpers.py index c466bb23..eb8a0f96 100644 --- a/quit/helpers.py +++ b/quit/helpers.py @@ -7,6 +7,7 @@ from rdflib.plugins.sparql.parserutils import CompValue, plist from rdflib.plugins.sparql.parser import parseQuery, parseUpdate from quit.tools.algebra import translateQuery, translateUpdate +from rdflib.plugins.serializers.nquads import _nq_row as _nq from rdflib.plugins.sparql import parser, algebra from rdflib.plugins import sparql from uritools import urisplit @@ -106,6 +107,21 @@ def evalUpdate(self, querystring, graph): return +def applyChangeset(f, changeset, identifier): + """Update the FileReference (graph uri) of a file with help of the changeset.""" + for (op, triples) in changeset: + if op == 'additions': + for triple in triples: + # the internal _nq serializer appends '\n' + line = _nq(triple, identifier).rstrip() + f.add(line) + elif op == 'removals': + for triple in triples: + # 
the internal _nq serializer appends '\n' + line = _nq(triple, identifier).rstrip() + f.remove(line) + + def isAbsoluteUri(uri): """Check if a URI is a absolute URI and uses 'http(s)' at protocol part. From 5b782be742b19b4547dde7e91cf57d5abd0a0b0a Mon Sep 17 00:00:00 2001 From: Norman Radtke Date: Thu, 20 Sep 2018 14:20:21 +0200 Subject: [PATCH 05/21] Add tests for new graph via Insert Add two tests which add a new graph. Test if the new graph will be added for both modes (using .graph files and using a config.ttl) --- quit/core.py | 2 +- tests/test_app.py | 91 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 91 insertions(+), 2 deletions(-) diff --git a/quit/core.py b/quit/core.py index 66aa810c..802a8144 100644 --- a/quit/core.py +++ b/quit/core.py @@ -472,7 +472,7 @@ def _applyUnknownGraphs(delta): new_contexts = {} for entry in delta: for identifier, changeset in entry.items(): - if isinstance(identifier, BNode) or identifier == 'default': + if isinstance(identifier, BNode) or str(identifier) == 'default': continue # TODO fileName = quote_plus(identifier + '.nq') diff --git a/tests/test_app.py b/tests/test_app.py index 5dfa7f47..89470ac8 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -2,6 +2,7 @@ import os from os import path +from urllib.parse import quote_plus from datetime import datetime from pygit2 import GIT_SORT_TOPOLOGICAL, Signature import quit.application as quitApp @@ -522,7 +523,6 @@ def testSelectFromNamed(self): "p": {'type': 'uri', 'value': 'urn:y'}, "o": {'type': 'uri', 'value': 'urn:z'}}) - def testQueryProvenanceViaGet(self): # Prepate a git Repository content = ' .' 
@@ -637,7 +637,9 @@ def testQueryProvenanceViaPostDirectly(self): response = app.post('/provenance', data=self.update, headers=headers) self.assertEqual(response.status_code, 400) + class QuitAppTestCase(unittest.TestCase): + """Test API and synchronization of Store and Git.""" author = Signature('QuitStoreTest', 'quit@quit.aksw.org') comitter = Signature('QuitStoreTest', 'quit@quit.aksw.org') @@ -3191,5 +3193,92 @@ def testWithOnInsertUsing(self): self.assertEqual('\n', f.read()) +class FileHandlingTests(unittest.TestCase): + def testNewNamedGraph(self): + """Test if a new graph is added to the repository. + + 1. Prepare a git repository with an empty and a non empty graph + 2. Start Quit + 3. execute Update query + 4. check filesystem for new .nq and .nq.graph file with expected content + """ + # Prepate a git Repository + content = ' .\n' + repoContent = {'http://example.org/': content} + with TemporaryRepositoryFactory().withGraphs(repoContent) as repo: + + # Start Quit + args = quitApp.parseArgs(['-t', repo.workdir, '-cm', 'graphfiles']) + objects = quitApp.initialize(args) + config = objects['config'] + app = create_app(config).test_client() + filename = quote_plus('http://aksw.org/') + '.nq' + + self.assertFalse(path.isfile(path.join(repo.workdir, filename))) + self.assertFalse(path.isfile(path.join(repo.workdir, filename + '.graph'))) + + # execute UPDATE query + update = 'INSERT DATA { GRAPH { . } }' + app.post('/sparql', + content_type="application/sparql-update", + data=update) + + with open(path.join(repo.workdir, 'graph_0.nq'), 'r') as f: + self.assertEqual(' .\n', f.read()) + with open(path.join(repo.workdir, filename), 'r') as f: + self.assertEqual(' .\n', f.read()) + with open(path.join(repo.workdir, filename + '.graph'), 'r') as f: + self.assertEqual('http://aksw.org/', f.read()) + + def testNewNamedGraphConfigfile(self): + """Test if a new graph is added to the repository. + + 1. Prepare a git repository with an empty and a non empty graph + 2. 
Start Quit + 3. execute Update query + 4. check filesystem and configfile content (before/after) + """ + # Prepate a git Repository + content = ' .\n' + repoContent = {'http://example.org/': content} + with TemporaryRepositoryFactory().withGraphs(repoContent, 'configfile') as repo: + + # Start Quit + args = quitApp.parseArgs(['-t', repo.workdir, '-cm', 'graphfiles']) + objects = quitApp.initialize(args) + config = objects['config'] + app = create_app(config).test_client() + + with open(path.join(repo.workdir, 'config.ttl'), 'r') as f: + configfile_before = f.read() + + # execute DELETE INSERT WHERE query + update = 'INSERT DATA { GRAPH { . } }' + app.post('/sparql', + content_type="application/sparql-update", + data=update) + + filename = quote_plus('http://aksw.org/') + '.nq' + + with open(path.join(repo.workdir, 'graph_0.nq'), 'r') as f: + self.assertEqual(' .\n', f.read()) + with open(path.join(repo.workdir, filename), 'r') as f: + self.assertEqual(' .\n', f.read()) + with open(path.join(repo.workdir, 'config.ttl'), 'r') as f: + configfile_after = f.read() + + config_before = [x.strip() for x in configfile_before.split('\n')] + config_after = [x.strip() for x in configfile_after.split('\n')] + diff = list(set(config_after) - set(config_before)) + + self.assertFalse('ns1:graphFile "' + filename + '" ;' in config_before) + self.assertFalse('ns1:hasFormat "nquads" .' in config_before) + self.assertFalse('ns1:graphUri ;' in config_before) + + self.assertTrue('ns1:graphFile "' + filename + '" ;' in diff) + self.assertTrue('ns1:hasFormat "nquads" .' 
in diff) + self.assertTrue('ns1:graphUri ;' in diff) + + if __name__ == '__main__': unittest.main() From b19a3e9583f81f3c9ac6d1522c0b4ef2eb221c71 Mon Sep 17 00:00:00 2001 From: Natanael Arndt Date: Thu, 27 Sep 2018 15:48:56 +0200 Subject: [PATCH 06/21] Support for empty repositories is now added and thus also the config should be ok with no configured graph --- tests/test_conf.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/test_conf.py b/tests/test_conf.py index db78c331..fc7f6b3f 100644 --- a/tests/test_conf.py +++ b/tests/test_conf.py @@ -130,13 +130,6 @@ def testExistingRepoConfigfile(self): self.assertEqual(conf.getfileforgraphuri('http://aksw.org/'), 'graph_0.nq') self.assertEqual(conf.getfileforgraphuri('http://example.org/'), 'graph_1.nq') - def testInitWithMissingInformation(self): - """Start QuitStore without graphfiles and configfile.""" - with TemporaryRepositoryFactory().noConfigInformations() as repo: - - conf = QuitGraphConfiguration(repository=repo) - self.assertRaises(InvalidConfigurationError, conf.initgraphconfig, 'master') - def main(): unittest.main() From 09b98a60c04816659c79d2eafed7be7898320a2e Mon Sep 17 00:00:00 2001 From: Natanael Arndt Date: Thu, 27 Sep 2018 16:12:19 +0200 Subject: [PATCH 07/21] Update skip reason for pull test on empty repo --- tests/test_app.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_app.py b/tests/test_app.py index 89470ac8..71b59974 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -2309,12 +2309,12 @@ def testPullEmptyInitialGraph(self): assertResultBindingsEqual(self, resultBindings, [afterPull]) - @unittest.skip("See https://github.com/AKSW/QuitStore/issues/81") + @unittest.skip("See https://github.com/AKSW/QuitStore/issues/171") def testPullStartFromEmptyRepository(self): """Test /pull API request starting the store from an empty repository. 
- CAUTION: This test is disabled, because we currently have problems starting a store when no - graph is configured. See https://github.com/AKSW/QuitStore/issues/81 + CAUTION: This test is disabled, because we currently have a problem with our pull + implementation. See https://github.com/AKSW/QuitStore/issues/171 """ graphContent = """ .""" From 4a833d02d15a5844cc861da2e25c2620d6e96a57 Mon Sep 17 00:00:00 2001 From: Natanael Arndt Date: Thu, 27 Sep 2018 16:15:53 +0200 Subject: [PATCH 08/21] Fix pull test to run on new configuration --- tests/test_app.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/test_app.py b/tests/test_app.py index 71b59974..9a7b32f4 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -2309,12 +2309,8 @@ def testPullEmptyInitialGraph(self): assertResultBindingsEqual(self, resultBindings, [afterPull]) - @unittest.skip("See https://github.com/AKSW/QuitStore/issues/171") def testPullStartFromEmptyRepository(self): """Test /pull API request starting the store from an empty repository. - - CAUTION: This test is disabled, because we currently have a problem with our pull - implementation. See https://github.com/AKSW/QuitStore/issues/171 """ graphContent = """ .""" @@ -2336,7 +2332,7 @@ def testPullStartFromEmptyRepository(self): self.assertEqual(len(resultBindings), 0) - response = app.get('/pull/origin') + response = app.get('/pull/origin/master') self.assertEqual(response.status, '200 OK') afterPull = {'s': {'type': 'uri', 'value': 'http://ex.org/x'}, From cefff77833492f91983ab76b16063c68b43eb924 Mon Sep 17 00:00:00 2001 From: Norman Radtke Date: Mon, 1 Oct 2018 14:12:31 +0200 Subject: [PATCH 09/21] Remove unused class, fix test, update TODO Remove old QuitConfiguration which was used as a very small superclass. Fix test for base namespaces. Add TODO reason. 
--- quit/conf.py | 24 ++++++++++++------------ quit/core.py | 2 +- tests/test_conf.py | 17 ++++++----------- 3 files changed, 19 insertions(+), 24 deletions(-) diff --git a/quit/conf.py b/quit/conf.py index 837e1bcf..3a4034e4 100644 --- a/quit/conf.py +++ b/quit/conf.py @@ -25,12 +25,11 @@ class Feature: All = Provenance | Persistence | GarbageCollection -class QuitConfiguration: - quit = Namespace('http://quit.aksw.org/vocab/') +class QuitStoreConfiguration(): + """A class that provides information about settings, filesystem and git.""" + quit = Namespace('http://quit.aksw.org/vocab/') -class QuitStoreConfiguration(QuitConfiguration): - """A class that provides information about settings, filesystem and git.""" def __init__( self, configfile='config.ttl', @@ -39,7 +38,7 @@ def __init__( targetdir=None, namespace=None ): - """The init method. + """Initialize store configuration. This method checks if the config file is given and reads the config file. If the config file is missing, it will be generated after analyzing the @@ -55,15 +54,14 @@ def __init__( self.namespace = None self.nsMngrSysconf = NamespaceManager(self.sysconf) - self.nsMngrSysconf.bind('', 'http://quit.aksw.org/vocab/', override=False) + self.nsMngrSysconf.bind('', self.quit, override=False) try: self.__initstoreconfig( namespace=namespace, upstream=upstream, targetdir=targetdir, - configfile=configfile - ) + configfile=configfile) except InvalidConfigurationError as e: logger.error(e) raise e @@ -100,7 +98,7 @@ def __initstoreconfig(self, namespace, upstream, targetdir, configfile): self.setRepoPath(targetdir) if upstream: - self.setGitUpstream(upstream) + self.setUpstream(upstream) return @@ -187,11 +185,13 @@ def setRepoPath(self, path): return -class QuitGraphConfiguration(QuitConfiguration): +class QuitGraphConfiguration(): """A class that keeps track of the relation between named graphs and files.""" + quit = Namespace('http://quit.aksw.org/vocab/') + def __init__(self, repository): - """The 
init method. + """Init graph configuration. This method checks if the config file is given and reads the config file. If the config file is missing, it will be generated after analyzing the @@ -215,7 +215,7 @@ def initgraphconfig(self, rev): if self.graphconf is None: self.graphconf = Graph() self.nsMngrGraphconf = NamespaceManager(self.graphconf) - self.nsMngrGraphconf.bind('', 'http://quit.aksw.org/vocab/', override=False) + self.nsMngrGraphconf.bind('', self.quit, override=False) graph_files, config_files, rdf_files = self.get_blobs_from_repository(rev) diff --git a/quit/core.py b/quit/core.py index 802a8144..f2628364 100644 --- a/quit/core.py +++ b/quit/core.py @@ -473,7 +473,7 @@ def _applyUnknownGraphs(delta): for entry in delta: for identifier, changeset in entry.items(): if isinstance(identifier, BNode) or str(identifier) == 'default': - continue # TODO + continue # TODO default graph use case fileName = quote_plus(identifier + '.nq') if identifier not in new_contexts.keys(): diff --git a/tests/test_conf.py b/tests/test_conf.py index fc7f6b3f..93cdc36e 100644 --- a/tests/test_conf.py +++ b/tests/test_conf.py @@ -16,30 +16,24 @@ class TestConfiguration(unittest.TestCase): - ns = 'http://quit.instance/' def testNamespace(self): content1 = ' .' 
repoContent = {'http://example.org/': content1} with TemporaryRepositoryFactory().withGraphs(repoContent) as repo: - - # missing namespace - self.assertRaises( - InvalidConfigurationError, QuitStoreConfiguration, 'targetdir', repo.workdir) - good = ['http://example.org/thing#', 'https://example.org/', 'http://example.org/things/'] - bad = ['file:///home/quit/', 'urn:graph/', 'urn:graph', '../test'] + bad = [None, 'file:///home/quit/', 'urn:graph/', 'urn:graph', '../test'] - # good namespaces + # good base namespaces for uri in good: conf = QuitStoreConfiguration(targetdir=repo.workdir, namespace=uri) self.assertEqual(conf.namespace, uri) - # bad namespaces + # bad base namespaces for uri in bad: - self.assertRaises( - InvalidConfigurationError, QuitStoreConfiguration, 'targetdir', repo.workdir, 'namespace', uri) + with self.assertRaises(InvalidConfigurationError): + QuitStoreConfiguration(targetdir=repo.workdir, namespace=uri) def testStoreConfigurationWithDir(self): content1 = ' .' @@ -47,6 +41,7 @@ def testStoreConfigurationWithDir(self): with TemporaryRepositoryFactory().withGraphs(repoContent) as repo: conf = QuitStoreConfiguration(targetdir=repo.workdir, namespace=self.ns) self.assertEqual(conf.getRepoPath(), repo.workdir) + self.assertEqual(conf.getDefaultBranch(), 'master') def testStoreConfigurationWithConfigfile(self): content1 = ' .' From 375ad6488445d9ad118122b6ea7744a37e6e8f67 Mon Sep 17 00:00:00 2001 From: Norman Radtke Date: Fri, 5 Oct 2018 13:16:38 +0200 Subject: [PATCH 10/21] Clean store configuration Update usage of namespace. Remove attribute for file name of unassigned graphs. Remove property for unassigned file from vocab. Add test for upstream. 
--- quit/conf.py | 44 +++++++++----------------------------------- tests/test_conf.py | 14 ++++++++++++++ vocab/vocab.ttl | 3 --- 3 files changed, 23 insertions(+), 38 deletions(-) diff --git a/quit/conf.py b/quit/conf.py index 3a4034e4..1e21a731 100644 --- a/quit/conf.py +++ b/quit/conf.py @@ -29,6 +29,7 @@ class QuitStoreConfiguration(): """A class that provides information about settings, filesystem and git.""" quit = Namespace('http://quit.aksw.org/vocab/') + store = URIRef('http://quit.aksw.org/instance/store') def __init__( self, @@ -106,7 +107,6 @@ def hasFeature(self, flags): return flags == (self.features & flags) def getBindings(self): - ns = Namespace('http://quit.aksw.org/vocab/') q = """SELECT DISTINCT ?prefix ?namespace WHERE {{ {{ ?ns a <{binding}> ; @@ -114,8 +114,8 @@ def getBindings(self): <{predicate_namespace}> ?namespace . }} }}""".format( - binding=ns['Binding'], predicate_prefix=ns['prefix'], - predicate_namespace=ns['namespace'] + binding=self.quit['Binding'], predicate_prefix=self.quit['prefix'], + predicate_namespace=self.quit['namespace'] ) result = self.sysconf.query(q) @@ -127,60 +127,34 @@ def getDefaultBranch(self): Returns: A string containing the branch name. """ - nsQuit = 'http://quit.aksw.org/vocab/' - storeuri = URIRef('http://my.quit.conf/store') - property = URIRef(nsQuit + 'defaultBranch') - - for s, p, o in self.sysconf.triples((None, property, None)): + for s, p, o in self.sysconf.triples((None, self.quit.defaultBranch, None)): return str(o) return "master" - def getGlobalFile(self): - """Get the graph file which should be used for unassigned graphs. - - Returns - The filename of the graph file where unassigned graphs should be stored. 
- - """ - nsQuit = 'http://quit.aksw.org/vocab/' - storeuri = URIRef('http://my.quit.conf/store') - property = URIRef(nsQuit + 'globalFile') - - for s, p, o in self.sysconf.triples((None, property, None)): - return str(o) - def getRepoPath(self): """Get the path of Git repository from configuration. Returns: A string containig the path of the git repo. """ - nsQuit = 'http://quit.aksw.org/vocab/' - storeuri = URIRef('http://my.quit.conf/store') - property = URIRef(nsQuit + 'pathOfGitRepo') - - for s, p, o in self.sysconf.triples((None, property, None)): + for s, p, o in self.sysconf.triples((None, self.quit.pathOfGitRepo, None)): return str(o) def getUpstream(self): """Get the URI of Git remote from configuration.""" - nsQuit = 'http://quit.aksw.org/vocab/' - storeuri = URIRef('http://my.quit.conf/store') - property = self.quit.upstream - - for s, p, o in self.sysconf.triples((storeuri, property, None)): + for s, p, o in self.sysconf.triples((None, self.quit.upstream, None)): return str(o) def setUpstream(self, origin): - self.sysconf.remove((None, self.quit.origin, None)) - self.sysconf.add((self.quit.Store, self.quit.upstream, Literal(origin))) + self.sysconf.remove((None, self.quit.upstream, None)) + self.sysconf.add((self.store, self.quit.upstream, Literal(origin))) return def setRepoPath(self, path): self.sysconf.remove((None, self.quit.pathOfGitRepo, None)) - self.sysconf.add((self.quit.Store, self.quit.pathOfGitRepo, Literal(path))) + self.sysconf.add((self.store, self.quit.pathOfGitRepo, Literal(path))) return diff --git a/tests/test_conf.py b/tests/test_conf.py index 93cdc36e..5b8b38d4 100644 --- a/tests/test_conf.py +++ b/tests/test_conf.py @@ -49,6 +49,20 @@ def testStoreConfigurationWithConfigfile(self): with TemporaryRepositoryFactory().withGraphs(repoContent, 'configfile') as repo: conf = QuitStoreConfiguration(configfile=join(repo.workdir, 'config.ttl'), namespace=self.ns) self.assertEqual(conf.getRepoPath(), repo.workdir) + 
self.assertEqual(conf.getDefaultBranch(), 'master') + + def testStoreConfigurationUpstream(self): + content1 = ' .' + repoContent = {'http://example.org/': content1} + with TemporaryRepositoryFactory().withGraphs(repoContent, 'configfile') as repo: + conf = QuitStoreConfiguration( + configfile=join(repo.workdir, 'config.ttl'), + upstream='http://cool.repo.git', + namespace=self.ns) + self.assertEqual(conf.getRepoPath(), repo.workdir) + self.assertEqual(conf.getUpstream(), 'http://cool.repo.git') + + def testExistingRepoGraphFiles(self): content1 = ' .' diff --git a/vocab/vocab.ttl b/vocab/vocab.ttl index c35f824e..eaf37db5 100644 --- a/vocab/vocab.ttl +++ b/vocab/vocab.ttl @@ -31,9 +31,6 @@ quit:pathOfGitRepo a rdfs:Property ; quit:defaultBranch a rdfs:Property ; rdfs:comment "Branch used as default" . -quit:globalFile a rdfs:Property ; - rdfs:comment "File for unassigned graphs" . - quit:linkToGitRemote a rdfs:Property ; rdfs:comment "Link to the Git Remote" . From 058fbc6c7d418cd8a5cee9c43dea79f7b103fdc3 Mon Sep 17 00:00:00 2001 From: Norman Radtke Date: Fri, 5 Oct 2018 16:06:55 +0200 Subject: [PATCH 11/21] Clarify Graph Configuration Remove parsing of each rdf file (searching named graphs). Move additions and removals of configuration data to add/remove methods. Add test. 
--- quit/conf.py | 62 +++++++++++++++------------------------------- tests/test_conf.py | 26 +++++++++++++++++++ 2 files changed, 46 insertions(+), 42 deletions(-) diff --git a/quit/conf.py b/quit/conf.py index 1e21a731..c5190644 100644 --- a/quit/conf.py +++ b/quit/conf.py @@ -215,46 +215,10 @@ def __init_graph_conf_with_blobs(self, files, rev): graphFileId = values[1] graphuri = URIRef(self.__get_uri_from_graphfile_blob(graphFileId)) - if graphuri and format == 'nquads': + if graphuri: self.addgraph(file=file, graphuri=graphuri, format=format) - self.graphs[graphuri] = file - self.files[file] = { - 'serialization': format, 'graphs': [graphuri], 'oid': files[file][1]} - elif graphuri is None and format == 'nquads': - tmpgraph = ConjunctiveGraph(identifier='default') - - try: - tmpgraph.parse(source=os.path.join(file), format=format) - except Exception: - logger.error("Could not parse file {}. File skipped.".format(file)) - continue - - namedgraphs = tmpgraph.contexts() - founduris = [] - - for graph in namedgraphs: - if not isinstance(graph, BNode) and str(graph.identifier) != 'default': - graphuri = graph.identifier - founduris.append(graphuri) - - if len(founduris) == 1: - self.addgraph(file=file, graphuri=graphuri, format=format) - self.graphs[graphuri] = file - self.files[file] = { - 'serialization': format, 'graphs': [graphuri], 'oid': files[file][1]} - elif len(founduris) > 1: - logger.info("No named graph found. {} skipped.".format(file)) - elif len(founduris) < 1: - logger.info( - "More than one named graphs found. Can't decide. {} skipped.".format(file)) - elif format == 'nt': - if graphuri: - self.addgraph(file=file, graphuri=graphuri, format=format) - self.graphs[graphuri] = file - self.files[file] = { - 'serialization': format, 'graphs': [graphuri], 'oid': files[file][1]} - else: - logger.warning('No *.graph file found. ' + file + ' skipped.') + else: + logger.warning('No *.graph file found. 
{} skipped'.format(file)) def __init_graph_conf_from_configuration(self, configfileId, known_blobs): """Init graphs with setting from config.ttl.""" @@ -328,16 +292,30 @@ def __get_uri_from_graphfile_blob(self, oid): return content def addgraph(self, graphuri, file, format=None): + graphuri_obj = URIRef(graphuri) + if graphuri_obj in self.graphs.keys(): + return + self.graphconf.add((self.quit[quote(graphuri)], RDF.type, self.quit.Graph)) self.graphconf.add((self.quit[quote(graphuri)], self.quit.graphUri, URIRef(graphuri))) self.graphconf.add((self.quit[quote(graphuri)], self.quit.graphFile, Literal(file))) + self.graphs[graphuri_obj] = file + if format is not None: self.graphconf.add((self.quit[quote(graphuri)], self.quit.hasFormat, Literal(format))) - - return + self.files[file] = {'serialization': format, 'graphs': [graphuri], 'oid': file} + else: + self.files[file] = {'graphs': [graphuri_obj], 'oid': file} def removegraph(self, graphuri): - self.graphconf.remove((self.quit[urlencode(graphuri)], None, None)) + self.graphconf.remove((self.quit[quote(graphuri)], None, None)) + + if not isinstance(graphuri, URIRef): + graphuri = URIRef(graphuri) + if graphuri in self.graphs.keys(): + filename = self.graphs[graphuri] + del self.files[filename] + del self.graphs[graphuri] return diff --git a/tests/test_conf.py b/tests/test_conf.py index 5b8b38d4..441766fe 100644 --- a/tests/test_conf.py +++ b/tests/test_conf.py @@ -139,6 +139,32 @@ def testExistingRepoConfigfile(self): self.assertEqual(conf.getfileforgraphuri('http://aksw.org/'), 'graph_0.nq') self.assertEqual(conf.getfileforgraphuri('http://example.org/'), 'graph_1.nq') + def testGraphConfigurationMethods(self): + content1 = ' .' + content2 = ' .\n' + content2 += ' .' 
+ repoContent = {'http://example.org/': content1, 'http://aksw.org/': content2} + with TemporaryRepositoryFactory().withGraphs(repoContent, 'configfile') as repo: + conf = QuitGraphConfiguration(repository=repo) + conf.initgraphconfig('master') + + conf.removegraph('http://aksw.org/') + + self.assertEqual(conf.getgraphurifilemap(), { + rdflib.term.URIRef('http://example.org/'): 'graph_1.nq'}) + self.assertEqual(conf.getfileforgraphuri('http://aksw.org/'), None) + self.assertEqual(conf.getgraphuriforfile('graph_0.nq'), []) + self.assertEqual(conf.getserializationoffile('graph_0.nq'), None) + + conf.addgraph('http://aksw.org/', 'new_file.nq', 'nquads') + + self.assertEqual(conf.getgraphurifilemap(), { + rdflib.term.URIRef('http://aksw.org/'): 'new_file.nq', + rdflib.term.URIRef('http://example.org/'): 'graph_1.nq'}) + self.assertEqual(conf.getfileforgraphuri('http://aksw.org/'), 'new_file.nq') + self.assertEqual(conf.getgraphuriforfile('new_file.nq'), ['http://aksw.org/']) + self.assertEqual(conf.getserializationoffile('new_file.nq'), 'nquads') + def main(): unittest.main() From 9fe1d1098ea4172f3f0b3e1af9ba5d4a6893fc4e Mon Sep 17 00:00:00 2001 From: Norman Radtke Date: Mon, 8 Oct 2018 16:47:59 +0200 Subject: [PATCH 12/21] Add more graph configuration tests. 
--- tests/helpers.py | 46 +++++++++++++++++++++++++++++++++++++++++----- tests/test_conf.py | 18 ++++++++++++++++++ 2 files changed, 59 insertions(+), 5 deletions(-) diff --git a/tests/helpers.py b/tests/helpers.py index 294e2620..455e6faa 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -96,19 +96,28 @@ def withGraph(self, graphUri, graphContent=None): return tmpRepo - def noConfigInformations(self, graphContent=''): - """Give a TemporaryRepository() initialized with a graph with the given content (and one commit).""" + def withBothConfigurations(self): + """Give a TemporaryRepository() initialized with config.ttl and graph + graphfile.""" tmpRepo = TemporaryRepository() # Add a graph.nq and a graph.nq.graph file with open(path.join(tmpRepo.repo.workdir, "graph.nq"), "w") as graphFile: - if graphContent: - graphFile.write(graphContent) + graphFile.write('') - # Add and Commit the empty graph + # Set Graph URI to http://example.org/ + with open(path.join(tmpRepo.repo.workdir, "graph.nq.graph"), "w") as graphFile: + graphFile.write('http://example.org/') + + # Add config.ttl + with open(path.join(tmpRepo.repo.workdir, "config.ttl"), "w") as graphFile: + graphFile.write('') + + # Add and Commit index = tmpRepo.repo.index index.read() + index.add("config.ttl") index.add("graph.nq") + index.add("graph.nq.graph") index.write() # Create commit @@ -118,6 +127,33 @@ def noConfigInformations(self, graphContent=''): return tmpRepo + def withWrongInformationFile(self): + """Give a TemporaryRepository() with a config.ttl containing incorrect turtle content.""" + tmpRepo = TemporaryRepository() + + # Add config.ttl + with open(path.join(tmpRepo.repo.workdir, "config.ttl"), "w") as graphFile: + graphFile.write('This is not written in turtle syntax.') + + # Add and Commit + index = tmpRepo.repo.index + index.read() + index.add("config.ttl") + index.write() + + # Create commit + tree = index.write_tree() + message = "init" + tmpRepo.repo.create_commit('HEAD', self.author, 
self.comitter, message, tree, []) + + return tmpRepo + + def withNoConfigInformation(self): + """Give an empty TemporaryRepository().""" + tmpRepo = TemporaryRepository() + + return tmpRepo + def withGraphs(self, graphUriContentDict, mode='graphfiles'): """Give a TemporaryRepository() initialized with a dictionary of graphUris and content (nq).""" uristring = '' diff --git a/tests/test_conf.py b/tests/test_conf.py index 441766fe..49b5738c 100644 --- a/tests/test_conf.py +++ b/tests/test_conf.py @@ -72,6 +72,7 @@ def testExistingRepoGraphFiles(self): with TemporaryRepositoryFactory().withGraphs(repoContent) as repo: conf = QuitGraphConfiguration(repository=repo) conf.initgraphconfig('master') + self.assertEqual(conf.mode, 'graphfiles') graphs = conf.getgraphs() self.assertEqual( @@ -111,6 +112,7 @@ def testExistingRepoConfigfile(self): with TemporaryRepositoryFactory().withGraphs(repoContent, 'configfile') as repo: conf = QuitGraphConfiguration(repository=repo) conf.initgraphconfig('master') + self.assertEqual(conf.mode, 'configuration') graphs = conf.getgraphs() self.assertEqual(sorted([str(x) for x in graphs]), ['http://aksw.org/', 'http://example.org/']) @@ -165,6 +167,22 @@ def testGraphConfigurationMethods(self): self.assertEqual(conf.getgraphuriforfile('new_file.nq'), ['http://aksw.org/']) self.assertEqual(conf.getserializationoffile('new_file.nq'), 'nquads') + def testGraphConfigurationFailing(self): + with TemporaryRepositoryFactory().withBothConfigurations() as repo: + conf = QuitGraphConfiguration(repository=repo) + self.assertRaises(InvalidConfigurationError, conf.initgraphconfig, 'master') + + def testWrongConfigurationFile(self): + with TemporaryRepositoryFactory().withBothConfigurations() as repo: + conf = QuitGraphConfiguration(repository=repo) + self.assertRaises(InvalidConfigurationError, conf.initgraphconfig, 'master') + + def testNoConfigInformation(self): + with TemporaryRepositoryFactory().withNoConfigInformation() as repo: + conf = 
QuitGraphConfiguration(repository=repo) + conf.initgraphconfig('master') + self.assertEqual(conf.mode, 'graphfiles') + def main(): unittest.main() From 7bdd27bc3a5095fd79c09c92b759a1514bdbc518 Mon Sep 17 00:00:00 2001 From: Natanael Arndt Date: Mon, 22 Oct 2018 14:12:20 +0200 Subject: [PATCH 13/21] Some more docstrings --- quit/application.py | 7 ++++++- quit/conf.py | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/quit/application.py b/quit/application.py index fbc433c9..8437c026 100644 --- a/quit/application.py +++ b/quit/application.py @@ -118,6 +118,7 @@ def sequencePathCompareGt(self, other): class FeaturesAction(argparse.Action): + """Actions that are executed for the configuration passed with the `--feature` option.""" CHOICES = { 'provenance': Feature.Provenance, 'persistence': Feature.Persistence, @@ -139,7 +140,11 @@ def __call__(self, parser, namespace, values, option_string=None): def parseArgs(args): - """Parse command line arguments.""" + """Parse command line arguments. + + Returns: + parsed object representing the config arguments. + """ basepathhelp = "Base path (aka. application root) (WSGI only)." graphhelp = """This option tells QuitStore how to map graph files and named graph URIs: "localconfig" - Use the given local file for graph settings. diff --git a/quit/conf.py b/quit/conf.py index c5190644..97a37f4c 100644 --- a/quit/conf.py +++ b/quit/conf.py @@ -18,6 +18,7 @@ class Feature: + """Represents the features passed by the `--feature` parameter.""" Unknown = 0 Provenance = 1 << 0 Persistence = 1 << 1 From 33f7207808d94b1995741d93f34326fd4d6c8788 Mon Sep 17 00:00:00 2001 From: Norman Radtke Date: Fri, 26 Oct 2018 14:34:11 +0200 Subject: [PATCH 14/21] Add check if hashed file name exists. 
--- quit/core.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/quit/core.py b/quit/core.py index f2628364..7d3a49cc 100644 --- a/quit/core.py +++ b/quit/core.py @@ -475,8 +475,16 @@ def _applyUnknownGraphs(delta): if isinstance(identifier, BNode) or str(identifier) == 'default': continue # TODO default graph use case - fileName = quote_plus(identifier + '.nq') if identifier not in new_contexts.keys(): + while True: + fileName = quote_plus(identifier + '.nq') + i = 0 + if fileName in blobs: + fileName = quote_plus(identifier + '_{}.nq'.format(i)) + else: + break + i+= 1 + new_contexts[identifier] = FileReference(fileName, '') fileReference = new_contexts[identifier] From ac21130bf72ac462feec0231b3795bd41b1f0246 Mon Sep 17 00:00:00 2001 From: Norman Radtke Date: Fri, 26 Oct 2018 14:46:33 +0200 Subject: [PATCH 15/21] Fix pylava --- quit/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quit/core.py b/quit/core.py index 7d3a49cc..879c519d 100644 --- a/quit/core.py +++ b/quit/core.py @@ -483,7 +483,7 @@ def _applyUnknownGraphs(delta): fileName = quote_plus(identifier + '_{}.nq'.format(i)) else: break - i+= 1 + i += 1 new_contexts[identifier] = FileReference(fileName, '') From 9bfb0380bb689fd878e24c3620ce62fdb0e661bc Mon Sep 17 00:00:00 2001 From: Norman Radtke Date: Mon, 29 Oct 2018 16:47:24 +0100 Subject: [PATCH 16/21] Add test for filename collision --- quit/core.py | 21 +++++++-------- tests/helpers.py | 37 +++++++++++++++++++++++++++ tests/test_app.py | 65 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+), 10 deletions(-) diff --git a/quit/core.py b/quit/core.py index 879c519d..3f0b7df7 100644 --- a/quit/core.py +++ b/quit/core.py @@ -10,6 +10,7 @@ from pygit2 import GIT_SORT_REVERSE, GIT_RESET_HARD, GIT_STATUS_CURRENT from rdflib import Graph, ConjunctiveGraph, BNode, Literal +import re from quit.conf import Feature, QuitGraphConfiguration from quit.helpers import applyChangeset 
@@ -468,7 +469,7 @@ def _applyKnownGraphs(delta, blobs): pass return blobs_new - def _applyUnknownGraphs(delta): + def _applyUnknownGraphs(delta, known_blobs): new_contexts = {} for entry in delta: for identifier, changeset in entry.items(): @@ -476,14 +477,13 @@ def _applyUnknownGraphs(delta): continue # TODO default graph use case if identifier not in new_contexts.keys(): - while True: - fileName = quote_plus(identifier + '.nq') - i = 0 - if fileName in blobs: - fileName = quote_plus(identifier + '_{}.nq'.format(i)) - else: - break - i += 1 + fileName = quote_plus(identifier) + '.nq' + + if fileName in known_blobs: + reg = re.compile("(" + quote_plus(identifier) + "_)([0-9]*)(.nq)") + # n ~ numbers (in blobname), b ~ blobname, m ~ match + n = [int(m.group(2)) for b in known_blobs for m in [reg.search(b)] if m] + fileName = quote_plus(identifier + '_{}.nq'.format(max(n)+1)) new_contexts[identifier] = FileReference(fileName, '') @@ -513,9 +513,10 @@ def _applyUnknownGraphs(delta): self.updateGraphConfig(parent_commit_id) graphconfig = self._graphconfigs.get(parent_commit_id) + known_files = graphconfig.getfiles().keys() blobs_new = _applyKnownGraphs(delta, blobs) - new_contexts = _applyUnknownGraphs(delta) + new_contexts = _applyUnknownGraphs(delta, known_files) new_config = copy(graphconfig) for identifier, fileReference in new_contexts.items(): diff --git a/tests/helpers.py b/tests/helpers.py index 455e6faa..7e6b1972 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -3,6 +3,7 @@ from os import path, walk from os.path import join from rdflib import Graph +from urllib.parse import quote_plus def createCommit(repository, message=None): @@ -96,6 +97,42 @@ def withGraph(self, graphUri, graphContent=None): return tmpRepo + def withHashedFileNames(self, graphContent='\n'): + """Give a TemporaryRepository() initialized with a graph name that will force a collision.""" + tmpRepo = TemporaryRepository() + + # Add a graph.nq and a graph.nq.graph file + identifier = 
quote_plus('http://aksw.org/') + + files = { + identifier + '.nq': ('http://example.org/', graphContent), + identifier + '_1.nq': ('urn:graph1', '\n'), + identifier + '_11.nq': ('urn:graph2', '\n')} + + for filename, (graph_iri, content) in files.items(): + with open(path.join(tmpRepo.repo.workdir, filename), 'w') as graph_file: + graph_file.write(content) + + # Set Graph URI to http://example.org/ + with open(path.join(tmpRepo.repo.workdir, filename + '.graph'), 'w') as graph_file: + graph_file.write(graph_iri) + + # Add and Commit the empty graph + index = tmpRepo.repo.index + index.read() + for filename, (graph_iri, content) in files.items(): + index.add(filename) + index.add(filename + '.graph') + index.write() + + # Create commit + tree = index.write_tree() + message = "init" + tmpRepo.repo.create_commit('HEAD', self.author, self.comitter, message, tree, []) + + return tmpRepo + + def withBothConfigurations(self): """Give a TemporaryRepository() initialized with config.ttl and graph + graphfile.""" tmpRepo = TemporaryRepository() diff --git a/tests/test_app.py b/tests/test_app.py index 38eb2738..7858fac0 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -3314,5 +3314,70 @@ def testNewNamedGraphConfigfile(self): self.assertTrue('ns1:graphUri ;' in diff) + def testFileNameCollision(self): + """Test if a new graph is added to the repository. + + 1. Prepare a git repository with files that use hashed names of a graph that will be inserted + 2. Start Quit + 3. check filesystem for filenames + 4. execute Update query + 5. 
check filesystem for filenames + """ + # Prepate a git Repository + content = ' .\n' + with TemporaryRepositoryFactory().withHashedFileNames(content) as repo: + + # Start Quit + args = quitApp.parseArgs(['-t', repo.workdir, '-cm', 'graphfiles']) + objects = quitApp.initialize(args) + config = objects['config'] + app = create_app(config).test_client() + + hashed_identifier = quote_plus('http://aksw.org/') + + files = { + hashed_identifier + '.nq': ('http://example.org/', content), + hashed_identifier + '_1.nq': ('urn:graph1', '\n'), + hashed_identifier + '_11.nq': ('urn:graph2', '\n')} + + commit = repo.revparse_single('master') + + for entry in commit.tree: + if entry.type == 'blob' and entry.name.endswith('.nq'): + self.assertTrue(entry.name in files.keys()) + else: + self.assertTrue(entry.name[:-6] in files.keys()) + + for filename, (graph_iri, content) in files.items(): + with open(path.join(repo.workdir, filename), 'r') as f: + self.assertEqual(content, f.read()) + with open(path.join(repo.workdir, filename + '.graph'), 'r') as f: + self.assertEqual(graph_iri, f.read()) + + # execute Update query + update = 'INSERT DATA { GRAPH { . 
} }' + app.post('/sparql', + content_type="application/sparql-update", + data=update) + + # add the new file we expext after Update Query + files[hashed_identifier + '_12.nq'] = ( + 'http://aksw.org/', ' .\n') + + commit = repo.revparse_single('master') + + for entry in commit.tree: + if entry.type == 'blob' and entry.name.endswith('.nq'): + self.assertTrue(entry.name in files.keys()) + else: + self.assertTrue(entry.name[:-6] in files.keys()) + + for filename, (graph_iri, content) in files.items(): + with open(path.join(repo.workdir, filename), 'r') as f: + self.assertEqual(content, f.read()) + with open(path.join(repo.workdir, filename + '.graph'), 'r') as f: + self.assertEqual(graph_iri, f.read()) + + if __name__ == '__main__': unittest.main() From aee4a89de993362afc260b23b16ad7d30f146038 Mon Sep 17 00:00:00 2001 From: Natanael Arndt Date: Mon, 29 Oct 2018 17:15:31 +0100 Subject: [PATCH 17/21] Fix case of first collision and regex escape identifier --- quit/core.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/quit/core.py b/quit/core.py index 3f0b7df7..578d0c1d 100644 --- a/quit/core.py +++ b/quit/core.py @@ -480,10 +480,11 @@ def _applyUnknownGraphs(delta, known_blobs): fileName = quote_plus(identifier) + '.nq' if fileName in known_blobs: - reg = re.compile("(" + quote_plus(identifier) + "_)([0-9]*)(.nq)") + reg = re.compile(re.escape(quote_plus(identifier)) + "_([0-9]*).nq") # n ~ numbers (in blobname), b ~ blobname, m ~ match - n = [int(m.group(2)) for b in known_blobs for m in [reg.search(b)] if m] - fileName = quote_plus(identifier + '_{}.nq'.format(max(n)+1)) + n = [int(m.group(1)) for b in known_blobs for m in [reg.search(b)] if m] + n.append(0) + fileName = '{}_{}.nq'.format(quote_plus(identifier), max(n)+1) new_contexts[identifier] = FileReference(fileName, '') From 6915f4a7aadcb2db11b4f5a16db10b477c85ea1f Mon Sep 17 00:00:00 2001 From: Natanael Arndt Date: Mon, 29 Oct 2018 17:39:08 +0100 Subject: [PATCH 18/21] Fix _.nq. 
Make names nicer. --- quit/core.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/quit/core.py b/quit/core.py index 578d0c1d..9b116aa5 100644 --- a/quit/core.py +++ b/quit/core.py @@ -19,7 +19,7 @@ from quit.utils import graphdiff, git_timestamp from quit.cache import Cache, FileReference -from urllib.parse import quote_plus +from urllib.parse import quote_plus, urlparse import subprocess @@ -477,14 +477,15 @@ def _applyUnknownGraphs(delta, known_blobs): continue # TODO default graph use case if identifier not in new_contexts.keys(): - fileName = quote_plus(identifier) + '.nq' + fileName = _iriToName(identifier) + '.nq' if fileName in known_blobs: - reg = re.compile(re.escape(quote_plus(identifier)) + "_([0-9]*).nq") + reg = re.compile(re.escape(_iriToName(identifier)) + "_([0-9]+).nq") # n ~ numbers (in blobname), b ~ blobname, m ~ match - n = [int(m.group(1)) for b in known_blobs for m in [reg.search(b)] if m] - n.append(0) - fileName = '{}_{}.nq'.format(quote_plus(identifier), max(n)+1) + n = [ + int(m.group(1)) for b in known_blobs for m in [reg.search(b)] if m + ] + [0] + fileName = '{}_{}.nq'.format(_iriToName(identifier), max(n)+1) new_contexts[identifier] = FileReference(fileName, '') @@ -492,6 +493,13 @@ def _applyUnknownGraphs(delta, known_blobs): applyChangeset(fileReference, changeset, identifier) return new_contexts + def _iriToName(iri): + parsedIri = urlparse(iri) + nameParts = [parsedIri.netloc] + if parsedIri.path.strip("/"): + nameParts += parsedIri.path.strip("/").split("/") + return quote_plus("_".join(nameParts)) + if not delta: return From 5571e886b2736f005cd9a1c1970353e0bf95cd6f Mon Sep 17 00:00:00 2001 From: Natanael Arndt Date: Mon, 29 Oct 2018 18:04:45 +0100 Subject: [PATCH 19/21] IRI to name method to utils --- quit/core.py | 17 ++++------------- quit/utils.py | 8 ++++++++ 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/quit/core.py b/quit/core.py index 9b116aa5..8a560a80 100644 --- 
a/quit/core.py +++ b/quit/core.py @@ -16,11 +16,9 @@ from quit.helpers import applyChangeset from quit.namespace import RDFS, FOAF, XSD, PROV, QUIT, is_a from quit.graphs import RewriteGraph, InMemoryAggregatedGraph -from quit.utils import graphdiff, git_timestamp +from quit.utils import graphdiff, git_timestamp, iri_to_name from quit.cache import Cache, FileReference -from urllib.parse import quote_plus, urlparse - import subprocess logger = logging.getLogger('quit.core') @@ -477,15 +475,15 @@ def _applyUnknownGraphs(delta, known_blobs): continue # TODO default graph use case if identifier not in new_contexts.keys(): - fileName = _iriToName(identifier) + '.nq' + fileName = iri_to_name(identifier) + '.nq' if fileName in known_blobs: - reg = re.compile(re.escape(_iriToName(identifier)) + "_([0-9]+).nq") + reg = re.compile(re.escape(iri_to_name(identifier)) + "_([0-9]+).nq") # n ~ numbers (in blobname), b ~ blobname, m ~ match n = [ int(m.group(1)) for b in known_blobs for m in [reg.search(b)] if m ] + [0] - fileName = '{}_{}.nq'.format(_iriToName(identifier), max(n)+1) + fileName = '{}_{}.nq'.format(iri_to_name(identifier), max(n)+1) new_contexts[identifier] = FileReference(fileName, '') @@ -493,13 +491,6 @@ def _applyUnknownGraphs(delta, known_blobs): applyChangeset(fileReference, changeset, identifier) return new_contexts - def _iriToName(iri): - parsedIri = urlparse(iri) - nameParts = [parsedIri.netloc] - if parsedIri.path.strip("/"): - nameParts += parsedIri.path.strip("/").split("/") - return quote_plus("_".join(nameParts)) - if not delta: return diff --git a/quit/utils.py b/quit/utils.py index fa81d5b3..cf215322 100644 --- a/quit/utils.py +++ b/quit/utils.py @@ -7,6 +7,8 @@ from datetime import tzinfo, timedelta, datetime from quit.graphs import InMemoryAggregatedGraph from collections import OrderedDict +from urllib.parse import quote_plus, urlparse + ZERO = timedelta(0) HOUR = timedelta(hours=1) @@ -39,6 +41,12 @@ def git_timestamp(ts, offset): tz = 
tzinfo.TZ(offset, tzname) return datetime.fromtimestamp(ts, tz) +def iri_to_name(iri): + parsedIri = urlparse(iri) + nameParts = [parsedIri.netloc] + if parsedIri.path.strip("/"): + nameParts += parsedIri.path.strip("/").split("/") + return quote_plus("_".join(nameParts)) def sparqlresponse(result, format): """Create a FLASK HTTP response for sparql-result+json.""" From b106dd6e38d1e5ff68c62fd5a14a50e33dcb7ec0 Mon Sep 17 00:00:00 2001 From: Natanael Arndt Date: Mon, 29 Oct 2018 18:06:41 +0100 Subject: [PATCH 20/21] Remove useless and unflexible withHashedFileNames test helper method and fix tests --- tests/helpers.py | 35 ----------------------------------- tests/test_app.py | 32 ++++++++++++++++++++++---------- 2 files changed, 22 insertions(+), 45 deletions(-) diff --git a/tests/helpers.py b/tests/helpers.py index 7e6b1972..646cf040 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -97,41 +97,6 @@ def withGraph(self, graphUri, graphContent=None): return tmpRepo - def withHashedFileNames(self, graphContent='\n'): - """Give a TemporaryRepository() initialized with a graph name that will force a collision.""" - tmpRepo = TemporaryRepository() - - # Add a graph.nq and a graph.nq.graph file - identifier = quote_plus('http://aksw.org/') - - files = { - identifier + '.nq': ('http://example.org/', graphContent), - identifier + '_1.nq': ('urn:graph1', '\n'), - identifier + '_11.nq': ('urn:graph2', '\n')} - - for filename, (graph_iri, content) in files.items(): - with open(path.join(tmpRepo.repo.workdir, filename), 'w') as graph_file: - graph_file.write(content) - - # Set Graph URI to http://example.org/ - with open(path.join(tmpRepo.repo.workdir, filename + '.graph'), 'w') as graph_file: - graph_file.write(graph_iri) - - # Add and Commit the empty graph - index = tmpRepo.repo.index - index.read() - for filename, (graph_iri, content) in files.items(): - index.add(filename) - index.add(filename + '.graph') - index.write() - - # Create commit - tree = 
index.write_tree() - message = "init" - tmpRepo.repo.create_commit('HEAD', self.author, self.comitter, message, tree, []) - - return tmpRepo - def withBothConfigurations(self): """Give a TemporaryRepository() initialized with config.ttl and graph + graphfile.""" diff --git a/tests/test_app.py b/tests/test_app.py index 7858fac0..e8ade5ce 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -12,6 +12,7 @@ import json from helpers import createCommit, assertResultBindingsEqual from tempfile import TemporaryDirectory +from quit.utils import iri_to_name class SparqlProtocolTests(unittest.TestCase): @@ -3246,7 +3247,7 @@ def testNewNamedGraph(self): objects = quitApp.initialize(args) config = objects['config'] app = create_app(config).test_client() - filename = quote_plus('http://aksw.org/') + '.nq' + filename = iri_to_name('http://aksw.org/') + '.nq' self.assertFalse(path.isfile(path.join(repo.workdir, filename))) self.assertFalse(path.isfile(path.join(repo.workdir, filename + '.graph'))) @@ -3292,7 +3293,7 @@ def testNewNamedGraphConfigfile(self): content_type="application/sparql-update", data=update) - filename = quote_plus('http://aksw.org/') + '.nq' + filename = iri_to_name('http://aksw.org/') + '.nq' with open(path.join(repo.workdir, 'graph_0.nq'), 'r') as f: self.assertEqual(' .\n', f.read()) @@ -3325,21 +3326,32 @@ def testFileNameCollision(self): """ # Prepate a git Repository content = ' .\n' - with TemporaryRepositoryFactory().withHashedFileNames(content) as repo: - - # Start Quit - args = quitApp.parseArgs(['-t', repo.workdir, '-cm', 'graphfiles']) - objects = quitApp.initialize(args) - config = objects['config'] - app = create_app(config).test_client() + with TemporaryRepository() as repo: - hashed_identifier = quote_plus('http://aksw.org/') + hashed_identifier = iri_to_name('http://aksw.org/') files = { hashed_identifier + '.nq': ('http://example.org/', content), hashed_identifier + '_1.nq': ('urn:graph1', '\n'), hashed_identifier + '_11.nq': 
('urn:graph2', '\n')} + # Prepare Git Repository + for filename, (graph_iri, content) in files.items(): + with open(path.join(repo.workdir, filename), 'w') as graph_file: + graph_file.write(content) + + # Set Graph URI to http://example.org/ + with open(path.join(repo.workdir, filename + '.graph'), 'w') as graph_file: + graph_file.write(graph_iri) + + createCommit(repo, "init") + + # Start Quit + args = quitApp.parseArgs(['-t', repo.workdir, '-cm', 'graphfiles']) + objects = quitApp.initialize(args) + config = objects['config'] + app = create_app(config).test_client() + commit = repo.revparse_single('master') for entry in commit.tree: From 7b3fcdc39e0bbb2365a4ca08fd23e1eda88f8a97 Mon Sep 17 00:00:00 2001 From: Natanael Arndt Date: Mon, 29 Oct 2018 18:10:59 +0100 Subject: [PATCH 21/21] Add required blank lines --- quit/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/quit/utils.py b/quit/utils.py index cf215322..f1779c2a 100644 --- a/quit/utils.py +++ b/quit/utils.py @@ -41,6 +41,7 @@ def git_timestamp(ts, offset): tz = tzinfo.TZ(offset, tzname) return datetime.fromtimestamp(ts, tz) + def iri_to_name(iri): parsedIri = urlparse(iri) nameParts = [parsedIri.netloc] @@ -48,6 +49,7 @@ def iri_to_name(iri): nameParts += parsedIri.path.strip("/").split("/") return quote_plus("_".join(nameParts)) + def sparqlresponse(result, format): """Create a FLASK HTTP response for sparql-result+json.""" return Response(