#!/usr/bin/env python3
"""Compare dataset issue dates between a local store and gss-data.org.uk.

Every ``dcat:Dataset`` published at the gss-data.org.uk SPARQL endpoint is
looked up in the local ``datasets.db`` store by its landing page URL, and the
``dct:issued`` values of the two sides are compared.  One status line is
printed per dataset.

NOTE(review): the messages suggest ``datasets.db`` holds the gov.uk
statistical datasets listing -- confirm against whatever populates it.
"""

from rdflib import ConjunctiveGraph, Graph
from rdflib.namespace import RDF, Namespace, DCTERMS

DCAT = Namespace('http://www.w3.org/ns/dcat#')

# Selects each dataset together with its landing page and issue date.
GSS_DATASETS_QUERY = """
PREFIX dcat: <http://www.w3.org/ns/dcat#>
PREFIX dct: <http://purl.org/dc/terms/>
SELECT ?id ?url ?issued
WHERE {
  ?id a dcat:Dataset ;
    dcat:landingPage ?url ;
    dct:issued ?issued .
}"""

ds = ConjunctiveGraph('Sleepycat')
ds.open('datasets.db')
try:
    print(f'Datasets store has {len(ds)} triples')
    # Count lazily instead of materialising every matching triple in a list.
    dataset_count = sum(1 for _ in ds.triples((None, RDF.type, DCAT.Dataset)))
    print(f' and {dataset_count} datasets.')

    gss = Graph('SPARQLStore', identifier='http://gss-data.org.uk')
    gss.open("http://gss-data.org.uk/sparql")
    try:
        for dataset in gss.query(GSS_DATASETS_QUERY):
            # The local store is keyed by landing page URL.  any=False makes
            # rdflib raise rather than silently pick one of several dates.
            latest_pub_date = ds.value(
                subject=dataset.url,
                predicate=DCTERMS.issued,
                any=False,
            )
            if latest_pub_date is None:
                print(f"Dataset {dataset.url} not listed in gov.uk statistical datasets.")
            elif latest_pub_date != dataset.issued:
                # NOTE(review): any difference in dates is reported as a more
                # recent update, not only a strictly later one -- confirm
                # that this is intended.
                print(f"Dataset {dataset.url} has more recent update.")
            else:
                print(f"Dataset {dataset.url} is up to date.")
    finally:
        # Release the endpoint handle even if the query or a lookup fails.
        gss.close()
finally:
    # Always close the on-disk store so it is not left open after an error.
    ds.close()