#!/bin/env python3
from rdflib import ConjunctiveGraph, Graph
from rdflib.namespace import RDF, Namespace, DCTERMS
# DCAT vocabulary namespace, used to recognise dcat:Dataset resources.
DCAT = Namespace('http://www.w3.org/ns/dcat#')

# SPARQL sent to the remote endpoint: one row per dcat:Dataset that has both
# a landing page (?url) and an issued date (?issued).
DATASETS_QUERY = """
PREFIX dcat: <http://www.w3.org/ns/dcat#>
PREFIX dct: <http://purl.org/dc/terms/>
SELECT ?id ?url ?issued WHERE {
?id a dcat:Dataset ;
dcat:landingPage ?url ;
dct:issued ?issued .
}"""

# Local on-disk store, opened from 'datasets.db'. The report messages below
# refer to its contents as the gov.uk statistical datasets listing.
ds = ConjunctiveGraph('Sleepycat')
ds.open('datasets.db')
try:
    print(f'Datasets store has {len(ds)} triples')
    # Count matches lazily instead of materialising every triple in a list.
    dataset_count = sum(
        1 for _ in ds.triples((None, RDF.type, DCAT.Dataset))
    )
    print(f' and {dataset_count} datasets.')

    # Remote graph queried over SPARQL.
    gss = Graph('SPARQLStore', identifier='http://gss-data.org.uk')
    gss.open("http://gss-data.org.uk/sparql")
    try:
        for dataset in gss.query(DATASETS_QUERY):
            # The remote landing page URL is used as the subject to look up
            # in the local store.
            # NOTE(review): with any=False, rdflib is expected to raise if
            # the subject has more than one dct:issued value - confirm.
            latest_pub_date = ds.value(
                subject=dataset.url, predicate=DCTERMS.issued, any=False
            )
            if latest_pub_date is None:
                print(f"Dataset {dataset.url} not listed in gov.uk statistical datasets.")
            elif latest_pub_date != dataset.issued:
                # NOTE(review): this only tests inequality, so it also fires
                # when the local date is older than the remote one - confirm
                # that "more recent" is the intended wording.
                print(f"Dataset {dataset.url} has more recent update.")
            else:
                print(f"Dataset {dataset.url} is up to date.")
    finally:
        # Close the remote store even if the query or a lookup fails.
        gss.close()
finally:
    # Always close the on-disk store so it is not left open after an error.
    ds.close()