Gather some statistics about the datasets, vocabularies and codelists loaded into PMD (PublishMyData).

In [1]:
import html

import pandas as pd
from IPython.display import HTML
from SPARQLWrapper import SPARQLWrapper2

# SPARQL endpoint that every query in this notebook is sent to.
endpoint = "https://production-drafter-ons-alpha.publishmydata.com/v1/sparql/live"
# One shared client; the cells below call setQuery()/query() on it.
sparql = SPARQLWrapper2(endpoint)

Count the observations in each dataset, largest first.

In [2]:
# Count the qb:Observation resources attached to each dataset, largest first.
sparql.setQuery("""
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX qb:   <http://purl.org/linked-data/cube#>

SELECT (COUNT(?obs) AS ?observations) ?dataset
WHERE {
  ?obs a qb:Observation ;
         qb:dataSet ?dataset .
} GROUP BY ?dataset ORDER BY DESC(?observations)
""")

# Binding values arrive as strings, so cast the count to int; otherwise the
# 'Observations' column holds text and cannot be sorted or summed numerically.
table = pd.DataFrame()
table['Observations'] = pd.Series({
    res['dataset'].value: int(res['observations'].value)
    for res in sparql.query().bindings
})
table

Unnamed: 0,Observations
http://gss-data.org.uk/data/hmrc-regional-trade-statistics,3241972
http://gss-data.org.uk/data/hmrc-overseas-trade-statistics,1499970
http://gss-data.org.uk/data/ons-cpa,399992
http://gss-data.org.uk/data/ons-trade-in-goods-mrets,264270
http://gss-data.org.uk/data/ons-bop-individual-country-data,80756
http://gss-data.org.uk/data/ons-pink-book-chapter-3,5378
http://gss-data.org.uk/data/ons-ltim-age-and-sex,2819
http://gss-data.org.uk/data/ons-abs,2025
http://gss-data.org.uk/data/ons-ltim-citizenship,1706
http://gss-data.org.uk/data/ons-ltim-country-of-residence,1472


In [3]:
# Fetch the label, graph and (optional) family of every qb:DataSet.
sparql.setQuery("""
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX qb:   <http://purl.org/linked-data/cube#>
PREFIX pmd:  <http://publishmydata.com/def/dataset#>
PREFIX gdp:  <http://gss-data.org.uk/def/gdp#>

SELECT DISTINCT ?dataset ?datasetLabel ?graph ?family
WHERE {
  ?dataset a qb:DataSet ;
         rdfs:label ?datasetLabel ;
         pmd:graph ?graph .
  OPTIONAL {
    ?dataset gdp:family [rdfs:label ?family]
  }
}
""")

results = sparql.query().bindings

# Walk the bindings once, collecting one dict per output column, each keyed by
# dataset URI. If a dataset appears in several rows, the last row wins.
labels, graphs, families = {}, {}, {}
for row in results:
    dataset_uri = row['dataset'].value
    labels[dataset_uri] = row['datasetLabel'].value
    graphs[dataset_uri] = row['graph'].value
    if 'family' in row:
        families[dataset_uri] = row['family'].value
    else:
        # ?family is OPTIONAL in the query; unbound rows fall back to 'Trade'.
        families[dataset_uri] = 'Trade'

# Assigning a Series aligns it on the existing index (the dataset URIs).
table['Label'] = pd.Series(labels)
table['Graph'] = pd.Series(graphs)
table['Family'] = pd.Series(families)
table

Unnamed: 0,Observations,Label,Graph,Family
http://gss-data.org.uk/data/hmrc-regional-trade-statistics,3241972,HMRC Regional Trade Statistics,http://gss-data.org.uk/graph/hmrc-regional-tra...,Trade
http://gss-data.org.uk/data/hmrc-overseas-trade-statistics,1499970,HMRC Overseas Trade Statistics,http://gss-data.org.uk/graph/hmrc-overseas-tra...,Trade
http://gss-data.org.uk/data/ons-cpa,399992,ONS CPA,http://gss-data.org.uk/graph/ons-cpa,Trade
http://gss-data.org.uk/data/ons-trade-in-goods-mrets,264270,ONS Trade in goods MRETS,http://gss-data.org.uk/graph/ons-trade-in-good...,Trade
http://gss-data.org.uk/data/ons-bop-individual-country-data,80756,ONS BoP Individual Country Data,http://gss-data.org.uk/graph/ons-bop-individua...,Trade
http://gss-data.org.uk/data/ons-pink-book-chapter-3,5378,ONS Pink Book Chapter 3,http://gss-data.org.uk/graph/ons-pink-book-cha...,Trade
http://gss-data.org.uk/data/ons-ltim-age-and-sex,2819,ONS LTIM Age and Sex,http://gss-data.org.uk/graph/ons-ltim-age-and-sex,Migration
http://gss-data.org.uk/data/ons-abs,2025,ONS ABS,http://gss-data.org.uk/graph/ons-abs,Trade
http://gss-data.org.uk/data/ons-ltim-citizenship,1706,ONS LTIM citizenship,http://gss-data.org.uk/graph/ons-ltim-citizenship,Migration
http://gss-data.org.uk/data/ons-ltim-country-of-residence,1472,ONS LTIM country of residence,http://gss-data.org.uk/graph/ons-ltim-country-...,Migration


In [4]:
# Count the triples held in each named graph.
sparql.setQuery("""
SELECT (COUNT(*) as ?size) ?graph
WHERE {
  GRAPH ?graph {
    ?s ?p ?o
  }
} GROUP BY ?graph
""")

# Map graph URI -> triple count, casting the count to int.
triple_counts = {}
for row in sparql.query().bindings:
    triple_counts[row['graph'].value] = int(row['size'].value)

sizes = pd.DataFrame()
sizes['Triples'] = pd.Series(triple_counts)
sizes

Unnamed: 0,Triples
http://purl.org/dc/terms/,866
http://gss-data.org.uk/graph/semstats/cpav2008-cpav21,28071
http://gss-data.org.uk/graph/semstats/cpav2008,47707
http://gss-data.org.uk/graph/semstats/cpav21,44275
http://gss-data.org.uk/graph/semstats/cpcv11,29269
http://gss-data.org.uk/graph/semstats/cpcv2,44159
http://gss-data.org.uk/graph/semstats/cpcv21,36837
http://gss-data.org.uk/graph/semstats/cpcv11-cpcv2,15202
http://gss-data.org.uk/graph/semstats/cpcv2-cpcv21,14788
http://gss-data.org.uk/graph/semstats/isicr31,5438


In [5]:
# Attach each dataset's triple count by matching its graph URI against the
# index of `sizes`, then discard the join key, which is no longer needed.
table = (
    table
    .merge(sizes, left_on='Graph', right_index=True)
    .drop(columns=['Graph'])
)
table

Unnamed: 0,Observations,Label,Family,Triples
http://gss-data.org.uk/data/hmrc-regional-trade-statistics,3241972,HMRC Regional Trade Statistics,Trade,84297394
http://gss-data.org.uk/data/hmrc-overseas-trade-statistics,1499970,HMRC Overseas Trade Statistics,Trade,36048123
http://gss-data.org.uk/data/ons-cpa,399992,ONS CPA,Trade,12806153
http://gss-data.org.uk/data/ons-trade-in-goods-mrets,264270,ONS Trade in goods MRETS,Trade,7929621
http://gss-data.org.uk/data/ons-bop-individual-country-data,80756,ONS BoP Individual Country Data,Trade,1777303
http://gss-data.org.uk/data/ons-pink-book-chapter-3,5378,ONS Pink Book Chapter 3,Trade,150770
http://gss-data.org.uk/data/ons-ltim-age-and-sex,2819,ONS LTIM Age and Sex,Migration,84949
http://gss-data.org.uk/data/ons-abs,2025,ONS ABS,Trade,61101
http://gss-data.org.uk/data/ons-ltim-citizenship,1706,ONS LTIM citizenship,Migration,48123
http://gss-data.org.uk/data/ons-ltim-country-of-residence,1472,ONS LTIM country of residence,Migration,67894


In [6]:
from urllib.parse import urlencode
def gss_url(uri):
    """Return the gss-data.org.uk resource page URL for ``uri``.

    ``uri`` is percent-encoded and passed as the ``uri`` query parameter.
    """
    query_string = urlencode([('uri', uri)])
    return f'http://gss-data.org.uk/resource?{query_string}'

# Build one HTML link per dataset. The table is rendered with escape=False so
# the <a> tags survive, which means the label (fetched from the SPARQL
# endpoint) and the href must be escaped here to keep stray markup characters
# out of the page.
dataset_links = [
    f'<a href="{html.escape(gss_url(dataset_uri))}">{html.escape(str(label))}</a>'
    for dataset_uri, label in table['Label'].items()
]

# Replace the plain label with the link and fix the column order; selecting
# the columns explicitly also drops 'Label'.
table = table.assign(Dataset=dataset_links)[['Dataset', 'Observations', 'Family', 'Triples']]

# None disables truncation of long cells (the links); the negative value
# previously used for this is deprecated since pandas 1.0.
pd.set_option('display.max_colwidth', None)

# Render once and reuse the markup for both the file and the inline display.
table_html = table.to_html(escape=False, index=False)
# Explicit encoding so non-ASCII labels are written the same on every platform.
with open('dataset-stats.html', 'w', encoding='utf-8') as f:
    f.write(table_html)
HTML(table_html)

Dataset,Observations,Family,Triples
HMRC Regional Trade Statistics,3241972,Trade,84297394
HMRC Overseas Trade Statistics,1499970,Trade,36048123
ONS CPA,399992,Trade,12806153
ONS Trade in goods MRETS,264270,Trade,7929621
ONS BoP Individual Country Data,80756,Trade,1777303
ONS Pink Book Chapter 3,5378,Trade,150770
ONS LTIM Age and Sex,2819,Migration,84949
ONS ABS,2025,Trade,61101
ONS LTIM citizenship,1706,Migration,48123
ONS LTIM country of residence,1472,Migration,67894
