# Investigating the coverage of the trade datasets family
## Setup SPARQL API

In [1]:
from SPARQLWrapper import SPARQLWrapper2
import pandas as pd
from IPython.display import HTML

endpoint = "https://production-drafter-ons-alpha.publishmydata.com/v1/sparql/live"
sparql = SPARQLWrapper2(endpoint)

## Retreive Data Structure Definitions
This retreives the codelist(s) for each dimension, for each dataset.

In [2]:
with open('./dataset-structure-geo.sparql', 'r') as query_file:
    query_str = query_file.read()

sparql.setQuery(query_str)

results = pd.DataFrame(sparql.query().bindings)

## Create URI presenters
We can create presenters that will provide, for each URI, an html link to the resource (i.e. the URI itself) with the `rdfs:label` used as the text.

In [3]:
def uri_to_label(uri_column,label_column):
    return(dict(zip(results[uri_column].map(lambda x: x.value),
                    results[label_column].map(lambda x: x.value))))

labeller = {**uri_to_label('dataset_uri','dataset'),
            **uri_to_label('dimension_uri','dimension'),
            **uri_to_label('codelist_view_uri','codelist')}

def presenter(uri):
    return(f'<a href="{uri}">{labeller[uri]}</a> ')

def lister(presenters):
    return(presenters.str.join(''))

## Present Codelists in a matrix

Prepare a matrix of datasets vs dimensions, with each cell summarising the codelists being used.

In [4]:
codelists = results[['dataset_uri','dimension_uri','codelist_view_uri']].applymap(lambda x: presenter(x.value))
codelists_pivot = codelists.pivot_table(index='dataset_uri',columns='dimension_uri',values='codelist_view_uri',aggfunc=lister)

Sort the matrix such that the most commonly re-used dimensions are to the left hand side.

In [5]:
dimension_counts = codelists_pivot.count('rows').sort_values(ascending=False)
codelists_pivot = codelists_pivot.reindex(columns=dimension_counts.index).fillna('')
pd.set_option('max_colwidth', -1) # don't truncate cells
with open("dataset-dimensions-geo.html", "w") as f:
    f.write(codelists_pivot.to_html(escape=False))
HTML(codelists_pivot.to_html(escape=True))

dimension_uri,"<a href=""http://gss-data.org.uk/def/dimension/trade-partner-geography"">Trade Partner Geography</a>","<a href=""http://gss-data.org.uk/def/dimension/citizenship"">Citizenship</a>","<a href=""http://gss-data.org.uk/def/dimension/residence"">Residence</a>","<a href=""http://gss-data.org.uk/def/dimension/trade-reporter-geography"">Trade Reporter Geography</a>","<a href=""http://gss-data.org.uk/def/dimension/ons-fdi-area"">ONS FDI Area</a>","<a href=""http://gss-data.org.uk/def/dimension/nationality"">Nationality</a>","<a href=""http://gss-data.org.uk/def/dimension/foreign-geography"">Foreign geography</a>","<a href=""http://gss-data.org.uk/def/dimension/country-of-ownership"">Country of Ownership</a>"
dataset_uri,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"<a href=""http://gss-data.org.uk/data/gss_data/migration/dwp-nin-registrations-to-overseas-nationals"">National Insurance number allocations to adult overseas nationals to March 2018</a>",,,,,,"<a href=""http://gss-data.org.uk/concept-scheme?uri=http://gss-data.org.uk/def/concept-scheme/nationality"">Nationality</a>",,
"<a href=""http://gss-data.org.uk/data/gss_data/migration/ho-entry-visas"">Immigration statistics, October to December 2017: data tables second edition</a>",,"<a href=""http://gss-data.org.uk/concept-scheme?uri=http://gss-data.org.uk/def/concept-scheme/ho-citizenship"">HO Citizenship</a>",,,,,,
"<a href=""http://gss-data.org.uk/data/gss_data/migration/ons-ltim-citizenship"">Long-term international migration 2.01a, citizenship, UK and England and Wales</a>",,"<a href=""http://gss-data.org.uk/concept-scheme?uri=http://gss-data.org.uk/def/concept-scheme/ltim-citizenship"">LTIM Citizenship</a>",,,,,,
"<a href=""http://gss-data.org.uk/data/gss_data/migration/ons-ltim-country-of-residence"">Long-term international migration 2.02, country of last or next residence, UK and England and Wales</a>",,,"<a href=""http://gss-data.org.uk/concept-scheme?uri=http://gss-data.org.uk/def/concept-scheme/country-of-residence"">Country of Residence</a>",,,,,
"<a href=""http://gss-data.org.uk/data/gss_data/migration/ons-ltim-passenger-survey-4-01"">International Passenger Survey 4.01, citizenship group by sex by age by country of last or next residence</a>",,"<a href=""http://gss-data.org.uk/concept-scheme?uri=http://gss-data.org.uk/def/concept-scheme/ips-citizenship"">IPS Citizenship</a>","<a href=""http://gss-data.org.uk/concept-scheme?uri=http://gss-data.org.uk/def/concept-scheme/country-of-residence"">Country of Residence</a>",,,,,
"<a href=""http://gss-data.org.uk/data/gss_data/migration/ons-ltim-passenger-survey-4-02"">International Passenger Survey 4.02, main reason for migration by citizenship</a>",,"<a href=""http://gss-data.org.uk/concept-scheme?uri=http://gss-data.org.uk/def/concept-scheme/ips-citizenship"">IPS Citizenship</a>",,,,,,
"<a href=""http://gss-data.org.uk/data/gss_data/migration/ons-ltim-passenger-survey-4-04"">International Passenger Survey 4.04, area of destination or origin within the UK by citizenship</a>",,"<a href=""http://gss-data.org.uk/concept-scheme?uri=http://gss-data.org.uk/def/concept-scheme/ips-citizenship"">IPS Citizenship</a>",,,,,,
"<a href=""http://gss-data.org.uk/data/gss_data/migration/scotland-overseas"">Migration between Scotland and Overseas</a>",,,,,,,"<a href=""http://gss-data.org.uk/concept-scheme?uri=http://gss-data.org.uk/def/concept-scheme/foreign-geography"">Foreign geography</a>",
"<a href=""http://gss-data.org.uk/data/gss_data/trade/hmrc_rts"">UK Regional Trade Statistics (RTS</a>","<a href=""http://gss-data.org.uk/concept-scheme?uri=http://gss-data.org.uk/def/concept-scheme/hmrc-geographies"">HMRC Geographies</a>",,,"<a href=""http://gss-data.org.uk/concept-scheme?uri=http://gss-data.org.uk/def/concept-scheme/hmrc-regions"">HMRC Regions</a>",,,,
"<a href=""http://gss-data.org.uk/data/gss_data/trade/ons-fdi"">Foreign direct investment involving UK companies: outward</a>",,,,,"<a href=""http://gss-data.org.uk/concept-scheme?uri=http://gss-data.org.uk/def/concept-scheme/ons-fdi-area"">ONS FDI Area</a>",,,


Note that although multiple datasets re-use the same common dimensions, they often use their own codelists. We know that there exist correspondances between these (e.g. BPM6 codes can be used to relate BOP and PB definitions) although we've not published the links for those yet.