diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0bd8549
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+.idea
+in
+project/target
+target
+.ipynb_checkpoints
+report.xml
\ No newline at end of file
diff --git a/dataset-stats/size.ipynb b/dataset-stats/size.ipynb
new file mode 100644
index 0000000..5ea605b
--- /dev/null
+++ b/dataset-stats/size.ipynb
@@ -0,0 +1,795 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Gather some statistics about the datasets, vocabularies and codelists loaded into PMD."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from SPARQLWrapper import SPARQLWrapper2\n",
+ "import pandas as pd\n",
+ "from IPython.display import HTML\n",
+ "\n",
+ "endpoint = 'https://production-drafter-ons-alpha.publishmydata.com/v1/sparql/live'  # every query below is sent here\n",
+ "sparql = SPARQLWrapper2(endpoint)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Find the number of observations in each dataset."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Observations | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " http://gss-data.org.uk/data/hmrc-overseas-trade-statistics | \n",
+ " 1499970 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/data/hmrc-regional-trade-statistics | \n",
+ " 639936 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/data/ons-cpa | \n",
+ " 399992 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/data/ons-trade-in-goods-mrets | \n",
+ " 264270 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/data/ons-bop-individual-country-data | \n",
+ " 80756 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/data/ons-pink-book-chapter-3 | \n",
+ " 5378 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/data/hmrc-uk-trade-in-goods-statistics-by-business-characteristics-2015 | \n",
+ " 947 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/data/ons-abs | \n",
+ " 648 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/data/ons-balance-of-payments | \n",
+ " 396 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Observations\n",
+ "http://gss-data.org.uk/data/hmrc-overseas-trade... 1499970\n",
+ "http://gss-data.org.uk/data/hmrc-regional-trade... 639936\n",
+ "http://gss-data.org.uk/data/ons-cpa 399992\n",
+ "http://gss-data.org.uk/data/ons-trade-in-goods-... 264270\n",
+ "http://gss-data.org.uk/data/ons-bop-individual-... 80756\n",
+ "http://gss-data.org.uk/data/ons-pink-book-chapt... 5378\n",
+ "http://gss-data.org.uk/data/hmrc-uk-trade-in-go... 947\n",
+ "http://gss-data.org.uk/data/ons-abs 648\n",
+ "http://gss-data.org.uk/data/ons-balance-of-paym... 396"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sparql.setQuery(\"\"\"\n",
+ "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n",
+ "PREFIX qb: <http://purl.org/linked-data/cube#>\n",
+ "\n",
+ "SELECT (COUNT(?obs) AS ?observations) ?dataset\n",
+ "WHERE {\n",
+ "  ?obs a qb:Observation ;\n",
+ "       qb:dataSet ?dataset .\n",
+ "} GROUP BY ?dataset ORDER BY DESC(?observations)\n",
+ "\"\"\")\n",
+ "# Index = dataset URI. Binding values are strings, so cast the count to int (as the triple counts are).\n",
+ "table = pd.DataFrame()\n",
+ "table['Observations'] = pd.Series({\n",
+ "    res['dataset'].value: int(res['observations'].value)\n",
+ "    for res in sparql.query().bindings\n",
+ "})\n",
+ "table"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Observations | \n",
+ " Label | \n",
+ " Graph | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " http://gss-data.org.uk/data/hmrc-overseas-trade-statistics | \n",
+ " 1499970 | \n",
+ " HMRC Overseas Trade Statistics | \n",
+ " http://gss-data.org.uk/graph/hmrc-overseas-tra... | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/data/hmrc-regional-trade-statistics | \n",
+ " 639936 | \n",
+ " HMRC Regional Trade Statistics | \n",
+ " http://gss-data.org.uk/graph/hmrc-regional-tra... | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/data/ons-cpa | \n",
+ " 399992 | \n",
+ " ONS CPA | \n",
+ " http://gss-data.org.uk/graph/ons-cpa | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/data/ons-trade-in-goods-mrets | \n",
+ " 264270 | \n",
+ " ONS Trade in goods MRETS | \n",
+ " http://gss-data.org.uk/graph/ons-trade-in-good... | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/data/ons-bop-individual-country-data | \n",
+ " 80756 | \n",
+ " ONS BoP Individual Country Data | \n",
+ " http://gss-data.org.uk/graph/ons-bop-individua... | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/data/ons-pink-book-chapter-3 | \n",
+ " 5378 | \n",
+ " ONS Pink Book Chapter 3 | \n",
+ " http://gss-data.org.uk/graph/ons-pink-book-cha... | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/data/hmrc-uk-trade-in-goods-statistics-by-business-characteristics-2015 | \n",
+ " 947 | \n",
+ " HMRC UK Trade in Goods Statistics by Business ... | \n",
+ " http://gss-data.org.uk/graph/hmrc-uk-trade-in-... | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/data/ons-abs | \n",
+ " 648 | \n",
+ " ONS ABS | \n",
+ " http://gss-data.org.uk/graph/ons-abs | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/data/ons-balance-of-payments | \n",
+ " 396 | \n",
+ " ONS Balance of Payments | \n",
+ " http://gss-data.org.uk/graph/ons-balance-of-pa... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Observations \\\n",
+ "http://gss-data.org.uk/data/hmrc-overseas-trade... 1499970 \n",
+ "http://gss-data.org.uk/data/hmrc-regional-trade... 639936 \n",
+ "http://gss-data.org.uk/data/ons-cpa 399992 \n",
+ "http://gss-data.org.uk/data/ons-trade-in-goods-... 264270 \n",
+ "http://gss-data.org.uk/data/ons-bop-individual-... 80756 \n",
+ "http://gss-data.org.uk/data/ons-pink-book-chapt... 5378 \n",
+ "http://gss-data.org.uk/data/hmrc-uk-trade-in-go... 947 \n",
+ "http://gss-data.org.uk/data/ons-abs 648 \n",
+ "http://gss-data.org.uk/data/ons-balance-of-paym... 396 \n",
+ "\n",
+ " Label \\\n",
+ "http://gss-data.org.uk/data/hmrc-overseas-trade... HMRC Overseas Trade Statistics \n",
+ "http://gss-data.org.uk/data/hmrc-regional-trade... HMRC Regional Trade Statistics \n",
+ "http://gss-data.org.uk/data/ons-cpa ONS CPA \n",
+ "http://gss-data.org.uk/data/ons-trade-in-goods-... ONS Trade in goods MRETS \n",
+ "http://gss-data.org.uk/data/ons-bop-individual-... ONS BoP Individual Country Data \n",
+ "http://gss-data.org.uk/data/ons-pink-book-chapt... ONS Pink Book Chapter 3 \n",
+ "http://gss-data.org.uk/data/hmrc-uk-trade-in-go... HMRC UK Trade in Goods Statistics by Business ... \n",
+ "http://gss-data.org.uk/data/ons-abs ONS ABS \n",
+ "http://gss-data.org.uk/data/ons-balance-of-paym... ONS Balance of Payments \n",
+ "\n",
+ " Graph \n",
+ "http://gss-data.org.uk/data/hmrc-overseas-trade... http://gss-data.org.uk/graph/hmrc-overseas-tra... \n",
+ "http://gss-data.org.uk/data/hmrc-regional-trade... http://gss-data.org.uk/graph/hmrc-regional-tra... \n",
+ "http://gss-data.org.uk/data/ons-cpa http://gss-data.org.uk/graph/ons-cpa \n",
+ "http://gss-data.org.uk/data/ons-trade-in-goods-... http://gss-data.org.uk/graph/ons-trade-in-good... \n",
+ "http://gss-data.org.uk/data/ons-bop-individual-... http://gss-data.org.uk/graph/ons-bop-individua... \n",
+ "http://gss-data.org.uk/data/ons-pink-book-chapt... http://gss-data.org.uk/graph/ons-pink-book-cha... \n",
+ "http://gss-data.org.uk/data/hmrc-uk-trade-in-go... http://gss-data.org.uk/graph/hmrc-uk-trade-in-... \n",
+ "http://gss-data.org.uk/data/ons-abs http://gss-data.org.uk/graph/ons-abs \n",
+ "http://gss-data.org.uk/data/ons-balance-of-paym... http://gss-data.org.uk/graph/ons-balance-of-pa... "
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sparql.setQuery(\"\"\"\n",
+ "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n",
+ "PREFIX qb: <http://purl.org/linked-data/cube#>\n",
+ "PREFIX pmd: <http://publishmydata.com/def/dataset#>\n",
+ "\n",
+ "SELECT DISTINCT ?dataset ?datasetLabel ?graph\n",
+ "WHERE {\n",
+ "  ?dataset a qb:DataSet ;\n",
+ "           rdfs:label ?datasetLabel ;\n",
+ "           pmd:graph ?graph .\n",
+ "}\n",
+ "\"\"\")\n",
+ "# Run the query once; both new columns are keyed by dataset URI so they align with table's index.\n",
+ "results = sparql.query().bindings\n",
+ "\n",
+ "table['Label'] = pd.Series({\n",
+ "    res['dataset'].value: res['datasetLabel'].value\n",
+ "    for res in results\n",
+ "})\n",
+ "\n",
+ "table['Graph'] = pd.Series({\n",
+ "    res['dataset'].value: res['graph'].value\n",
+ "    for res in results\n",
+ "})\n",
+ "table"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Triples | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " http://gss-data.org.uk/graph/semstats/cpav2008-cpav21 | \n",
+ " 28071 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/semstats/cpav2008 | \n",
+ " 47707 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/semstats/cpav21 | \n",
+ " 44275 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/semstats/cpcv11 | \n",
+ " 29269 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/semstats/cpcv2 | \n",
+ " 44159 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/semstats/cpcv21 | \n",
+ " 36837 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/semstats/cpcv11-cpcv2 | \n",
+ " 15202 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/semstats/cpcv2-cpcv21 | \n",
+ " 14788 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/semstats/isicr31 | \n",
+ " 5438 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/semstats/isicr31-cpcv11 | \n",
+ " 13350 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/semstats/isicr31-isicr4 | \n",
+ " 4116 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/semstats/isicr4 | \n",
+ " 9249 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/semstats/isicr4-cpcv21 | \n",
+ " 13320 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/semstats/isicr4-cpcv2 | \n",
+ " 12305 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/semstats/isicr4-nacer2 | \n",
+ " 4311 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/semstats/isicr4-naics2012 | \n",
+ " 7591 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/semstats/nacer11 | \n",
+ " 9605 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/semstats/nacer2 | \n",
+ " 12806 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/semstats/nacer2-cpav21 | \n",
+ " 16066 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/semstats/nacer2-cpav2008 | \n",
+ " 15716 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/semstats/nacer11-nacer2 | \n",
+ " 5096 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/semstats/naics2012 | \n",
+ " 17756 | \n",
+ "
\n",
+ " \n",
+ " http://publishmydata.com/graph/vocabulary/22-rdf-syntax-ns | \n",
+ " 102 | \n",
+ "
\n",
+ " \n",
+ " http://publishmydata.com/graph/vocabulary/admingeo | \n",
+ " 1801 | \n",
+ "
\n",
+ " \n",
+ " http://publishmydata.com/graph/vocabulary/sdmx | \n",
+ " 100 | \n",
+ "
\n",
+ " \n",
+ " http://publishmydata.com/graph/vocabulary/skos | \n",
+ " 259 | \n",
+ "
\n",
+ " \n",
+ " http://publishmydata.com/graph/vocabulary/sdmx-subject | \n",
+ " 295 | \n",
+ "
\n",
+ " \n",
+ " http://publishmydata.com/graph/vocabulary/statistical-quality | \n",
+ " 32 | \n",
+ "
\n",
+ " \n",
+ " http://publishmydata.com/graph/vocabulary/rdf-schema | \n",
+ " 87 | \n",
+ "
\n",
+ " \n",
+ " http://publishmydata.com/graph/vocabulary/sdmx-concept | \n",
+ " 1021 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/ons-cpa/metadata | \n",
+ " 14 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/ons-cpa | \n",
+ " 12806153 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/ons-pink-book-chapter-3/metadata | \n",
+ " 14 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/ons-pink-book-chapter-3 | \n",
+ " 150770 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/hmrc-regional-trade-statistics/metadata | \n",
+ " 16 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/bop-services | \n",
+ " 140 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/pink-book-services | \n",
+ " 1640 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/export-and-import-activity | \n",
+ " 63 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/age-of-business | \n",
+ " 164 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/hmrc-regional-trade-statistics | \n",
+ " 16650299 | \n",
+ "
\n",
+ " \n",
+ " http://publishmydata.com/graph/vocabulary/csvw | \n",
+ " 632 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/gdp233 | \n",
+ " 77 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/hmrc-uk-trade-in-goods-statistics-by-business-characteristics-2015/metadata | \n",
+ " 15 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/hmrc-uk-trade-in-goods-statistics-by-business-characteristics-2015 | \n",
+ " 27243 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/def/cdid | \n",
+ " 37137 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/hmrc-industry-groups | \n",
+ " 152 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/statistical-geography | \n",
+ " 555 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/pmd-foi-ontology | \n",
+ " 77 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/hmrc-regions | \n",
+ " 175 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/ons-bop-individual-country-data/metadata | \n",
+ " 14 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/ons-bop-individual-country-data | \n",
+ " 1777303 | \n",
+ "
\n",
+ " \n",
+ " http://foo.bar.com/ | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/measurement-units | \n",
+ " 67 | \n",
+ "
\n",
+ " \n",
+ " https://trade.ec.europa.eu/def/cn_2012 | \n",
+ " 61259 | \n",
+ "
\n",
+ " \n",
+ " https://trade.ec.europa.eu/def/cn_2013 | \n",
+ " 61184 | \n",
+ "
\n",
+ " \n",
+ " https://trade.ec.europa.eu/def/cn_2014 | \n",
+ " 61194 | \n",
+ "
\n",
+ " \n",
+ " https://trade.ec.europa.eu/def/cn_2015 | \n",
+ " 61224 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/reference-time | \n",
+ " 2468 | \n",
+ "
\n",
+ " \n",
+ " http://gss-data.org.uk/graph/sitc-4 | \n",
+ " 44194 | \n",
+ "
\n",
+ " \n",
+ " https://trade.ec.europa.eu/def/cn_2016 | \n",
+ " 61369 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
99 rows × 1 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Triples\n",
+ "http://gss-data.org.uk/graph/semstats/cpav2008-... 28071\n",
+ "http://gss-data.org.uk/graph/semstats/cpav2008 47707\n",
+ "http://gss-data.org.uk/graph/semstats/cpav21 44275\n",
+ "http://gss-data.org.uk/graph/semstats/cpcv11 29269\n",
+ "http://gss-data.org.uk/graph/semstats/cpcv2 44159\n",
+ "http://gss-data.org.uk/graph/semstats/cpcv21 36837\n",
+ "http://gss-data.org.uk/graph/semstats/cpcv11-cpcv2 15202\n",
+ "http://gss-data.org.uk/graph/semstats/cpcv2-cpcv21 14788\n",
+ "http://gss-data.org.uk/graph/semstats/isicr31 5438\n",
+ "http://gss-data.org.uk/graph/semstats/isicr31-c... 13350\n",
+ "http://gss-data.org.uk/graph/semstats/isicr31-i... 4116\n",
+ "http://gss-data.org.uk/graph/semstats/isicr4 9249\n",
+ "http://gss-data.org.uk/graph/semstats/isicr4-cp... 13320\n",
+ "http://gss-data.org.uk/graph/semstats/isicr4-cpcv2 12305\n",
+ "http://gss-data.org.uk/graph/semstats/isicr4-na... 4311\n",
+ "http://gss-data.org.uk/graph/semstats/isicr4-na... 7591\n",
+ "http://gss-data.org.uk/graph/semstats/nacer11 9605\n",
+ "http://gss-data.org.uk/graph/semstats/nacer2 12806\n",
+ "http://gss-data.org.uk/graph/semstats/nacer2-cp... 16066\n",
+ "http://gss-data.org.uk/graph/semstats/nacer2-cp... 15716\n",
+ "http://gss-data.org.uk/graph/semstats/nacer11-n... 5096\n",
+ "http://gss-data.org.uk/graph/semstats/naics2012 17756\n",
+ "http://publishmydata.com/graph/vocabulary/22-rd... 102\n",
+ "http://publishmydata.com/graph/vocabulary/admingeo 1801\n",
+ "http://publishmydata.com/graph/vocabulary/sdmx 100\n",
+ "http://publishmydata.com/graph/vocabulary/skos 259\n",
+ "http://publishmydata.com/graph/vocabulary/sdmx-... 295\n",
+ "http://publishmydata.com/graph/vocabulary/stati... 32\n",
+ "http://publishmydata.com/graph/vocabulary/rdf-s... 87\n",
+ "http://publishmydata.com/graph/vocabulary/sdmx-... 1021\n",
+ "... ...\n",
+ "http://gss-data.org.uk/graph/ons-cpa/metadata 14\n",
+ "http://gss-data.org.uk/graph/ons-cpa 12806153\n",
+ "http://gss-data.org.uk/graph/ons-pink-book-chap... 14\n",
+ "http://gss-data.org.uk/graph/ons-pink-book-chap... 150770\n",
+ "http://gss-data.org.uk/graph/hmrc-regional-trad... 16\n",
+ "http://gss-data.org.uk/graph/bop-services 140\n",
+ "http://gss-data.org.uk/graph/pink-book-services 1640\n",
+ "http://gss-data.org.uk/graph/export-and-import-... 63\n",
+ "http://gss-data.org.uk/graph/age-of-business 164\n",
+ "http://gss-data.org.uk/graph/hmrc-regional-trad... 16650299\n",
+ "http://publishmydata.com/graph/vocabulary/csvw 632\n",
+ "http://gss-data.org.uk/graph/gdp233 77\n",
+ "http://gss-data.org.uk/graph/hmrc-uk-trade-in-g... 15\n",
+ "http://gss-data.org.uk/graph/hmrc-uk-trade-in-g... 27243\n",
+ "http://gss-data.org.uk/def/cdid 37137\n",
+ "http://gss-data.org.uk/graph/hmrc-industry-groups 152\n",
+ "http://gss-data.org.uk/graph/statistical-geography 555\n",
+ "http://gss-data.org.uk/graph/pmd-foi-ontology 77\n",
+ "http://gss-data.org.uk/graph/hmrc-regions 175\n",
+ "http://gss-data.org.uk/graph/ons-bop-individual... 14\n",
+ "http://gss-data.org.uk/graph/ons-bop-individual... 1777303\n",
+ "http://foo.bar.com/ 4\n",
+ "http://gss-data.org.uk/graph/measurement-units 67\n",
+ "https://trade.ec.europa.eu/def/cn_2012 61259\n",
+ "https://trade.ec.europa.eu/def/cn_2013 61184\n",
+ "https://trade.ec.europa.eu/def/cn_2014 61194\n",
+ "https://trade.ec.europa.eu/def/cn_2015 61224\n",
+ "http://gss-data.org.uk/graph/reference-time 2468\n",
+ "http://gss-data.org.uk/graph/sitc-4 44194\n",
+ "https://trade.ec.europa.eu/def/cn_2016 61369\n",
+ "\n",
+ "[99 rows x 1 columns]"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sparql.setQuery(\"\"\"\n",
+ "SELECT (COUNT(*) as ?size) ?graph\n",
+ "WHERE {\n",
+ " GRAPH ?graph {\n",
+ " ?s ?p ?o\n",
+ " }\n",
+ "} GROUP BY ?graph\n",
+ "\"\"\")\n",
+ "# Triple count of every named graph, keyed by graph URI.\n",
+ "triple_counts = {}\n",
+ "for binding in sparql.query().bindings:\n",
+ "    triple_counts[binding['graph'].value] = int(binding['size'].value)\n",
+ "sizes = pd.DataFrame()\n",
+ "sizes['Triples'] = pd.Series(triple_counts)\n",
+ "sizes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Look up each dataset's triple count by its graph URI, then discard the URI column.\n",
+ "table = table.merge(sizes, left_on='Graph', right_index=True).drop(columns=['Graph'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " Dataset | \n",
+ " Observations | \n",
+ " Triples | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " HMRC Overseas Trade Statistics | \n",
+ " 1499970 | \n",
+ " 36048123 | \n",
+ "
\n",
+ " \n",
+ " HMRC Regional Trade Statistics | \n",
+ " 639936 | \n",
+ " 16650299 | \n",
+ "
\n",
+ " \n",
+ " ONS CPA | \n",
+ " 399992 | \n",
+ " 12806153 | \n",
+ "
\n",
+ " \n",
+ " ONS Trade in goods MRETS | \n",
+ " 264270 | \n",
+ " 7929621 | \n",
+ "
\n",
+ " \n",
+ " ONS BoP Individual Country Data | \n",
+ " 80756 | \n",
+ " 1777303 | \n",
+ "
\n",
+ " \n",
+ " ONS Pink Book Chapter 3 | \n",
+ " 5378 | \n",
+ " 150770 | \n",
+ "
\n",
+ " \n",
+ " HMRC UK Trade in Goods Statistics by Business Characteristics 2015 | \n",
+ " 947 | \n",
+ " 27243 | \n",
+ "
\n",
+ " \n",
+ " ONS ABS | \n",
+ " 648 | \n",
+ " 54616 | \n",
+ "
\n",
+ " \n",
+ " ONS Balance of Payments | \n",
+ " 396 | \n",
+ " 10593 | \n",
+ "
\n",
+ " \n",
+ "
"
+ ],
+ "text/plain": [
+ "<IPython.core.display.HTML object>"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from urllib.parse import urlencode\n",
+ "def gss_url(uri):\n",
+ "    \"\"\"Return the http://gss-data.org.uk/resource URL whose 'uri' query parameter is `uri` (URL-encoded).\"\"\"\n",
+ "    return 'http://gss-data.org.uk/resource?' + urlencode({'uri': uri})\n",
+ "\n",
+ "# Render each label as a link built by gss_url; x.name is the row's index label, expected to be the dataset URI.\n",
+ "table['Dataset'] = table.apply(lambda x: f'<a href=\"{gss_url(x.name)}\">{x.Label}</a>', axis=1)\n",
+ "table = table[['Dataset', 'Observations', 'Triples']]  # selecting columns also drops 'Label'\n",
+ "pd.set_option('max_colwidth', -1)\n",
+ "stats_html = table.to_html(escape=False, index=False)  # escape=False keeps the <a> markup intact\n",
+ "with open('dataset-stats.html', 'w') as f:\n",
+ "    f.write(stats_html)\n",
+ "HTML(stats_html)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}