diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0bd8549 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +.idea +in +project/target +target +.ipynb_checkpoints +report.xml \ No newline at end of file diff --git a/dataset-stats/size.ipynb b/dataset-stats/size.ipynb new file mode 100644 index 0000000..5ea605b --- /dev/null +++ b/dataset-stats/size.ipynb @@ -0,0 +1,795 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Gather some statistics about the datasets, vocabularies and codelists loaded into PMD." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from SPARQLWrapper import SPARQLWrapper2\n", + "import pandas as pd\n", + "from IPython.display import HTML\n", + "\n", + "endpoint = \"https://production-drafter-ons-alpha.publishmydata.com/v1/sparql/live\"\n", + "sparql = SPARQLWrapper2(endpoint)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Find the number of observations in each dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Observations
http://gss-data.org.uk/data/hmrc-overseas-trade-statistics1499970
http://gss-data.org.uk/data/hmrc-regional-trade-statistics639936
http://gss-data.org.uk/data/ons-cpa399992
http://gss-data.org.uk/data/ons-trade-in-goods-mrets264270
http://gss-data.org.uk/data/ons-bop-individual-country-data80756
http://gss-data.org.uk/data/ons-pink-book-chapter-35378
http://gss-data.org.uk/data/hmrc-uk-trade-in-goods-statistics-by-business-characteristics-2015947
http://gss-data.org.uk/data/ons-abs648
http://gss-data.org.uk/data/ons-balance-of-payments396
\n", + "
" + ], + "text/plain": [ + " Observations\n", + "http://gss-data.org.uk/data/hmrc-overseas-trade... 1499970\n", + "http://gss-data.org.uk/data/hmrc-regional-trade... 639936\n", + "http://gss-data.org.uk/data/ons-cpa 399992\n", + "http://gss-data.org.uk/data/ons-trade-in-goods-... 264270\n", + "http://gss-data.org.uk/data/ons-bop-individual-... 80756\n", + "http://gss-data.org.uk/data/ons-pink-book-chapt... 5378\n", + "http://gss-data.org.uk/data/hmrc-uk-trade-in-go... 947\n", + "http://gss-data.org.uk/data/ons-abs 648\n", + "http://gss-data.org.uk/data/ons-balance-of-paym... 396" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sparql.setQuery(\"\"\"\n", + "PREFIX rdfs: \n", + "PREFIX qb: \n", + "\n", + "SELECT (COUNT(?obs) AS ?observations) ?dataset\n", + "WHERE {\n", + " ?obs a qb:Observation ;\n", + " qb:dataSet ?dataset .\n", + "} GROUP BY ?dataset ORDER BY DESC(?observations)\n", + "\"\"\")\n", + "\n", + "table = pd.DataFrame()\n", + "table['Observations'] = pd.Series({\n", + " res['dataset'].value : res['observations'].value\n", + " for res in sparql.query().bindings\n", + "})\n", + "table" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ObservationsLabelGraph
http://gss-data.org.uk/data/hmrc-overseas-trade-statistics1499970HMRC Overseas Trade Statisticshttp://gss-data.org.uk/graph/hmrc-overseas-tra...
http://gss-data.org.uk/data/hmrc-regional-trade-statistics639936HMRC Regional Trade Statisticshttp://gss-data.org.uk/graph/hmrc-regional-tra...
http://gss-data.org.uk/data/ons-cpa399992ONS CPAhttp://gss-data.org.uk/graph/ons-cpa
http://gss-data.org.uk/data/ons-trade-in-goods-mrets264270ONS Trade in goods MRETShttp://gss-data.org.uk/graph/ons-trade-in-good...
http://gss-data.org.uk/data/ons-bop-individual-country-data80756ONS BoP Individual Country Datahttp://gss-data.org.uk/graph/ons-bop-individua...
http://gss-data.org.uk/data/ons-pink-book-chapter-35378ONS Pink Book Chapter 3http://gss-data.org.uk/graph/ons-pink-book-cha...
http://gss-data.org.uk/data/hmrc-uk-trade-in-goods-statistics-by-business-characteristics-2015947HMRC UK Trade in Goods Statistics by Business ...http://gss-data.org.uk/graph/hmrc-uk-trade-in-...
http://gss-data.org.uk/data/ons-abs648ONS ABShttp://gss-data.org.uk/graph/ons-abs
http://gss-data.org.uk/data/ons-balance-of-payments396ONS Balance of Paymentshttp://gss-data.org.uk/graph/ons-balance-of-pa...
\n", + "
" + ], + "text/plain": [ + " Observations \\\n", + "http://gss-data.org.uk/data/hmrc-overseas-trade... 1499970 \n", + "http://gss-data.org.uk/data/hmrc-regional-trade... 639936 \n", + "http://gss-data.org.uk/data/ons-cpa 399992 \n", + "http://gss-data.org.uk/data/ons-trade-in-goods-... 264270 \n", + "http://gss-data.org.uk/data/ons-bop-individual-... 80756 \n", + "http://gss-data.org.uk/data/ons-pink-book-chapt... 5378 \n", + "http://gss-data.org.uk/data/hmrc-uk-trade-in-go... 947 \n", + "http://gss-data.org.uk/data/ons-abs 648 \n", + "http://gss-data.org.uk/data/ons-balance-of-paym... 396 \n", + "\n", + " Label \\\n", + "http://gss-data.org.uk/data/hmrc-overseas-trade... HMRC Overseas Trade Statistics \n", + "http://gss-data.org.uk/data/hmrc-regional-trade... HMRC Regional Trade Statistics \n", + "http://gss-data.org.uk/data/ons-cpa ONS CPA \n", + "http://gss-data.org.uk/data/ons-trade-in-goods-... ONS Trade in goods MRETS \n", + "http://gss-data.org.uk/data/ons-bop-individual-... ONS BoP Individual Country Data \n", + "http://gss-data.org.uk/data/ons-pink-book-chapt... ONS Pink Book Chapter 3 \n", + "http://gss-data.org.uk/data/hmrc-uk-trade-in-go... HMRC UK Trade in Goods Statistics by Business ... \n", + "http://gss-data.org.uk/data/ons-abs ONS ABS \n", + "http://gss-data.org.uk/data/ons-balance-of-paym... ONS Balance of Payments \n", + "\n", + " Graph \n", + "http://gss-data.org.uk/data/hmrc-overseas-trade... http://gss-data.org.uk/graph/hmrc-overseas-tra... \n", + "http://gss-data.org.uk/data/hmrc-regional-trade... http://gss-data.org.uk/graph/hmrc-regional-tra... \n", + "http://gss-data.org.uk/data/ons-cpa http://gss-data.org.uk/graph/ons-cpa \n", + "http://gss-data.org.uk/data/ons-trade-in-goods-... http://gss-data.org.uk/graph/ons-trade-in-good... \n", + "http://gss-data.org.uk/data/ons-bop-individual-... http://gss-data.org.uk/graph/ons-bop-individua... \n", + "http://gss-data.org.uk/data/ons-pink-book-chapt... http://gss-data.org.uk/graph/ons-pink-book-cha... \n", + "http://gss-data.org.uk/data/hmrc-uk-trade-in-go... http://gss-data.org.uk/graph/hmrc-uk-trade-in-... \n", + "http://gss-data.org.uk/data/ons-abs http://gss-data.org.uk/graph/ons-abs \n", + "http://gss-data.org.uk/data/ons-balance-of-paym... http://gss-data.org.uk/graph/ons-balance-of-pa... " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sparql.setQuery(\"\"\"\n", + "PREFIX rdfs: \n", + "PREFIX qb: \n", + "PREFIX pmd: \n", + "\n", + "SELECT DISTINCT ?dataset ?datasetLabel ?graph\n", + "WHERE {\n", + " ?dataset a qb:DataSet ;\n", + " rdfs:label ?datasetLabel ;\n", + " pmd:graph ?graph .\n", + "}\n", + "\"\"\")\n", + "\n", + "results = sparql.query().bindings\n", + "\n", + "table['Label'] = pd.Series({\n", + " res['dataset'].value: res['datasetLabel'].value\n", + " for res in results\n", + "})\n", + "\n", + "table['Graph'] = pd.Series({\n", + " res['dataset'].value: res['graph'].value\n", + " for res in results\n", + "})\n", + "table" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Triples
http://gss-data.org.uk/graph/semstats/cpav2008-cpav2128071
http://gss-data.org.uk/graph/semstats/cpav200847707
http://gss-data.org.uk/graph/semstats/cpav2144275
http://gss-data.org.uk/graph/semstats/cpcv1129269
http://gss-data.org.uk/graph/semstats/cpcv244159
http://gss-data.org.uk/graph/semstats/cpcv2136837
http://gss-data.org.uk/graph/semstats/cpcv11-cpcv215202
http://gss-data.org.uk/graph/semstats/cpcv2-cpcv2114788
http://gss-data.org.uk/graph/semstats/isicr315438
http://gss-data.org.uk/graph/semstats/isicr31-cpcv1113350
http://gss-data.org.uk/graph/semstats/isicr31-isicr44116
http://gss-data.org.uk/graph/semstats/isicr49249
http://gss-data.org.uk/graph/semstats/isicr4-cpcv2113320
http://gss-data.org.uk/graph/semstats/isicr4-cpcv212305
http://gss-data.org.uk/graph/semstats/isicr4-nacer24311
http://gss-data.org.uk/graph/semstats/isicr4-naics20127591
http://gss-data.org.uk/graph/semstats/nacer119605
http://gss-data.org.uk/graph/semstats/nacer212806
http://gss-data.org.uk/graph/semstats/nacer2-cpav2116066
http://gss-data.org.uk/graph/semstats/nacer2-cpav200815716
http://gss-data.org.uk/graph/semstats/nacer11-nacer25096
http://gss-data.org.uk/graph/semstats/naics201217756
http://publishmydata.com/graph/vocabulary/22-rdf-syntax-ns102
http://publishmydata.com/graph/vocabulary/admingeo1801
http://publishmydata.com/graph/vocabulary/sdmx100
http://publishmydata.com/graph/vocabulary/skos259
http://publishmydata.com/graph/vocabulary/sdmx-subject295
http://publishmydata.com/graph/vocabulary/statistical-quality32
http://publishmydata.com/graph/vocabulary/rdf-schema87
http://publishmydata.com/graph/vocabulary/sdmx-concept1021
......
http://gss-data.org.uk/graph/ons-cpa/metadata14
http://gss-data.org.uk/graph/ons-cpa12806153
http://gss-data.org.uk/graph/ons-pink-book-chapter-3/metadata14
http://gss-data.org.uk/graph/ons-pink-book-chapter-3150770
http://gss-data.org.uk/graph/hmrc-regional-trade-statistics/metadata16
http://gss-data.org.uk/graph/bop-services140
http://gss-data.org.uk/graph/pink-book-services1640
http://gss-data.org.uk/graph/export-and-import-activity63
http://gss-data.org.uk/graph/age-of-business164
http://gss-data.org.uk/graph/hmrc-regional-trade-statistics16650299
http://publishmydata.com/graph/vocabulary/csvw632
http://gss-data.org.uk/graph/gdp23377
http://gss-data.org.uk/graph/hmrc-uk-trade-in-goods-statistics-by-business-characteristics-2015/metadata15
http://gss-data.org.uk/graph/hmrc-uk-trade-in-goods-statistics-by-business-characteristics-201527243
http://gss-data.org.uk/def/cdid37137
http://gss-data.org.uk/graph/hmrc-industry-groups152
http://gss-data.org.uk/graph/statistical-geography555
http://gss-data.org.uk/graph/pmd-foi-ontology77
http://gss-data.org.uk/graph/hmrc-regions175
http://gss-data.org.uk/graph/ons-bop-individual-country-data/metadata14
http://gss-data.org.uk/graph/ons-bop-individual-country-data1777303
http://foo.bar.com/4
http://gss-data.org.uk/graph/measurement-units67
https://trade.ec.europa.eu/def/cn_201261259
https://trade.ec.europa.eu/def/cn_201361184
https://trade.ec.europa.eu/def/cn_201461194
https://trade.ec.europa.eu/def/cn_201561224
http://gss-data.org.uk/graph/reference-time2468
http://gss-data.org.uk/graph/sitc-444194
https://trade.ec.europa.eu/def/cn_201661369
\n", + "

99 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " Triples\n", + "http://gss-data.org.uk/graph/semstats/cpav2008-... 28071\n", + "http://gss-data.org.uk/graph/semstats/cpav2008 47707\n", + "http://gss-data.org.uk/graph/semstats/cpav21 44275\n", + "http://gss-data.org.uk/graph/semstats/cpcv11 29269\n", + "http://gss-data.org.uk/graph/semstats/cpcv2 44159\n", + "http://gss-data.org.uk/graph/semstats/cpcv21 36837\n", + "http://gss-data.org.uk/graph/semstats/cpcv11-cpcv2 15202\n", + "http://gss-data.org.uk/graph/semstats/cpcv2-cpcv21 14788\n", + "http://gss-data.org.uk/graph/semstats/isicr31 5438\n", + "http://gss-data.org.uk/graph/semstats/isicr31-c... 13350\n", + "http://gss-data.org.uk/graph/semstats/isicr31-i... 4116\n", + "http://gss-data.org.uk/graph/semstats/isicr4 9249\n", + "http://gss-data.org.uk/graph/semstats/isicr4-cp... 13320\n", + "http://gss-data.org.uk/graph/semstats/isicr4-cpcv2 12305\n", + "http://gss-data.org.uk/graph/semstats/isicr4-na... 4311\n", + "http://gss-data.org.uk/graph/semstats/isicr4-na... 7591\n", + "http://gss-data.org.uk/graph/semstats/nacer11 9605\n", + "http://gss-data.org.uk/graph/semstats/nacer2 12806\n", + "http://gss-data.org.uk/graph/semstats/nacer2-cp... 16066\n", + "http://gss-data.org.uk/graph/semstats/nacer2-cp... 15716\n", + "http://gss-data.org.uk/graph/semstats/nacer11-n... 5096\n", + "http://gss-data.org.uk/graph/semstats/naics2012 17756\n", + "http://publishmydata.com/graph/vocabulary/22-rd... 102\n", + "http://publishmydata.com/graph/vocabulary/admingeo 1801\n", + "http://publishmydata.com/graph/vocabulary/sdmx 100\n", + "http://publishmydata.com/graph/vocabulary/skos 259\n", + "http://publishmydata.com/graph/vocabulary/sdmx-... 295\n", + "http://publishmydata.com/graph/vocabulary/stati... 32\n", + "http://publishmydata.com/graph/vocabulary/rdf-s... 87\n", + "http://publishmydata.com/graph/vocabulary/sdmx-... 1021\n", + "... ...\n", + "http://gss-data.org.uk/graph/ons-cpa/metadata 14\n", + "http://gss-data.org.uk/graph/ons-cpa 12806153\n", + "http://gss-data.org.uk/graph/ons-pink-book-chap... 14\n", + "http://gss-data.org.uk/graph/ons-pink-book-chap... 150770\n", + "http://gss-data.org.uk/graph/hmrc-regional-trad... 16\n", + "http://gss-data.org.uk/graph/bop-services 140\n", + "http://gss-data.org.uk/graph/pink-book-services 1640\n", + "http://gss-data.org.uk/graph/export-and-import-... 63\n", + "http://gss-data.org.uk/graph/age-of-business 164\n", + "http://gss-data.org.uk/graph/hmrc-regional-trad... 16650299\n", + "http://publishmydata.com/graph/vocabulary/csvw 632\n", + "http://gss-data.org.uk/graph/gdp233 77\n", + "http://gss-data.org.uk/graph/hmrc-uk-trade-in-g... 15\n", + "http://gss-data.org.uk/graph/hmrc-uk-trade-in-g... 27243\n", + "http://gss-data.org.uk/def/cdid 37137\n", + "http://gss-data.org.uk/graph/hmrc-industry-groups 152\n", + "http://gss-data.org.uk/graph/statistical-geography 555\n", + "http://gss-data.org.uk/graph/pmd-foi-ontology 77\n", + "http://gss-data.org.uk/graph/hmrc-regions 175\n", + "http://gss-data.org.uk/graph/ons-bop-individual... 14\n", + "http://gss-data.org.uk/graph/ons-bop-individual... 1777303\n", + "http://foo.bar.com/ 4\n", + "http://gss-data.org.uk/graph/measurement-units 67\n", + "https://trade.ec.europa.eu/def/cn_2012 61259\n", + "https://trade.ec.europa.eu/def/cn_2013 61184\n", + "https://trade.ec.europa.eu/def/cn_2014 61194\n", + "https://trade.ec.europa.eu/def/cn_2015 61224\n", + "http://gss-data.org.uk/graph/reference-time 2468\n", + "http://gss-data.org.uk/graph/sitc-4 44194\n", + "https://trade.ec.europa.eu/def/cn_2016 61369\n", + "\n", + "[99 rows x 1 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sparql.setQuery(\"\"\"\n", + "SELECT (COUNT(*) as ?size) ?graph\n", + "WHERE {\n", + " GRAPH ?graph {\n", + " ?s ?p ?o\n", + " }\n", + "} GROUP BY ?graph\n", + "\"\"\")\n", + "\n", + "sizes = pd.DataFrame()\n", + "sizes['Triples'] = pd.Series({\n", + " res['graph'].value : int(res['size'].value)\n", + " for res in sparql.query().bindings\n", + "})\n", + "sizes" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "table = table.merge(sizes, left_on='Graph', right_index=True)\n", + "table.drop(columns=['Graph'], inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DatasetObservationsTriples
HMRC Overseas Trade Statistics149997036048123
HMRC Regional Trade Statistics63993616650299
ONS CPA39999212806153
ONS Trade in goods MRETS2642707929621
ONS BoP Individual Country Data807561777303
ONS Pink Book Chapter 35378150770
HMRC UK Trade in Goods Statistics by Business Characteristics 201594727243
ONS ABS64854616
ONS Balance of Payments39610593
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from urllib.parse import urlencode\n", + "def gss_url(uri):\n", + " return 'http://gss-data.org.uk/resource?' + urlencode({\n", + " 'uri': uri\n", + " })\n", + "\n", + "table['Dataset'] = table.apply(lambda x: f'{x.Label}', axis=1)\n", + "table.drop(columns=['Label'], inplace=True)\n", + "table = table[['Dataset', 'Observations', 'Triples']]\n", + "pd.set_option('max_colwidth', -1)\n", + "with open('dataset-stats.html', 'w') as f:\n", + " f.write(table.to_html(escape=False, index=False))\n", + "HTML(table.to_html(escape=False, index=False))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}