Newer
Older
DataReport / dataset-stats / size.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Gather some statistics about the datasets, vocabularies and codelists loaded into PMD (PublishMyData)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from SPARQLWrapper import SPARQLWrapper2\n",
    "import pandas as pd\n",
    "from IPython.display import HTML\n",
    "\n",
    "# The `live` SPARQL endpoint that every query in this notebook is sent to.\n",
    "endpoint = \"https://production-drafter-ons-alpha.publishmydata.com/v1/sparql/live\"\n",
    "# One shared client: later cells call setQuery() on it and read query().bindings.\n",
    "sparql = SPARQLWrapper2(endpoint)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Find the number of observations in each dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Observations</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/data/hmrc-overseas-trade-statistics</th>\n",
       "      <td>1499970</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/data/hmrc-regional-trade-statistics</th>\n",
       "      <td>639936</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/data/ons-cpa</th>\n",
       "      <td>399992</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/data/ons-trade-in-goods-mrets</th>\n",
       "      <td>264270</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/data/ons-bop-individual-country-data</th>\n",
       "      <td>80756</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/data/ons-pink-book-chapter-3</th>\n",
       "      <td>5378</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/data/hmrc-uk-trade-in-goods-statistics-by-business-characteristics-2015</th>\n",
       "      <td>947</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/data/ons-abs</th>\n",
       "      <td>648</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/data/ons-balance-of-payments</th>\n",
       "      <td>396</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                   Observations\n",
       "http://gss-data.org.uk/data/hmrc-overseas-trade...      1499970\n",
       "http://gss-data.org.uk/data/hmrc-regional-trade...       639936\n",
       "http://gss-data.org.uk/data/ons-cpa                      399992\n",
       "http://gss-data.org.uk/data/ons-trade-in-goods-...       264270\n",
       "http://gss-data.org.uk/data/ons-bop-individual-...        80756\n",
       "http://gss-data.org.uk/data/ons-pink-book-chapt...         5378\n",
       "http://gss-data.org.uk/data/hmrc-uk-trade-in-go...          947\n",
       "http://gss-data.org.uk/data/ons-abs                         648\n",
       "http://gss-data.org.uk/data/ons-balance-of-paym...          396"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count the qb:Observation resources linked to each dataset via qb:dataSet,\n",
    "# largest dataset first.\n",
    "sparql.setQuery(\"\"\"\n",
    "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n",
    "PREFIX qb:   <http://purl.org/linked-data/cube#>\n",
    "\n",
    "SELECT (COUNT(?obs) AS ?observations) ?dataset\n",
    "WHERE {\n",
    "  ?obs a qb:Observation ;\n",
    "         qb:dataSet ?dataset .\n",
    "} GROUP BY ?dataset ORDER BY DESC(?observations)\n",
    "\"\"\")\n",
    "\n",
    "# Binding values arrive as strings; convert the count to int so the column is\n",
    "# numeric (the per-graph triple counts are parsed the same way) instead of text\n",
    "# that would sort lexicographically and could not be summed.\n",
    "# The frame is indexed by dataset URI, in the order the query returned them.\n",
    "table = pd.DataFrame()\n",
    "table['Observations'] = pd.Series({\n",
    "    res['dataset'].value: int(res['observations'].value)\n",
    "    for res in sparql.query().bindings\n",
    "})\n",
    "table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Observations</th>\n",
       "      <th>Label</th>\n",
       "      <th>Graph</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/data/hmrc-overseas-trade-statistics</th>\n",
       "      <td>1499970</td>\n",
       "      <td>HMRC Overseas Trade Statistics</td>\n",
       "      <td>http://gss-data.org.uk/graph/hmrc-overseas-tra...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/data/hmrc-regional-trade-statistics</th>\n",
       "      <td>639936</td>\n",
       "      <td>HMRC Regional Trade Statistics</td>\n",
       "      <td>http://gss-data.org.uk/graph/hmrc-regional-tra...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/data/ons-cpa</th>\n",
       "      <td>399992</td>\n",
       "      <td>ONS CPA</td>\n",
       "      <td>http://gss-data.org.uk/graph/ons-cpa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/data/ons-trade-in-goods-mrets</th>\n",
       "      <td>264270</td>\n",
       "      <td>ONS Trade in goods MRETS</td>\n",
       "      <td>http://gss-data.org.uk/graph/ons-trade-in-good...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/data/ons-bop-individual-country-data</th>\n",
       "      <td>80756</td>\n",
       "      <td>ONS BoP Individual Country Data</td>\n",
       "      <td>http://gss-data.org.uk/graph/ons-bop-individua...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/data/ons-pink-book-chapter-3</th>\n",
       "      <td>5378</td>\n",
       "      <td>ONS Pink Book Chapter 3</td>\n",
       "      <td>http://gss-data.org.uk/graph/ons-pink-book-cha...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/data/hmrc-uk-trade-in-goods-statistics-by-business-characteristics-2015</th>\n",
       "      <td>947</td>\n",
       "      <td>HMRC UK Trade in Goods Statistics by Business ...</td>\n",
       "      <td>http://gss-data.org.uk/graph/hmrc-uk-trade-in-...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/data/ons-abs</th>\n",
       "      <td>648</td>\n",
       "      <td>ONS ABS</td>\n",
       "      <td>http://gss-data.org.uk/graph/ons-abs</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/data/ons-balance-of-payments</th>\n",
       "      <td>396</td>\n",
       "      <td>ONS Balance of Payments</td>\n",
       "      <td>http://gss-data.org.uk/graph/ons-balance-of-pa...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                   Observations  \\\n",
       "http://gss-data.org.uk/data/hmrc-overseas-trade...      1499970   \n",
       "http://gss-data.org.uk/data/hmrc-regional-trade...       639936   \n",
       "http://gss-data.org.uk/data/ons-cpa                      399992   \n",
       "http://gss-data.org.uk/data/ons-trade-in-goods-...       264270   \n",
       "http://gss-data.org.uk/data/ons-bop-individual-...        80756   \n",
       "http://gss-data.org.uk/data/ons-pink-book-chapt...         5378   \n",
       "http://gss-data.org.uk/data/hmrc-uk-trade-in-go...          947   \n",
       "http://gss-data.org.uk/data/ons-abs                         648   \n",
       "http://gss-data.org.uk/data/ons-balance-of-paym...          396   \n",
       "\n",
       "                                                                                                Label  \\\n",
       "http://gss-data.org.uk/data/hmrc-overseas-trade...                     HMRC Overseas Trade Statistics   \n",
       "http://gss-data.org.uk/data/hmrc-regional-trade...                     HMRC Regional Trade Statistics   \n",
       "http://gss-data.org.uk/data/ons-cpa                                                           ONS CPA   \n",
       "http://gss-data.org.uk/data/ons-trade-in-goods-...                           ONS Trade in goods MRETS   \n",
       "http://gss-data.org.uk/data/ons-bop-individual-...                    ONS BoP Individual Country Data   \n",
       "http://gss-data.org.uk/data/ons-pink-book-chapt...                            ONS Pink Book Chapter 3   \n",
       "http://gss-data.org.uk/data/hmrc-uk-trade-in-go...  HMRC UK Trade in Goods Statistics by Business ...   \n",
       "http://gss-data.org.uk/data/ons-abs                                                           ONS ABS   \n",
       "http://gss-data.org.uk/data/ons-balance-of-paym...                            ONS Balance of Payments   \n",
       "\n",
       "                                                                                                Graph  \n",
       "http://gss-data.org.uk/data/hmrc-overseas-trade...  http://gss-data.org.uk/graph/hmrc-overseas-tra...  \n",
       "http://gss-data.org.uk/data/hmrc-regional-trade...  http://gss-data.org.uk/graph/hmrc-regional-tra...  \n",
       "http://gss-data.org.uk/data/ons-cpa                              http://gss-data.org.uk/graph/ons-cpa  \n",
       "http://gss-data.org.uk/data/ons-trade-in-goods-...  http://gss-data.org.uk/graph/ons-trade-in-good...  \n",
       "http://gss-data.org.uk/data/ons-bop-individual-...  http://gss-data.org.uk/graph/ons-bop-individua...  \n",
       "http://gss-data.org.uk/data/ons-pink-book-chapt...  http://gss-data.org.uk/graph/ons-pink-book-cha...  \n",
       "http://gss-data.org.uk/data/hmrc-uk-trade-in-go...  http://gss-data.org.uk/graph/hmrc-uk-trade-in-...  \n",
       "http://gss-data.org.uk/data/ons-abs                              http://gss-data.org.uk/graph/ons-abs  \n",
       "http://gss-data.org.uk/data/ons-balance-of-paym...  http://gss-data.org.uk/graph/ons-balance-of-pa...  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Look up, for every qb:DataSet, its rdfs:label and the pmd:graph recorded for it.\n",
    "sparql.setQuery(\"\"\"\n",
    "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n",
    "PREFIX qb:   <http://purl.org/linked-data/cube#>\n",
    "PREFIX pmd:  <http://publishmydata.com/def/dataset#>\n",
    "\n",
    "SELECT DISTINCT ?dataset ?datasetLabel ?graph\n",
    "WHERE {\n",
    "  ?dataset a qb:DataSet ;\n",
    "         rdfs:label ?datasetLabel ;\n",
    "         pmd:graph ?graph .\n",
    "}\n",
    "\"\"\")\n",
    "\n",
    "# Run the query once and reuse the bindings for both new columns.\n",
    "results = sparql.query().bindings\n",
    "\n",
    "# Each Series is keyed by dataset URI, so pandas aligns it with the index of `table`.\n",
    "table['Label'] = pd.Series(dict(\n",
    "    (row['dataset'].value, row['datasetLabel'].value)\n",
    "    for row in results\n",
    "))\n",
    "\n",
    "table['Graph'] = pd.Series(dict(\n",
    "    (row['dataset'].value, row['graph'].value)\n",
    "    for row in results\n",
    "))\n",
    "table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Triples</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/semstats/cpav2008-cpav21</th>\n",
       "      <td>28071</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/semstats/cpav2008</th>\n",
       "      <td>47707</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/semstats/cpav21</th>\n",
       "      <td>44275</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/semstats/cpcv11</th>\n",
       "      <td>29269</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/semstats/cpcv2</th>\n",
       "      <td>44159</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/semstats/cpcv21</th>\n",
       "      <td>36837</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/semstats/cpcv11-cpcv2</th>\n",
       "      <td>15202</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/semstats/cpcv2-cpcv21</th>\n",
       "      <td>14788</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/semstats/isicr31</th>\n",
       "      <td>5438</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/semstats/isicr31-cpcv11</th>\n",
       "      <td>13350</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/semstats/isicr31-isicr4</th>\n",
       "      <td>4116</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/semstats/isicr4</th>\n",
       "      <td>9249</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/semstats/isicr4-cpcv21</th>\n",
       "      <td>13320</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/semstats/isicr4-cpcv2</th>\n",
       "      <td>12305</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/semstats/isicr4-nacer2</th>\n",
       "      <td>4311</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/semstats/isicr4-naics2012</th>\n",
       "      <td>7591</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/semstats/nacer11</th>\n",
       "      <td>9605</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/semstats/nacer2</th>\n",
       "      <td>12806</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/semstats/nacer2-cpav21</th>\n",
       "      <td>16066</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/semstats/nacer2-cpav2008</th>\n",
       "      <td>15716</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/semstats/nacer11-nacer2</th>\n",
       "      <td>5096</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/semstats/naics2012</th>\n",
       "      <td>17756</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://publishmydata.com/graph/vocabulary/22-rdf-syntax-ns</th>\n",
       "      <td>102</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://publishmydata.com/graph/vocabulary/admingeo</th>\n",
       "      <td>1801</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://publishmydata.com/graph/vocabulary/sdmx</th>\n",
       "      <td>100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://publishmydata.com/graph/vocabulary/skos</th>\n",
       "      <td>259</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://publishmydata.com/graph/vocabulary/sdmx-subject</th>\n",
       "      <td>295</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://publishmydata.com/graph/vocabulary/statistical-quality</th>\n",
       "      <td>32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://publishmydata.com/graph/vocabulary/rdf-schema</th>\n",
       "      <td>87</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://publishmydata.com/graph/vocabulary/sdmx-concept</th>\n",
       "      <td>1021</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/ons-cpa/metadata</th>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/ons-cpa</th>\n",
       "      <td>12806153</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/ons-pink-book-chapter-3/metadata</th>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/ons-pink-book-chapter-3</th>\n",
       "      <td>150770</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/hmrc-regional-trade-statistics/metadata</th>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/bop-services</th>\n",
       "      <td>140</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/pink-book-services</th>\n",
       "      <td>1640</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/export-and-import-activity</th>\n",
       "      <td>63</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/age-of-business</th>\n",
       "      <td>164</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/hmrc-regional-trade-statistics</th>\n",
       "      <td>16650299</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://publishmydata.com/graph/vocabulary/csvw</th>\n",
       "      <td>632</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/gdp233</th>\n",
       "      <td>77</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/hmrc-uk-trade-in-goods-statistics-by-business-characteristics-2015/metadata</th>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/hmrc-uk-trade-in-goods-statistics-by-business-characteristics-2015</th>\n",
       "      <td>27243</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/def/cdid</th>\n",
       "      <td>37137</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/hmrc-industry-groups</th>\n",
       "      <td>152</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/statistical-geography</th>\n",
       "      <td>555</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/pmd-foi-ontology</th>\n",
       "      <td>77</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/hmrc-regions</th>\n",
       "      <td>175</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/ons-bop-individual-country-data/metadata</th>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/ons-bop-individual-country-data</th>\n",
       "      <td>1777303</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://foo.bar.com/</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/measurement-units</th>\n",
       "      <td>67</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>https://trade.ec.europa.eu/def/cn_2012</th>\n",
       "      <td>61259</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>https://trade.ec.europa.eu/def/cn_2013</th>\n",
       "      <td>61184</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>https://trade.ec.europa.eu/def/cn_2014</th>\n",
       "      <td>61194</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>https://trade.ec.europa.eu/def/cn_2015</th>\n",
       "      <td>61224</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/reference-time</th>\n",
       "      <td>2468</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>http://gss-data.org.uk/graph/sitc-4</th>\n",
       "      <td>44194</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>https://trade.ec.europa.eu/def/cn_2016</th>\n",
       "      <td>61369</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>99 rows × 1 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                     Triples\n",
       "http://gss-data.org.uk/graph/semstats/cpav2008-...     28071\n",
       "http://gss-data.org.uk/graph/semstats/cpav2008         47707\n",
       "http://gss-data.org.uk/graph/semstats/cpav21           44275\n",
       "http://gss-data.org.uk/graph/semstats/cpcv11           29269\n",
       "http://gss-data.org.uk/graph/semstats/cpcv2            44159\n",
       "http://gss-data.org.uk/graph/semstats/cpcv21           36837\n",
       "http://gss-data.org.uk/graph/semstats/cpcv11-cpcv2     15202\n",
       "http://gss-data.org.uk/graph/semstats/cpcv2-cpcv21     14788\n",
       "http://gss-data.org.uk/graph/semstats/isicr31           5438\n",
       "http://gss-data.org.uk/graph/semstats/isicr31-c...     13350\n",
       "http://gss-data.org.uk/graph/semstats/isicr31-i...      4116\n",
       "http://gss-data.org.uk/graph/semstats/isicr4            9249\n",
       "http://gss-data.org.uk/graph/semstats/isicr4-cp...     13320\n",
       "http://gss-data.org.uk/graph/semstats/isicr4-cpcv2     12305\n",
       "http://gss-data.org.uk/graph/semstats/isicr4-na...      4311\n",
       "http://gss-data.org.uk/graph/semstats/isicr4-na...      7591\n",
       "http://gss-data.org.uk/graph/semstats/nacer11           9605\n",
       "http://gss-data.org.uk/graph/semstats/nacer2           12806\n",
       "http://gss-data.org.uk/graph/semstats/nacer2-cp...     16066\n",
       "http://gss-data.org.uk/graph/semstats/nacer2-cp...     15716\n",
       "http://gss-data.org.uk/graph/semstats/nacer11-n...      5096\n",
       "http://gss-data.org.uk/graph/semstats/naics2012        17756\n",
       "http://publishmydata.com/graph/vocabulary/22-rd...       102\n",
       "http://publishmydata.com/graph/vocabulary/admingeo      1801\n",
       "http://publishmydata.com/graph/vocabulary/sdmx           100\n",
       "http://publishmydata.com/graph/vocabulary/skos           259\n",
       "http://publishmydata.com/graph/vocabulary/sdmx-...       295\n",
       "http://publishmydata.com/graph/vocabulary/stati...        32\n",
       "http://publishmydata.com/graph/vocabulary/rdf-s...        87\n",
       "http://publishmydata.com/graph/vocabulary/sdmx-...      1021\n",
       "...                                                      ...\n",
       "http://gss-data.org.uk/graph/ons-cpa/metadata             14\n",
       "http://gss-data.org.uk/graph/ons-cpa                12806153\n",
       "http://gss-data.org.uk/graph/ons-pink-book-chap...        14\n",
       "http://gss-data.org.uk/graph/ons-pink-book-chap...    150770\n",
       "http://gss-data.org.uk/graph/hmrc-regional-trad...        16\n",
       "http://gss-data.org.uk/graph/bop-services                140\n",
       "http://gss-data.org.uk/graph/pink-book-services         1640\n",
       "http://gss-data.org.uk/graph/export-and-import-...        63\n",
       "http://gss-data.org.uk/graph/age-of-business             164\n",
       "http://gss-data.org.uk/graph/hmrc-regional-trad...  16650299\n",
       "http://publishmydata.com/graph/vocabulary/csvw           632\n",
       "http://gss-data.org.uk/graph/gdp233                       77\n",
       "http://gss-data.org.uk/graph/hmrc-uk-trade-in-g...        15\n",
       "http://gss-data.org.uk/graph/hmrc-uk-trade-in-g...     27243\n",
       "http://gss-data.org.uk/def/cdid                        37137\n",
       "http://gss-data.org.uk/graph/hmrc-industry-groups        152\n",
       "http://gss-data.org.uk/graph/statistical-geography       555\n",
       "http://gss-data.org.uk/graph/pmd-foi-ontology             77\n",
       "http://gss-data.org.uk/graph/hmrc-regions                175\n",
       "http://gss-data.org.uk/graph/ons-bop-individual...        14\n",
       "http://gss-data.org.uk/graph/ons-bop-individual...   1777303\n",
       "http://foo.bar.com/                                        4\n",
       "http://gss-data.org.uk/graph/measurement-units            67\n",
       "https://trade.ec.europa.eu/def/cn_2012                 61259\n",
       "https://trade.ec.europa.eu/def/cn_2013                 61184\n",
       "https://trade.ec.europa.eu/def/cn_2014                 61194\n",
       "https://trade.ec.europa.eu/def/cn_2015                 61224\n",
       "http://gss-data.org.uk/graph/reference-time             2468\n",
       "http://gss-data.org.uk/graph/sitc-4                    44194\n",
       "https://trade.ec.europa.eu/def/cn_2016                 61369\n",
       "\n",
       "[99 rows x 1 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count the triples held in every named graph of the store.\n",
    "sparql.setQuery(\"\"\"\n",
    "SELECT (COUNT(*) as ?size) ?graph\n",
    "WHERE {\n",
    "  GRAPH ?graph {\n",
    "    ?s ?p ?o\n",
    "  }\n",
    "} GROUP BY ?graph\n",
    "\"\"\")\n",
    "\n",
    "# One row per graph URI, with the count parsed to int.\n",
    "sizes = pd.DataFrame()\n",
    "sizes['Triples'] = pd.Series(dict(\n",
    "    (binding['graph'].value, int(binding['size'].value))\n",
    "    for binding in sparql.query().bindings\n",
    "))\n",
    "sizes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach each dataset's triple count by matching its graph URI against the index of\n",
    "# `sizes` (merge defaults to an inner join, so datasets without a matching graph are\n",
    "# dropped), then discard the graph URI column once it has served as the join key.\n",
    "table = (\n",
    "    table\n",
    "    .merge(sizes, left_on='Graph', right_index=True)\n",
    "    .drop(columns=['Graph'])\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>Dataset</th>\n",
       "      <th>Observations</th>\n",
       "      <th>Triples</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td><a href=\"http://gss-data.org.uk/resource?uri=http%3A%2F%2Fgss-data.org.uk%2Fdata%2Fhmrc-overseas-trade-statistics\">HMRC Overseas Trade Statistics</a></td>\n",
       "      <td>1499970</td>\n",
       "      <td>36048123</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td><a href=\"http://gss-data.org.uk/resource?uri=http%3A%2F%2Fgss-data.org.uk%2Fdata%2Fhmrc-regional-trade-statistics\">HMRC Regional Trade Statistics</a></td>\n",
       "      <td>639936</td>\n",
       "      <td>16650299</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td><a href=\"http://gss-data.org.uk/resource?uri=http%3A%2F%2Fgss-data.org.uk%2Fdata%2Fons-cpa\">ONS CPA</a></td>\n",
       "      <td>399992</td>\n",
       "      <td>12806153</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td><a href=\"http://gss-data.org.uk/resource?uri=http%3A%2F%2Fgss-data.org.uk%2Fdata%2Fons-trade-in-goods-mrets\">ONS Trade in goods MRETS</a></td>\n",
       "      <td>264270</td>\n",
       "      <td>7929621</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td><a href=\"http://gss-data.org.uk/resource?uri=http%3A%2F%2Fgss-data.org.uk%2Fdata%2Fons-bop-individual-country-data\">ONS BoP Individual Country Data</a></td>\n",
       "      <td>80756</td>\n",
       "      <td>1777303</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td><a href=\"http://gss-data.org.uk/resource?uri=http%3A%2F%2Fgss-data.org.uk%2Fdata%2Fons-pink-book-chapter-3\">ONS Pink Book Chapter 3</a></td>\n",
       "      <td>5378</td>\n",
       "      <td>150770</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td><a href=\"http://gss-data.org.uk/resource?uri=http%3A%2F%2Fgss-data.org.uk%2Fdata%2Fhmrc-uk-trade-in-goods-statistics-by-business-characteristics-2015\">HMRC UK Trade in Goods Statistics by Business Characteristics 2015</a></td>\n",
       "      <td>947</td>\n",
       "      <td>27243</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td><a href=\"http://gss-data.org.uk/resource?uri=http%3A%2F%2Fgss-data.org.uk%2Fdata%2Fons-abs\">ONS ABS</a></td>\n",
       "      <td>648</td>\n",
       "      <td>54616</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td><a href=\"http://gss-data.org.uk/resource?uri=http%3A%2F%2Fgss-data.org.uk%2Fdata%2Fons-balance-of-payments\">ONS Balance of Payments</a></td>\n",
       "      <td>396</td>\n",
       "      <td>10593</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from html import escape\n",
    "from urllib.parse import urlencode\n",
    "\n",
    "def gss_url(uri):\n",
    "    \"\"\"Return the gss-data.org.uk resource-page URL for `uri`.\n",
    "\n",
    "    The URI is percent-encoded and passed as the `uri` query parameter.\n",
    "    \"\"\"\n",
    "    return 'http://gss-data.org.uk/resource?' + urlencode({\n",
    "        'uri': uri\n",
    "    })\n",
    "\n",
    "# Build the report in a separate frame so `table` is left untouched and this cell\n",
    "# can be re-run. Labels come from the remote endpoint and are written into raw HTML\n",
    "# (escape=False below), so they and the href are escaped here to stop a stray\n",
    "# '<', '&' or quote from corrupting or injecting markup.\n",
    "report = table.assign(\n",
    "    Dataset=table.apply(\n",
    "        lambda row: f'<a href=\"{escape(gss_url(row.name))}\">{escape(row.Label)}</a>',\n",
    "        axis=1\n",
    "    )\n",
    ")[['Dataset', 'Observations', 'Triples']]\n",
    "\n",
    "# None disables cell truncation so the long links survive (the older -1 spelling is\n",
    "# deprecated). The option is scoped to this rendering instead of being set globally.\n",
    "with pd.option_context('display.max_colwidth', None):\n",
    "    report_html = report.to_html(escape=False, index=False)\n",
    "\n",
    "# Render once, then both save and display the same markup. The encoding is explicit\n",
    "# so non-ASCII labels are written identically on every platform.\n",
    "with open('dataset-stats.html', 'w', encoding='utf-8') as f:\n",
    "    f.write(report_html)\n",
    "HTML(report_html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}