# ---
# jupyter:
#   jupytext:
#     text_representation:
#       extension: .py
#       format_name: light
#       format_version: '1.5'
#       jupytext_version: 1.3.4
#   kernelspec:
#     display_name: Python 3
#     language: python
#     name: python3
# ---
from airtable import Airtable
from jenkins import Jenkins, JenkinsException
# +
from collections import namedtuple
from os import environ

# Credentials and identifiers are read from the process environment; each
# value is None when its variable is not set.
JENKINS_USER = environ.get('JENKINS_USER')
JENKINS_TOKEN = environ.get('JENKINS_TOKEN')
AIRTABLE_TOKEN = environ.get('AIRTABLE_TOKEN')
AIRTABLE_BASE = environ.get('AIRTABLE_BASE')

# Container holding one {record id: fields} mapping per Airtable table.
COGS = namedtuple('Airtable', 'sources families superfamilies producers types')


def _load_table(table_name):
    """Fetch every record of ``table_name`` and index its fields by record id."""
    rows = Airtable(AIRTABLE_BASE, table_name, api_key=AIRTABLE_TOKEN).get_all()
    return {row['id']: row['fields'] for row in rows}


# The table order below must line up with the field order declared on COGS.
at = COGS(*map(_load_table, (
    'Source Data',
    'Family',
    'Superfamily',
    'Dataset Producer',
    'Type',
)))
# +
import pandas as pd

# Build one row per source dataset that is linked to a family:
# [superfamily name, family name, source title, stage].
# Airtable leaves empty fields out of a record's `fields`, so every optional
# value is read defensively and falls back to '' (the same placeholder already
# used for a missing Stage) instead of aborting the whole report on a KeyError.
datasets = []
for source in at.sources.values():
    if 'Family' not in source:
        # Sources not yet assigned to a family are left out of the report.
        continue
    # Only the first linked family (and its first superfamily) is considered.
    family = at.families[source['Family'][0]]
    superfamily_ids = family.get('Superfamily')
    superfamily = at.superfamilies[superfamily_ids[0]] if superfamily_ids else {}
    datasets.append([
        superfamily.get('Name', ''),
        family.get('Name', ''),
        source.get('Name', ''),
        source.get('Stage', ''),
    ])
table = pd.DataFrame(datasets, columns=('Superfamily', 'Family', 'Title', 'Stage'))
table
# -
# Count how many sources sit in each stage within every
# (Superfamily, Family) pair, then flatten the result back into a frame
# whose tally column is called 'Count'.
stage_counts = table.groupby(['Superfamily', 'Family'])['Stage'].value_counts()
table = stage_counts.reset_index(name='Count')
table
# Stages to plot, in stacking order, each paired with its bar colour.
stage_colours = {
    'Backlog': 'xkcd:burnt orange',
    'Candidate': 'xkcd:denim',
    'Prioritized': 'xkcd:sky blue',
    'Published': 'xkcd:pastel green',
}

# One stacked bar per family, one segment per stage.
# - aggfunc='sum': the default ('mean') would average the counts of a family
#   name that occurs under more than one superfamily instead of adding them.
# - reindex rather than .loc: a stage with no rows in the data becomes a
#   zero-filled column instead of raising KeyError; any other stage value
#   (including the '' placeholder for "no stage") is dropped, as before.
(
    table
    .pivot_table(index='Family', columns='Stage', values='Count',
                 aggfunc='sum', fill_value=0)
    .reindex(columns=list(stage_colours), fill_value=0)
    .plot.bar(stacked=True, figsize=(10, 7),
              color=list(stage_colours.values()))
)