# ---
# jupyter:
#   jupytext:
#     text_representation:
#       extension: .py
#       format_name: light
#       format_version: '1.5'
#       jupytext_version: 1.3.4
#   kernelspec:
#     display_name: Python 3
#     language: python
#     name: python3
# ---

# Notebook purpose: download the tables of an Airtable base, build a
# per-dataset table of (Superfamily, Family, Title, Stage), count datasets per
# stage within each family, and draw the counts as a stacked bar chart.

# +
# !pip install airtable-python-wrapper
# !pip install python-jenkins
# !pip install matplotlib

from airtable import Airtable
from jenkins import Jenkins, JenkinsException

# +
from os import environ

# Credentials are read from the environment; environ.get() yields None for any
# variable that is not set.
JENKINS_USER = environ.get('JENKINS_USER')
JENKINS_TOKEN = environ.get('JENKINS_TOKEN')
AIRTABLE_TOKEN = environ.get('AIRTABLE_TOKEN')
AIRTABLE_BASE = environ.get('AIRTABLE_BASE')

from collections import namedtuple

# One attribute per Airtable table; each attribute is a dict that maps a
# record id to that record's 'fields' dict.  The attribute order must match
# the order of the table names listed below.
COGS = namedtuple('Airtable', 'sources families superfamilies producers types')
at = COGS._make([
    {
        record['id']: record['fields']
        for record in Airtable(
            AIRTABLE_BASE, table_name, api_key=AIRTABLE_TOKEN).get_all()
    }
    for table_name in [
        'Source Data', 'Family', 'Superfamily', 'Dataset Producer', 'Type']
])

# +
import pandas as pd

# One row per source dataset that has a 'Family' field.  Only the first entry
# of the 'Family' and 'Superfamily' lists is followed.
datasets = []
for source in at.sources.values():
    if 'Family' in source:
        family = at.families[source['Family'][0]]
        superfamily = at.superfamilies[family['Superfamily'][0]]
        datasets.append([
            superfamily['Name'],
            family['Name'],
            source['Name'],
            source.get('Stage', ''),  # sources without a stage get ''
        ])
table = pd.DataFrame(
    datasets, columns=('Superfamily', 'Family', 'Title', 'Stage'))
table

# +
# Number of datasets per stage within each (Superfamily, Family) pair.
table = (
    table
    .groupby(['Superfamily', 'Family'])['Stage']
    .value_counts()
    .reset_index(name='Count')
)
table

# +
# Stages to plot, in stacking order, each paired with its bar colour.
STAGE_COLOURS = {
    'Backlog': 'xkcd:burnt orange',
    'Candidate': 'xkcd:denim',
    'Prioritized': 'xkcd:sky blue',
    'Published': 'xkcd:pastel green',
}

(
    table
    # aggfunc='sum': counts were computed per (Superfamily, Family) but the
    # pivot is on Family alone, so a family name that occurs under more than
    # one superfamily must have its counts added, not averaged (the default).
    .pivot_table(index='Family', columns='Stage', values='Count',
                 aggfunc='sum', fill_value=0)
    # reindex rather than .loc[:, [...]]: a stage with no datasets has no
    # column after the pivot, and selecting a missing label with .loc raises
    # KeyError.  reindex adds the missing column filled with 0 and, like the
    # original selection, drops any stage not listed (including '').
    .reindex(columns=list(STAGE_COLOURS), fill_value=0)
    .plot.bar(stacked=True, figsize=(10, 7),
              color=list(STAGE_COLOURS.values()))
)