#!/usr/bin/env python3
"""Create a statistical classification as OWL.

Reads a codelist CSV with ``Notation``, ``Label`` and ``Parent Notation``
columns (``Description`` is optional) and prints a Turtle document to stdout.
Every notation that appears as a parent or as a child becomes an ``owl:Class``
that is equivalent to a ``hasValue`` restriction on the defining property, and
every parent is declared the ``owl:disjointUnionOf`` its children.
"""
import argparse
import csv
from urllib.parse import urljoin

from rdflib import Graph, URIRef, RDF, OWL, BNode, Literal, RDFS
from rdflib.collection import Collection

# Language tag attached to every label and comment literal.
LITERAL_LANG = 'en-gb'


def _parse_args(argv=None):
    """Parse the command line; ``argv=None`` means ``sys.argv[1:]``."""
    parser = argparse.ArgumentParser(description='Create statistical classification as OWL')
    parser.add_argument(
        'codelist',
        type=argparse.FileType('r'),
        help='Codelist CSV file.')
    parser.add_argument(
        'classification',
        help='Base URI for this classification.')
    parser.add_argument(
        'codes',
        help='Base URI for the codelist.')
    parser.add_argument(
        'property',
        help='Defining property.')
    return parser.parse_args(argv)


def _read_codelist(rows):
    """Collect the hierarchy, labels and comments from codelist rows.

    ``rows`` is an iterable of dict-like rows (e.g. a ``csv.DictReader``).
    Returns ``(parent2children, notation2label, notation2comment)`` where
    ``parent2children`` maps a parent notation to the list of its child
    notations in file order.

    Raises ``KeyError`` if a row that has a ``Parent Notation`` key lacks
    ``Notation`` or ``Label``.
    """
    parent2children = {}
    notation2label = {}
    notation2comment = {}
    for row in rows:
        # NOTE(review): rows without a 'Parent Notation' column are ignored
        # entirely -- confirm this matches the intended nesting of the
        # original script.
        if 'Parent Notation' not in row:
            continue
        notation = row['Notation']
        notation2label[notation] = row['Label']
        description = row.get('Description')
        if description:
            notation2comment[notation] = description
        parent = row['Parent Notation']
        # Empty string means a top-level code; DictReader yields None for
        # rows that are shorter than the header, which is treated the same.
        if not parent:
            continue
        parent2children.setdefault(parent, []).append(notation)
    return parent2children, notation2label, notation2comment


def _build_graph(parent2children, notation2label, notation2comment,
                 classification, codes, defining_property):
    """Build the OWL graph for the hierarchy.

    ``classification`` and ``codes`` are base URIs against which each
    notation is resolved with ``urljoin``; ``defining_property`` is the URI
    of the property used in the ``hasValue`` restrictions.

    Raises ``KeyError`` if a notation used as a parent has no row of its own
    (and therefore no label).
    """
    graph = Graph()
    graph.bind('owl', OWL)
    on_property = URIRef(defining_property)
    defined = set()

    def class_node(notation):
        """Return the class URI for ``notation``, defining it on first use."""
        node = URIRef(urljoin(classification, notation))
        if node in defined:
            return node
        graph.add((node, RDF.type, OWL.Class))
        graph.add((node, RDFS.label,
                   Literal(notation2label[notation], lang=LITERAL_LANG)))
        if notation in notation2comment:
            graph.add((node, RDFS.comment,
                       Literal(notation2comment[notation], lang=LITERAL_LANG)))
        restriction = BNode()
        graph.add((node, OWL.equivalentClass, restriction))
        graph.add((restriction, RDF.type, OWL.Restriction))
        graph.add((restriction, OWL.onProperty, on_property))
        graph.add((restriction, OWL.hasValue, URIRef(urljoin(codes, notation))))
        defined.add(node)
        return node

    for parent, children in parent2children.items():
        parent_node = class_node(parent)
        children_node = BNode()
        graph.add((parent_node, OWL.disjointUnionOf, children_node))
        child_nodes = [class_node(child) for child in children]
        # Constructing the Collection writes the RDF list into the graph;
        # the object itself is not needed afterwards.
        Collection(graph, children_node, child_nodes)
    return graph


def main(argv=None):
    """Read the codelist named on the command line and print it as Turtle."""
    args = _parse_args(argv)
    with args.codelist as codelist:
        hierarchy = _read_codelist(csv.DictReader(codelist))
    graph = _build_graph(*hierarchy, args.classification, args.codes, args.property)
    output = graph.serialize(format='turtle')
    # rdflib < 6 returns bytes here, rdflib >= 6 returns str.
    if isinstance(output, bytes):
        output = output.decode('utf-8')
    print(output)


if __name__ == '__main__':
    main()