diff --git a/README.md b/README.md
new file mode 100644
index 0000000..09e1028
--- /dev/null
+++ b/README.md
@@ -0,0 +1,68 @@
+SPARQL test runner
+==================
+
+SPARQL can be used to validate assumptions made when curating data.
+This project goes through a given 'tests' directory looking for files
+ending in .sparql, and runs the queries against a given RDF dataset or
+datasets.
+
+The results of each query should be either empty - for SELECT style
+queries - or TRUE for ASK style queries. More complex results can be
+checked by creating a file with the expected results and giving it a
+.expected suffix. The actual results are checked against the expected
+results - note that the output is in CSV format, so line endings are
+as for DOS (carriage return, linefeed).
+
+The result of running the SPARQL test runner will be a testresults.xml
+file in the current directory, in the style of jUnit XML. Errors are
+also output to stdout/stderr for humans. The exit code of the test
+runner will be 0 for success, and 1 if there are any errors.
+
+The project essentially bundles the Apache Jena ARQ libraries along
+with a simple test runner.
+
+The project can be built as a standalone "fat JAR", containing all
+dependencies such that it can be used directly without having to
+install anything other than Java. Java version 8 and up is supported
+(the build targets Scala 2.12, which requires Java 8).
+
+The resulting standalone file can also be used to simply run any
+SPARQL 1.1 query on any local or remote data, including using the
+SERVICE keyword to mix together external SPARQL endpoints with local
+data, etc.
+
+Building
+--------
+
+Build using the Scala Build Tool [1]. To build the fat JAR, run 'sbt
+assembly'. This will result in a single executable file under
+target/scala-2.12/sparql, which can be copied wherever needed. The
+file has a prologue invoking Bash to run Java on the embedded JAR.
+
+Running
+-------
+
+Running the 'sparql' executable is the same as running Apache Jena's
+sparql command, but just includes all dependent JARs and classes.
+
+The test runner can be run as follows:
+
+    java -cp sparql uk.org.floop.sparqlTestRunner.Run
+
+and will describe usage:
+
+    Usage: sparql-testrunner [options] <file>...
+
+      -t <dir> | --testdir <dir>
+            location of SPARQL queries to run, defaults to tests/sparql
+      <file>...
+            data to run the queries against
+
+The SPARQL queries are expected to live under the tests/sparql
+directory, which will be recursed into looking for files ending with
+.sparql.
+
+The RDF data can be in any format recognizable by Apache Jena,
+including the quads formats. To keep things simple, quads are treated
+as triples so that the queries range over the union of all graphs.
+
+[1] http://www.scala-sbt.org/
diff --git a/build.sbt b/build.sbt
new file mode 100644
index 0000000..d355a24
--- /dev/null
+++ b/build.sbt
@@ -0,0 +1,28 @@
+import sbtassembly.AssemblyPlugin.defaultShellScript
+
+name := "sparql-test-runner"
+
+version := "1.2"
+
+scalaVersion := "2.12.4"
+
+// scalacOptions += "-target:jvm-1.7"
+
+libraryDependencies ++= Seq(
+  "org.apache.jena" % "jena-arq" % "3.6.0",
+  "com.github.scopt" %% "scopt" % "3.7.0",
+  "org.scala-lang.modules" %% "scala-xml" % "1.0.6"
+)
+
+// Default main class is Jena's arq.sparql so the fat JAR doubles as a
+// general SPARQL command; the test runner is invoked explicitly via
+// `java -cp sparql uk.org.floop.sparqlTestRunner.Run` (see README).
+mainClass in assembly := Some("arq.sparql")
+
+assemblyMergeStrategy in assembly := {
+  case PathList("org", "apache", "commons", "logging", xs @ _*) => MergeStrategy.first
+  case x =>
+    val oldStrategy = (assemblyMergeStrategy in assembly).value
+    oldStrategy(x)
+}
+
+assemblyOption in assembly := (assemblyOption in assembly).value.copy(prependShellScript = Some(defaultShellScript))
+
+assemblyJarName in assembly := "sparql"
diff --git a/project/assembly.sbt b/project/assembly.sbt
new file mode 100644
index 0000000..09c90ca
--- /dev/null
+++ b/project/assembly.sbt
@@ -0,0 +1 @@
+addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.6")
\ No newline at end of file
diff --git a/project/build.properties b/project/build.properties
new file mode 100644
index 0000000..d1798b3
--- /dev/null
+++ b/project/build.properties
@@ -0,0 +1 @@
+sbt.version = 1.0.4
\ No newline at end of file
diff --git a/project/plugins.sbt b/project/plugins.sbt
new file mode 100644
index 0000000..14a6ca1
--- /dev/null
+++ b/project/plugins.sbt
@@ -0,0 +1 @@
+logLevel := Level.Warn
\ No newline at end of file
diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties
new file mode 100644
index 0000000..de56a2b
--- /dev/null
+++ b/src/main/resources/log4j.properties
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+log4j.rootLogger=INFO, stderr
+
+# Set up logging to include a file record of the output
+# Note: the file is always created, even if there is
+# no actual output.
+# log4j.rootLogger=INFO, stderr, R
+
+# Base of all Jena classes
+log4j.logger.com.hp.hpl.jena=INFO
+
+# Example of switching on debug level logging for part of tree
+# log4j.logger.com.hp.hpl.jena.graph.test=debug
+# log4j.logger.com.hp.hpl.jena.reasoner=debug
+# log4j.logger.com.hp.hpl.jena.reasoner.test=debug
+
+# Log format to standard out
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+# Pattern to output the caller's file name and line number.
+log4j.appender.stdout.layout.ConversionPattern=%5p [%t] (%F:%L) - %m%n
+
+# Log format to standard error
+log4j.appender.stderr=org.apache.log4j.ConsoleAppender
+log4j.appender.stderr.target=System.err
+log4j.appender.stderr.layout=org.apache.log4j.PatternLayout
+# Pattern to output the caller's file name and line number.
+log4j.appender.stderr.layout.ConversionPattern=%5p [%t] (%F:%L) - %m%n
+
+# File based log output
+log4j.appender.R=org.apache.log4j.RollingFileAppender
+log4j.appender.R.File=jena2.log
+log4j.appender.R.MaxFileSize=5000KB
+# Keep one backup file
+log4j.appender.R.MaxBackupIndex=1
+log4j.appender.R.layout=org.apache.log4j.PatternLayout
+log4j.appender.R.layout.ConversionPattern=%p %t %c - %m%n
diff --git a/src/main/scala/uk/org/floop/sparqlTestRunner/Run.scala b/src/main/scala/uk/org/floop/sparqlTestRunner/Run.scala
new file mode 100644
index 0000000..301791a
--- /dev/null
+++ b/src/main/scala/uk/org/floop/sparqlTestRunner/Run.scala
@@ -0,0 +1,149 @@
+package uk.org.floop.sparqlTestRunner
+
+import java.io._
+import java.nio.charset.StandardCharsets
+import java.nio.file.{Files, Path}
+
+import org.apache.jena.query.{DatasetFactory, QueryExecutionFactory, QueryFactory, ResultSetFormatter}
+import org.apache.jena.rdf.model.{Model, ModelFactory}
+import org.apache.jena.riot.RDFDataMgr
+
+import scala.collection.JavaConversions._
+import scala.io.Source
+import scala.xml.{NodeSeq, XML}
+
+/** Command-line configuration: test directory, report location, input data files. */
+case class Config(dir: File = new File("tests/sparql"),
+                  report: File = new File("reports/TESTS-sparql-test-runner.xml"),
+                  data: Seq[File] = Seq())
+
+/**
+ * Runs every .sparql query found under the test directory against the union
+ * of all graphs in the given RDF data, and writes a jUnit-style XML report.
+ * Exit code is 0 on success, 1 if any test failed.
+ */
+object Run extends App {
+  val packageVersion: String = getClass.getPackage.getImplementationVersion()
+  val parser = new scopt.OptionParser[Config]("sparql-testrunner") {
+    head("sparql-testrunner", packageVersion)
+    // NOTE(review): the <dir>/<file> metavariables below were stripped by the
+    // extraction that produced this patch; reconstructed — confirm against upstream.
+    opt[File]('t', "testdir") optional() valueName "<dir>" action { (x, c) =>
+      c.copy(dir = x)
+    } text "location of SPARQL queries to run, defaults to tests/sparql"
+    opt[File]('r', "report") optional() valueName "<file>" action { (x, c) =>
+      c.copy(report = x)
+    } text "file to output XML test report, defaults to reports/TESTS-sparql-test-runner.xml"
+    arg[File]("<file>...") unbounded() required() action { (x, c) =>
+      c.copy(data = c.data :+ x)
+    } text "data to run the queries against"
+  }
+  parser.parse(args, Config()) match {
+    case Some(config) =>
+      val dataset = DatasetFactory.create
+      for (d <- config.data) {
+        RDFDataMgr.read(dataset, d.toString)
+      }
+      // Treat quads as triples: queries range over the union of the default
+      // graph and all named graphs (see README).
+      val union = ModelFactory.createDefaultModel
+      union.add(dataset.getDefaultModel)
+      union.add(dataset.getUnionModel)
+      dataset.close()
+      val (error, results) = runTestsUnder(config.dir, union, config.dir.toPath)
+      for (dir <- Option(config.report.getParentFile)) {
+        dir.mkdirs
+      }
+      // NOTE(review): XML literals in this file were lost in extraction and have
+      // been reconstructed in jUnit-report style — verify element/attribute names.
+      XML.save(config.report.toString, <testsuites>{results}</testsuites>, "UTF-8", true)
+      System.exit(if (error) 1 else 0)
+    case None => // scopt has already printed the usage/error message
+  }
+
+  /**
+   * Recursively runs all .sparql tests under `dir` against `model`.
+   *
+   * @param dir   directory to scan (subdirectories become nested test suites)
+   * @param model RDF model the queries are executed against
+   * @param root  root test directory, used to compute relative suite/class names
+   * @return (true if any test failed, jUnit-style <testsuite> elements)
+   */
+  def runTestsUnder(dir: File, model: Model, root: Path): (Boolean, NodeSeq) = {
+    var testSuites = NodeSeq.Empty
+    var testCases = NodeSeq.Empty
+    var overallError = false
+    var errors = 0
+    var skipped = 0
+    var tests = 0
+    val timeSuiteStart = System.currentTimeMillis()
+    var subSuiteTimes = 0L // time spent in sub-suites, subtracted from this suite's time
+    // Guard against a missing/unreadable test directory (listFiles returns null).
+    for (f <- Option(dir.listFiles).getOrElse(Array.empty[File])) {
+      if (f.isDirectory) {
+        val subSuiteStart = System.currentTimeMillis()
+        val (error, subSuites) = runTestsUnder(f, model, root)
+        testSuites ++= subSuites
+        overallError |= error
+        subSuiteTimes += (System.currentTimeMillis() - subSuiteStart)
+      } else if (f.isFile && f.getName.endsWith(".sparql")) {
+        val timeTestStart = System.currentTimeMillis()
+        val relativePath = root.relativize(f.toPath).toString
+        // Derive a dotted "class" name from the relative path.
+        // FIX: was File.pathSeparatorChar (the ':' PATH-list separator), which
+        // never occurs in a relative path; File.separatorChar is the '/' we
+        // actually need to replace.
+        val className = relativePath.substring(0, relativePath.lastIndexOf('.')).replace(File.separatorChar, '.')
+        // Test description: the first line of the query (sans "# " prefix),
+        // falling back to the class name for an empty file.
+        val comment = {
+          val src = Source.fromFile(f) // FIX: close the Source (was leaked)
+          try {
+            val queryLines = src.getLines()
+            if (queryLines.hasNext) {
+              val line = queryLines.next()
+              if (line.startsWith("# ")) line.substring(2) else line
+            } else
+              className
+          } finally src.close()
+        }
+        tests += 1
+        val query = QueryFactory.create(new String(Files.readAllBytes(f.toPath), StandardCharsets.UTF_8))
+        val exec = QueryExecutionFactory.create(query, model)
+        try {
+          if (query.isSelectType) {
+            val results = exec.execSelect()
+            val nonEmptyResults = results.hasNext()
+            val timeTaken = (System.currentTimeMillis() - timeTestStart).toFloat / 1000
+            val out = new ByteArrayOutputStream
+            ResultSetFormatter.outputAsCSV(out, results)
+            val actualResults = out.toString("utf-8")
+            val expect = new File(f.getPath.substring(0, f.getPath.lastIndexOf('.')) + ".expected")
+            val failure: NodeSeq =
+              if (expect.exists && expect.isFile) {
+                // Compare against the recorded .expected CSV (DOS line endings).
+                val expectedResults = new String(Files.readAllBytes(expect.toPath), StandardCharsets.UTF_8)
+                if (actualResults != expectedResults) {
+                  errors += 1
+                  System.err.println(s"Testcase $comment\nExpected:\n${expectedResults}\nActual:\n${actualResults}")
+                  <failure message="Expected results differ from actual results"/>
+                } else NodeSeq.Empty
+              } else if (nonEmptyResults) {
+                // No .expected file: a SELECT test passes only with an empty result set.
+                errors += 1
+                System.err.println(s"Testcase $comment\nExpected empty result set, got:\n${actualResults}")
+                <failure message="Expected empty result set"/>
+              } else NodeSeq.Empty
+            testCases = testCases ++
+              <testcase name={comment} class={className} time={timeTaken.toString}>{failure}</testcase>
+          } else if (query.isAskType) {
+            val result = exec.execAsk()
+            val timeTaken = (System.currentTimeMillis() - timeTestStart).toFloat / 1000
+            val failure: NodeSeq =
+              if (!result) {
+                errors += 1
+                System.err.println(s"Testcase $comment\nExpected ASK query to return TRUE")
+                <failure message="Expected ASK query to return TRUE"/>
+              } else NodeSeq.Empty
+            testCases = testCases ++
+              <testcase name={comment} class={className} time={timeTaken.toString}>{failure}</testcase>
+          } else {
+            // Only SELECT and ASK queries are supported as tests.
+            skipped += 1
+            System.out.println(s"Skipped testcase $comment")
+            testCases = testCases ++
+              <testcase name={comment} class={className}><skipped/></testcase>
+          }
+        } finally exec.close() // FIX: release QueryExecution resources
+      }
+    }
+    if (errors > 0) {
+      overallError = true
+    }
+    val testSuiteTime = (System.currentTimeMillis() - timeSuiteStart - subSuiteTimes).toFloat / 1000
+    val suiteName = {
+      val relativeName = root.relativize(dir.toPath).toString
+      if (relativeName.length == 0) "root" else relativeName
+    }
+    (overallError,
+      testSuites ++
+        <testsuite name={suiteName} tests={tests.toString} errors={errors.toString} skipped={skipped.toString} time={testSuiteTime.toString}>{testCases}</testsuite>)
+  }
+}