Skip to content

Instantly share code, notes, and snippets.

@lambdazen
Last active June 29, 2018 20:32
Show Gist options
  • Save lambdazen/9146827 to your computer and use it in GitHub Desktop.
Save lambdazen/9146827 to your computer and use it in GitHub Desktop.
Quick and Dirty Groovy/Gremlin Schema Checker
// Paste this script into a Gremlin shell. Tested with Tinkerpop 2.4.0
// Data graph under test -- See https://github.com/tinkerpop/blueprints/wiki/Property-Graph-Model
g = TinkerGraphFactory.createTinkerGraph()

// Schema graph describing the data graph -- See http://lambdazen.blogspot.com/2014/01/do-property-graphs-have-schemas.html
// Each schema vertex/edge stands for one element type. Keys starting with '_'
// are schema metadata; every other key maps a property name to the fully
// qualified class name its values must have.
sg = new TinkerGraph()

// Create the schema elements first (same creation order as before, so ids are unchanged)
person = sg.addVertex()
software = sg.addVertex()
knows = person.addEdge('knows', person)
created = person.addEdge('created', software)

// Vertex types
['_label': 'person', 'name': 'java.lang.String', 'age': 'java.lang.Integer'].each { key, type ->
    person.setProperty(key, type)
}
['_label': 'software', 'name': 'java.lang.String', 'lang': 'java.lang.String'].each { key, type ->
    software.setProperty(key, type)
}

// Edge types
knows.setProperty('weight', 'java.lang.Float')
// '_minIn' of 1: someone must create the software
['weight': 'java.lang.Float', '_minIn': 1].each { key, spec ->
    created.setProperty(key, spec)
}
///////// Groovy functions to validate a schema
// This function will be different for each graph in Tinkerpop2 -- CUSTOMIZE APPROPRIATELY
//
// Maps a data vertex to the schema vertex that describes its type.
//   v  - vertex from the data graph
//   sg - schema graph
// Returns the schema vertex whose '_label' matches the type of v.
// Throws Exception if the schema graph has no vertex with that '_label'.
vertexType = { v, sg ->
    // In Tinkerpop3 this function can be written as sg.V('label', v.label).
    // But for now, the presence of the 'age' property determines whether the vertex is a
    // Person or Software. Compare against null rather than relying on Groovy truth,
    // otherwise a person whose age is 0 would be classified as software.
    def typeLabel = (v.getProperty('age') != null) ? 'person' : 'software'

    // next() on an empty pipe throws instead of returning null, so probe with
    // hasNext() to be able to report a meaningful error.
    def matches = sg.V('_label', typeLabel)
    if (!matches.hasNext()) throw new Exception("Missing schema definition for ${v}")
    matches.next()
}
// DONE with custom code. Everything below applies to all graphs and schemas
// Edge type can be computed from the vertex type.
//
// Maps a data edge to the schema edge that describes its type.
//   e  - edge from the data graph
//   sg - schema graph
// Returns the schema edge with the same label as e that connects the schema
// vertex of e's out-vertex to the schema vertex of e's in-vertex.
// Throws Exception if no such schema edge exists (or if either endpoint has
// no schema vertex, as reported by vertexType).
edgeType = { e, sg ->
    def outType = vertexType(e.outV.next(), sg)
    // Resolve the in-vertex type once instead of once per candidate schema edge
    def inType = vertexType(e.inV.next(), sg)

    // Find the schema edge from outVertex to inVertex with the given label
    def candidates = outType.outE(e.label).filter({ it.inV.next().equals(inType) })

    // next() on an empty pipe throws instead of returning null, so probe with
    // hasNext() to be able to report a meaningful error.
    if (!candidates.hasNext()) throw new Exception("Missing schema definition for ${e}")
    candidates.next()
}
// Make sure that the given element matches the given schema element for the given key.
//   v   - data element (vertex or edge) being validated
//   sv  - schema element describing v's type
//   key - property key as written in the schema (may carry a trailing '?')
// Throws Exception if a required property is absent or if a present property
// is not an instance of the class named in the schema.
checkProperty = { v, sv, key ->
    // Special schema property keys like _label, _minOut, _maxOut, _minIn, _maxIn can be ignored
    if (key.startsWith('_')) return

    // Find the class in the schema
    def clazz = Class.forName(sv.getProperty(key))

    // Optional elements are defined in the schema with a '?' at the end of the key name.
    // For e.g., age? means age is optional
    def optional = key.endsWith('?')
    def elementKey = optional ? key.substring(0, key.length() - 1) : key

    // Find the value in the element
    def value = v.getProperty(elementKey)
    // Test for null explicitly: Groovy truth would treat 0, 0.0, '' and false
    // as "absent" and wrongly report them as missing.
    if (value == null) {
        // Value is absent in the element -- Is that OK?
        if (!optional) throw new Exception("Element ${v} has a missing key ${elementKey}")
    } else if (!clazz.isInstance(value)) {
        // Value is present in the element -- Does the class match?
        throw new Exception("Element ${v} has an invalid key ${elementKey} with value ${value} which doesn't match class ${clazz}")
    }
}
// Make sure that all the properties in the element match the given schema element.
//   v  - data element (vertex or edge) being validated
//   sv - schema element describing v's type
// Throws Exception if any schema key fails checkProperty, or if the element
// carries a property key that the schema does not declare.
checkProperties = { v, sv ->
    // Go through keys in the schema and validate
    def schemaKeys = sv.getPropertyKeys()
    for (key in schemaKeys) {
        checkProperty(v, sv, key)
    }

    // Optional keys are written as 'name?' in the schema but appear as 'name'
    // on the element, so strip the marker before comparing key sets.
    def declaredKeys = schemaKeys.collect { schemaKey ->
        schemaKey.endsWith('?') ? schemaKey.substring(0, schemaKey.length() - 1) : schemaKey
    }

    // Make sure there is nothing extra
    def extraKeys = new ArrayList(v.getPropertyKeys())
    extraKeys.removeAll(declaredKeys)
    if (extraKeys) {
        throw new Exception("Element ${v} has an additional keys ${extraKeys} that are not defined in the schema")
    }
}
// Check to see that an edge is valid.
//   e  - edge from the data graph
//   sg - schema graph
// Looks up the schema edge for e and validates e's properties against it.
// Throws Exception if the schema edge is missing or a property check fails.
checkEdge = { e, sg ->
    // 'def' keeps the schema edge local instead of overwriting a shell variable named 'se'
    def se = edgeType(e, sg)
    checkProperties(e, se)
}
// Make sure that the cardinalities of in and out edges for a vertex match the schema.
//   v  - vertex from the data graph
//   sv - schema vertex describing v's type
// For every schema edge leaving sv, the number of out-edges of v with that label
// must lie within [_minOut, _maxOut]; for every schema edge entering sv, the number
// of in-edges of v with that label must lie within [_minIn, _maxIn]. A bound that
// is not set on the schema edge is not enforced.
// Throws Exception on the first violated bound.
checkCardinality = { v, sv ->
    // Shared range check. Bounds are compared against null explicitly so that a
    // bound of 0 (e.g. _maxOut = 0, "no such edges allowed") is still enforced.
    def verify = { direction, label, card, min, max ->
        if ((min != null && card < min) || (max != null && card > max)) {
            throw new Exception("Vertex ${v} has an invalid ${direction} cardinality ${card} for label ${label}. Should be between ${min != null ? min : '0'} and ${max != null ? max : 'Inf'}")
        }
    }

    // Out edges defined in the schema: the label and cardinality should be
    // within the min and max if defined
    for (se in sv.outE) {
        def label = se.label
        verify('OUT', label, v.outE(label).count(), se.getProperty('_minOut'), se.getProperty('_maxOut'))
    }

    // Same process for in edges
    for (se in sv.inE) {
        def label = se.label
        verify('IN', label, v.inE(label).count(), se.getProperty('_minIn'), se.getProperty('_maxIn'))
    }
}
// Check to see that the vertex is valid.
//   v  - vertex from the data graph
//   sg - schema graph
// Validates v's properties and edge cardinalities against its schema vertex,
// then validates every out-edge of v. Checking only out-edges is enough to cover
// each edge once when this is run over all vertices.
// Returns true when all checks pass; any failure surfaces as an Exception.
checkVertex = { v, sg ->
    // Find the vertex definition in the schema graph.
    // 'def' keeps it local instead of overwriting a shell variable named 'sv'.
    def sv = vertexType(v, sg)

    // And validate the properties and cardinalities
    checkProperties(v, sv)
    checkCardinality(v, sv)

    // Now check the out edges for properties
    for (e in v.outE()) {
        checkEdge(e, sg)
    }

    // Returning true for compatibility with filter
    true
}
// Validate every vertex of g (and, through checkVertex, its out-edges) against the schema sg.
// checkVertex returns true or throws, so the filter passes every valid vertex through.
g.V.filter { vertex -> checkVertex(vertex, sg) }
@lambdazen
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment