Simple rule
You can use simple rules to filter a condition on a single column in a single table.
Example #1
This example shows you how to create a simple SQL rule named simple_sql_rule.
Code | Description |
---|---|
rule.setRuleNm("simple_sql_rule") | Add the name of the given rule. |
rule.setRuleValue("startDate < '2011-11-01'") |
Set the simple SQL expression. |
rule.setRuleType("SQLG") | Set the rule type. |
Code
example_simple_sql_rule.scala
Copy
import com.owl.core.Owl
import com.owl.core.util.OwlUtils
import com.owl.common.bll.{RuleBll, RuleTemplateBll}
import com.owl.common.domain2.Rule
import com.owl.common.options.{LoadOpt, OwlOptions}
import org.junit.{Assert, Test}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._
val loadOptions = new LoadOpt {
pghost = "localhost:5432/postgres"
pguser = "username"
pgpassword = "password"
}
//----- Init Spark ----- //
def sparkInit(): SparkSession = {
val sparkSession = SparkSession.builder
.master("local")
.appName("test")
.getOrCreate()
sparkSession
}
def simpleRuleNotebook(): Unit = {
// Arrange
val spark = sparkInit()
import spark.implicits._
val headers = "firstName,lastName,startDate"
val source = Seq(
("Thomas", "Martinez", "2010-11-01"),
("Harry", "Williams", "2012-05-01"),
("Ethan", "Davis", "2009-08-01")
)
val arr = headers.split(",")
val df = source.toDF(arr: _*)
val opt = new OwlOptions {
runId = "2019-09-20"
dataset = "simple_sql_rule_ds"
load = loadOptions
}
val rule = new Rule {
setDataset(opt.dataset)
setRuleNm("simple_sql_rule")
setRuleValue("startDate < '2011-11-01'")
setRuleType("SQLG")
setPerc(1.0)
setPoints(1)
setIsActive(1)
setUserNm("admin")
}
val cdq = OwlUtils.OwlContext(df, opt)
.register(opt)
OwlUtils.addRule(rule)
// Act
cdq.owlCheck()
// Assert
import scala.collection.JavaConversions
val hootRule = JavaConversions.asScalaBuffer(owl.hoot.rules).find(x => rule.getRuleNm.equals(x.getRuleNm)).orNull
}
// Execute notebook
simpleRuleNotebook()
You can do multiple assertions on the result of the DQ Check process.
The owl.hoot parameter will provide access to the execution results, in this case for the rule.
Example #2
This example shows you how to create a simple SQL rule with templates named simple_sql_rule_with_template.
Steps
-
Create the rule template, where the template column name should be marked with $colNm string.\
val ruleTemplate = RuleTemplateBll.createRuleTemplate( "not_null_or_empty", "Column cannot contain null or empty values", " $colNm is null or $colNm = \'\' or $colNm = \'null\' " )
-
Create the Rule instance, where value of RuleValue will be used to replace $colNm in the template expression.\
val rule = RuleBll.createRule(opt.dataset) rule.setRuleNm("is_city_not_null_or_empty") rule.setRuleValue("city") rule.setRuleType("CUSTOM") // legacy type required to look into rule repo rule.setRuleRepo("not_null_or_empty") // custom rule name to pull rule value from rule repo rule.setPerc(1.0) rule.setPoints(1) rule.setIsActive(1) rule.setUserNm("admin")
Code
Copy
import com.owl.core.Owl
import com.owl.core.util.OwlUtils
import com.owl.common.bll.{RuleBll, RuleTemplateBll}
import com.owl.common.domain2.Rule
import com.owl.common.options.{LoadOpt, OwlOptions}
import org.junit.{Assert, Test}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._
val loadOptions = new LoadOpt {
pghost = "localhost:5432/postgres"
pguser = "username"
pgpassword = "password"
}
//----- Init Spark ----- //
def sparkInit(): SparkSession = {
val sparkSession = SparkSession.builder
.master("local")
.appName("test")
.getOrCreate()
sparkSession
}
def simpleRuleWithTemplate(): Unit = {
// Arrange
val spark = sparkInit()
import spark.implicits._
val headers = "firstName,lastName,city"
val source = Seq(
("Thomas", "Martinez", ""),
("Harry", "Williams", null),
("Ethan", "Davis", "Los Angeles")
)
val arr = headers.split(",")
val df = source.toDF(arr: _*)
val opt = new OwlOptions {
runId = "2019-09-20"
dataset = "simple_sql_rule_with_template_ds"
onReadOnly = false
load = loadOptions
}
// create a generic rule
val ruleRepoName = "not_null_or_empty"
val ruleRepo = OwlUtils.createRuleTemplate(ruleRepoName, "Column cannot contain null or empty values", "$colNm is null or $colNm = \'\' or $colNm = \'null\'")
Util.RuleRepoDaoFactory().delete(ruleRepo)
val cdq = OwlUtils.OwlContext(df, opt)
cdq.addRuleTemplate(ruleRepo)
val rule = OwlUtils.createRule(opt.dataset)
rule.setRuleNm("is_city_not_null_or_empty")
rule.setRuleValue("city")
rule.setRuleType("CUSTOM") // legacy type required to look into rule repo
rule.setRuleRepo("not_null_or_empty") // custom rule name to pull rule value from rule repo
rule.setPerc(1.0)
rule.setPoints(1)
rule.setIsActive(1)
rule.setUserNm("admin")
val cdq = OwlUtils.OwlContext(df, opt)
.register(opt)
cdq.addRule(rule)
// Act
owl.owlCheck()
// Assert
import scala.collection.JavaConversions
val hootRule = JavaConversions.asScalaBuffer(owl.hoot.rules).find(x => rule.getRuleNm.equals(x.getRuleNm)).orNull
println(hootRule.getScore)
}
// Execute notebook
simpleRuleWithTemplate()