comparison src/main/java/org/monetdb/util/MDBvalidator.java @ 778:12e076445cd6

Extended Not Null violation check also with empty string checks for some of the syte tables varchar columns.
author Martin van Dinther <martin.van.dinther@monetdbsolutions.com>
date Wed, 06 Sep 2023 21:31:42 +0200 (20 months ago)
parents aeb268156580
children e890195256ac
comparison
equal deleted inserted replaced
777:5788507c01b9 778:12e076445cd6
23 /** 23 /**
24 * <pre>MonetDB Data Integrity Validator class (MDBvalidator) can 24 * <pre>MonetDB Data Integrity Validator class (MDBvalidator) can
25 * a) validate system tables data integrity in system schemas: sys and tmp 25 * a) validate system tables data integrity in system schemas: sys and tmp
26 * this includes violations of: 26 * this includes violations of:
27 * primary key uniqueness 27 * primary key uniqueness
28 * primary key column(s) not null 28 * primary key column(s) not null and when varchar not empty string
29 * unique constraint uniqueness 29 * unique constraint uniqueness
30 * foreign key referential integrity 30 * foreign key referential integrity
31 * column not null 31 * column not null and some varchar columns not empty string
32 * column maximum length for char/varchar/clob/blob/json/url columns which have max length &gt; 0 32 * column maximum length for char/varchar/clob/blob/json/url columns which have max length &gt; 0
33 * b) validate user schema tables &amp; columns data integrity based on available meta data from system tables &amp; system views 33 * b) validate user schema tables &amp; columns data integrity based on available meta data from system tables &amp; system views
34 * primary key uniqueness 34 * primary key uniqueness
35 * TODO primary key column(s) not null 35 * TODO: primary key column(s) not null
36 * unique constraint uniqueness 36 * unique constraint uniqueness
37 * foreign key referential integrity 37 * foreign key referential integrity
38 * column not null 38 * column not null
39 * column maximum length for char/varchar/clob/blob/json/url columns which have max length &gt; 0 39 * column maximum length for char/varchar/clob/blob/json/url columns which have max length &gt; 0
40 * 40 *
41 * More possible validations for future 41 * More possible validations for future
42 * col char/varchar/clob/blob/json/url minimum length (some columns may not be empty, so length &gt;= 1) 42 * col char/varchar/clob/blob/json/url minimum length (some columns may not be empty, so length &gt;= 1)
43 * col with sequence (serial/bigserial/autoincrement) in range (0/1/min_value .. max_value) 43 * col with sequence (serial/bigserial/autoincrement) in range (0/1/min_value .. max_value)
44 * col value is valid in domain (date/time/timestamp/json/inet/url/uuid/...) 44 * col value is valid in domain (date/time/timestamp/json/inet/url/uuid/...)
45 * col in list checks (some columns may have only certain values which are not stored in a table or view (eg as fk)) 45 * col in list-of-values checks (some columns may have only certain values which are not stored in a table or view (eg as fk))
46 SELECT * FROM sys.table_partitions WHERE "type" NOT IN (5,6,9,10); -- 5=By Column Range (1+4), 6=By Expression Range (2+4), 9=By Column Value (1+8), 10=By Expression Value (2+8), see sql_catalog.h #define PARTITION_*. Note table_partitions is introduced in Apr2019 "33" 46 SELECT * FROM sys.table_partitions WHERE "type" NOT IN (5,6,9,10); -- 5=By Column Range (1+4), 6=By Expression Range (2+4), 9=By Column Value (1+8), 10=By Expression Value (2+8), see sql_catalog.h #define PARTITION_*. Note table_partitions is introduced in Apr2019 "33"
47 * col conditional checks (column is not null when other column is (not) null) 47 * col conditional checks (column is not null when other column is (not) null)
48 -- i.e.: either column_id or expression in sys.table_partitions must be populated 48 -- i.e.: either column_id or expression in sys.table_partitions must be populated
49 SELECT "column_id", "expression", 'Missing either column_id or expression' AS violation, * FROM "sys"."table_partitions" WHERE "column_id" IS NULL AND "expression" IS NULL; 49 SELECT "column_id", "expression", 'Missing either column_id or expression' AS violation, * FROM "sys"."table_partitions" WHERE "column_id" IS NULL AND "expression" IS NULL;
50 SELECT "column_id", "expression", 'column_id and expression may not both be populated. One of them must be NULL' AS violation, * FROM "sys"."table_partitions" WHERE "column_id" IS NOT NULL AND "expression" IS NOT NULL; 50 SELECT "column_id", "expression", 'column_id and expression may not both be populated. One of them must be NULL' AS violation, * FROM "sys"."table_partitions" WHERE "column_id" IS NOT NULL AND "expression" IS NOT NULL;
534 multicolumn = col.contains(", "); // some pkeys consist of multiple columns 534 multicolumn = col.contains(", "); // some pkeys consist of multiple columns
535 isNullCond.setLength(0); // empty previous content 535 isNullCond.setLength(0); // empty previous content
536 if (multicolumn) { 536 if (multicolumn) {
537 String[] cols = col.split(", "); 537 String[] cols = col.split(", ");
538 for (int c = 0; c < cols.length; c++) { 538 for (int c = 0; c < cols.length; c++) {
539 final String colx = cols[c];
539 if (c > 0) { 540 if (c > 0) {
540 isNullCond.append(" OR "); 541 isNullCond.append(" OR ");
541 } 542 }
542 isNullCond.append(cols[c]).append(" IS NULL"); 543 isNullCond.append(colx).append(" IS NULL");
544
545 /* for some sys/tmp columns also check for empty strings */
546 if (colx.endsWith("name") || colx.endsWith("keyword")
547 || "schema".equals(colx) || "table".equals(colx) || "column".equals(colx) || "func".equals(colx) || "mod".equals(colx) || "statement".equals(colx)
548 || ("type".equals(colx) && ("_columns".equals(tbl) || "args".equals(tbl) || "storage()".equals(tbl) || "storagemodelinput".equals(tbl)))) {
549 isNullCond.append(" OR ").append(colx).append(" = ''");
550 }
543 } 551 }
544 } else { 552 } else {
545 isNullCond.append(col).append(" IS NULL"); 553 isNullCond.append(col).append(" IS NULL");
554
555 /* for some sys/tmp columns also check for empty strings */
556 if (col.endsWith("name") || col.endsWith("keyword")
557 || "schema".equals(col) || "table".equals(col) || "column".equals(col) || "func".equals(col) || "mod".equals(col) || "statement".equals(col)
558 || ("type".equals(col) && ("_columns".equals(tbl) || "args".equals(tbl) || "storage()".equals(tbl) || "storagemodelinput".equals(tbl)))) {
559 isNullCond.append(" OR ").append(col).append(" = ''");
560 }
546 } 561 }
547 // reuse the StringBuilder by cleaning it partial 562 // reuse the StringBuilder by cleaning it partial
548 sb.setLength(qry_len); 563 sb.setLength(qry_len);
549 sb.append(col) 564 sb.append(col)
550 .append(", * FROM ").append(schema).append('.').append(tbl) 565 .append(", * FROM ").append(schema).append('.').append(tbl)