diff --git a/CHANGELOG.md b/CHANGELOG.md index 016b462bb..cd0697d86 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- Add [`validate`] command [#691] + ### Fixed - Handle empty [`template`] property charactersitics in [#719] @@ -193,11 +196,13 @@ First official release of ROBOT! [`repair`]: http://robot.obolibrary.org/repair [`report`]: http://robot.obolibrary.org/report [`template`]: http://robot.obolibrary.org/template +[`validate`]: http://robot.obolibrary.org/validate [#719]: https://github.com/ontodev/robot/pull/716 [#715]: https://github.com/ontodev/robot/pull/715 [#710]: https://github.com/ontodev/robot/pull/710 [#709]: https://github.com/ontodev/robot/issues/709 +[#691]: https://github.com/ontodev/robot/pull/691 [#689]: https://github.com/ontodev/robot/pull/689 [#685]: https://github.com/ontodev/robot/pull/685 [#671]: https://github.com/ontodev/robot/pull/671 diff --git a/docs/examples/immune_exposures.csv b/docs/examples/immune_exposures.csv new file mode 100644 index 000000000..55cbfc672 --- /dev/null +++ b/docs/examples/immune_exposures.csv @@ -0,0 +1,14 @@ +exposure process reported,exposure material reported,exposure material id,disease reported,disease ontology id,disease stage reported +is-required; subclass-of 'exposure process';,subclass-of|equivalent-to 'material entity'; equivalent-to %3; is-required (when %1 subclass-of ('administering substance in vivo' or 'exposure to substance without evidence for disease' or 'occurrence of infectious disease' or 'occurrence of allergic disease')); is-excluded (when %1 equivalent-to 'occurrence of disease'); is-excluded (when %1 subclass-of ('occurrence of autoimmune disease' or 'occurrence of cancer' or 'no exposure' or unknown)),equivalent-to %2,subclass-of disease; subclass-of 'has material basis in' some %2 (when %1 not-subclass-of ('occurrence of autoimmune disease' or 'occurrence of 
cancer')); equivalent-to %5; is-required (when %1 subclass-of 'occurrence of disease'); is-excluded (when %1 equivalent-to 'administering substance in vivo'); is-excluded (when %1 subclass-of (vaccination or 'transplant or transfusion' or 'exposure to substance without evidence for disease' or 'no exposure' or unknown)),equivalent-to %4,subclass-of 'disease stage'; is-required (when %1 subclass-of 'occurrence of disease'); is-excluded (when %1 not-subclass-of 'occurrence of disease') +unknown,,,,, +administering substance in vivo,Hepacivirus C,NCBITaxon:11103,,, +exposure to substance without evidence for disease,Dengue virus,NCBITaxon:12637,,, +occurrence of disease,,,,, +occurrence of infectious disease,Dengue virus,NCBITaxon:12637,dengue hemorrhagic fever,DOID:12206,Acute/Recent onset +occurrence of infectious disease,Dengue virus,NCBITaxon:12637,,,Acute/Recent onset +occurrence of infectious disease,Dengue virus,NCBITaxon:12637,dengue hemorrhagic fever,DOID:12206, +occurrence of infectious disease,Dengue virus,NCBITaxon:11060,dengue hemorrhagic fever,DOID:12206,Acute/Recent onset +occurrence of infectious disease,Dengue virus,NCBITaxon:12637,Dengue virus 2,NCBITaxon:11060,Acute/Recent onset +('administering substance in vivo' or 'exposure to substance without evidence for disease'),blood or 'Hepacivirus C',UBERON:0000178,,,Chronic +occurrence of infectious disease,Dengue virus,NCBITaxon:12637,dengue hemorrhagic fever|wheat allergy,DOID:12206|DOID:3660,Acute/Recent onset +occurrence of infectious disease,Dengue virus,NCBITaxon:12637,dengue hemorrhagic fever|dengue hemorrhagic fever,DOID:12206|DOID:12206,Acute/Recent onset \ No newline at end of file diff --git a/docs/examples/immune_exposures.html b/docs/examples/immune_exposures.html new file mode 100644 index 000000000..41d401421 --- /dev/null +++ b/docs/examples/immune_exposures.html @@ -0,0 +1,130 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
exposure process reportedexposure material reportedexposure material iddisease reporteddisease ontology iddisease stage reported
is-required; subclass-of 'exposure process';subclass-of|equivalent-to 'material entity'; equivalent-to %3; is-required (when %1 subclass-of ('administering substance in vivo' or 'exposure to substance without evidence for disease' or 'occurrence of infectious disease' or 'occurrence of allergic disease')); is-excluded (when %1 equivalent-to 'occurrence of disease'); is-excluded (when %1 subclass-of ('occurrence of autoimmune disease' or 'occurrence of cancer' or 'no exposure' or unknown))equivalent-to %2subclass-of disease; subclass-of 'has material basis in' some %2 (when %1 not-subclass-of ('occurrence of autoimmune disease' or 'occurrence of cancer')); equivalent-to %5; is-required (when %1 subclass-of 'occurrence of disease'); is-excluded (when %1 equivalent-to 'administering substance in vivo'); is-excluded (when %1 subclass-of (vaccination or 'transplant or transfusion' or 'exposure to substance without evidence for disease' or 'no exposure' or unknown))equivalent-to %4subclass-of 'disease stage'; is-required (when %1 subclass-of 'occurrence of disease'); is-excluded (when %1 not-subclass-of 'occurrence of disease')
unknown
administering substance in vivoHepacivirus CNCBITaxon:11103
exposure to substance without evidence for diseaseDengue virusNCBITaxon:12637
occurrence of disease
occurrence of infectious diseaseDengue virusNCBITaxon:12637dengue hemorrhagic feverDOID:12206Acute/Recent onset
occurrence of infectious diseaseDengue virusNCBITaxon:12637Acute/Recent onset
occurrence of infectious diseaseDengue virusNCBITaxon:12637dengue hemorrhagic feverDOID:12206
occurrence of infectious diseaseDengue virusNCBITaxon:11060dengue hemorrhagic feverDOID:12206Acute/Recent onset
occurrence of infectious diseaseDengue virusNCBITaxon:12637Dengue virus 2NCBITaxon:11060Acute/Recent onset
'exposure to substance without evidence for disease' or 'administering substance in vivo''Hepacivirus C' or bloodUBERON:0000178Chronic
occurrence of infectious diseaseDengue virusNCBITaxon:12637dengue hemorrhagic fever|wheat allergyDOID:12206|DOID:3660Acute/Recent onset
occurrence of infectious diseaseDengue virusNCBITaxon:12637dengue hemorrhagic fever|dengue hemorrhagic feverDOID:12206|DOID:12206Acute/Recent onset
+ + diff --git a/docs/examples/immune_exposures.owl b/docs/examples/immune_exposures.owl new file mode 100644 index 000000000..cf7472de1 --- /dev/null +++ b/docs/examples/immune_exposures.owl @@ -0,0 +1,709 @@ + + + + + + + + + + + + + + + has material basis in + + + + + + + + + + + + + + Administering a vaccine to an organism with the intention of inducing immunity against antigen components of the vaccine. + vaccination + http://purl.obolibrary.org/obo/VO_0000002 + + + + + + + + + An unplanned process in which an organism comes into contact with a substance without evidence for a disease caused by that exposure. + exposure to substance without evidence for disease + + + + + + + + + A process in which an infectious agent is in or on the body of an organism without causing detectable disease. + occurrence of asymptomatic infection + + + + + + + + + A process in which an organism is exposed to a substance which is evident from that process having been observed or documented. + documented exposure without evidence for disease + + + + + + + + + A process in which an organism's exposure to a material entity is assumed from that material being commonly present in the environment of the organism. + environmental exposure to endemic/ubiquitous agent without evidence for disease + + + + + + + + + An organism's lifespan which does not include exposure to a substance of interest. + no exposure + + + + + + + + + An organism's lifespan for which there is no available information on an exposure to a material entity of interest. + unknown + + + + + + + + + A planned process in which solid tissue is transferred to an organism + solid tissue transplantation + + + + + + + + + A planned process in which a bodily fluid is transferred into an organism + transfusion + + + + + + + + + An occurrence of cancer where there is evidence for the presence of a cancer causing oncovirus in the tumor. 
+ occurrence of cancer associated with virus + + + + + + + + + A planned process by which a material is intentionally given to an organism resulting in exposure of the organism to that substance. + administering substance in vivo + http://purl.obolibrary.org/obo/OBI_0600007 + + + + + + + + + Administering an infectious agent to an organism in order to test if and how an infection will occur. + infectious challenge + http://purl.obolibrary.org/obo/OBI_0000712 + + + + + + + + + Transferring a solid tissue (transplant) or bodily fluid (transfusion) to an organism. + transplant or transfusion + http://purl.obolibrary.org/obo/OBI_0000105 + + + + + + + + + The process in which an infectious disease unfolds. + occurrence of infectious disease + http://purl.obolibrary.org/obo/DOID_0050117 + http://purl.obolibrary.org/obo/OBI_1110008 + + + + + + + + + The process in which an allergic disease unfolds. + occurrence of allergic disease + http://purl.obolibrary.org/obo/DOID_1205 + http://purl.obolibrary.org/obo/OBI_1110012 + + + + + + + + + The process in which an autoimmune disease unfolds. + occurrence of autoimmune disease + http://purl.obolibrary.org/obo/DOID_417 + http://purl.obolibrary.org/obo/OBI_1110054 + + + + + + + + + The process in which cancer unfolds + occurrence of cancer + http://purl.obolibrary.org/obo/DOID_162 + http://purl.obolibrary.org/obo/OBI_1110053 + + + + + + + + + A process in which an organism is exposed to a material entity which is evident by a detectable immune reactivity against it. + exposure with existing immune reactivity without evidence for disease + http://purl.obolibrary.org/obo/OBI_1110061 + + + + + + + + + The process in which a disease unfolds. 
+ occurrence of disease + http://purl.obolibrary.org/obo/DOID_4 + http://purl.obolibrary.org/obo/OGMS_0000031 + + + + + + + + + Post + + + + + + + + + Unknown + + + + + + + + exposure process + + + + + + + + material entity + + + + + + + + + allergic disease + + + + + + + + + + + + + + + dengue hemorrhagic fever + + + + + + + + + cancer + + + + + + + + + colon cancer + + + + + + + + + + + + + + + wheat allergy + + + + + + + + disease + + + + + + + + + autoimmune disease + + + + + + + + + acquired immunodeficiency syndrome + + + + + + + + + organism + + + + + + + + + Dengue virus 1 + + + + + + + + + Dengue virus 2 + + + + + + + + + Hepacivirus C + + + + + + + + + Dengue virus + + + + + + + + + Triticum aestivum + + + + + + + + disease stage + + + + + + + + + Acute/Recent onset + + + + + + + + + Chronic + + + + + + + + + blood + + + + + + + + + Dryvax + + + + + + + + + FluMist + + + + + + + + + Fluarix + + + + + + + + + Fluvirin + + + + + + + + + Fluzone + + + + + + + + + Menactra + + + + + + + + + Influenza A (H1N1) 2009 Monovalent Vaccine (Novartis) + + + + + + + + + Plasmodium falciparum vaccine + + + + + + + + + Pneumovax 23 (USA) + + + + + + + + + YF-Vax + + + + + + + + + Yellow fever 17D vaccine vector + + + + + + + + + Zostavax + + + + + + + + + Influenza virus vaccine + Trivalent inactivated influenza + + + + + + + + + Varicella-zoster virus vaccine + + + + + + + + + BCG Vaccine + + + + + + + + + Menveo + + + + + + + + + MVA85A + + + + + + + + + MRKAd5 HIV-1 gag/pol/nef + + + + + + + + + ACWY Vax + + + + + + + + + Stamaril + + + + + + + + + HEPLISAV-B + + + + + + + + + LC16m8 + + + + + + + + + rVSV-EBOV + + + + + + + + + 2008-2009 trivalent influenza vaccine + + + + + + + + + Engerix-B + + + + + + + + + Meningococcal Polysaccharide Vaccine, Groups A & C, Menomune A/C + + + + + + + + + vaccine + + + + + + + + + kidney transplant + + + + + + + diff --git a/docs/examples/immune_exposures.txt b/docs/examples/immune_exposures.txt new file mode 100644 index 
000000000..562e98a5a --- /dev/null +++ b/docs/examples/immune_exposures.txt @@ -0,0 +1,14 @@ +At immune_exposures.csv row 6, column 4: Cell is empty but rule: "is-required true" does not allow this. +At immune_exposures.csv row 7, column 6: Cell is empty but rule: "is-required true" does not allow this. +At immune_exposures.csv row 8, column 2: Validation failed for rule: "Dengue virus equivalent-to (NCBITaxon:11060)". +At immune_exposures.csv row 8, column 3: Validation failed for rule: "NCBITaxon:11060 equivalent-to 'Dengue virus'". +At immune_exposures.csv row 9, column 4: Validation failed for rule: "Dengue virus 2 subclass-of disease". +At immune_exposures.csv row 9, column 4: Validation failed for rule: "Dengue virus 2 subclass-of 'has material basis in' some 'Dengue virus'". +At immune_exposures.csv row 10, column 2: Validation failed for rule: "blood or 'Hepacivirus C' equivalent-to (UBERON:0000178)". +At immune_exposures.csv row 10, column 3: Validation failed for rule: "UBERON:0000178 equivalent-to (blood or 'Hepacivirus C')". +At immune_exposures.csv row 10, column 6: Cell is non-empty ("Chronic") but rule: "is-excluded true" does not allow this. +At immune_exposures.csv row 11, column 4: Validation failed for rule: "wheat allergy equivalent-to (DOID:12206)". +At immune_exposures.csv row 11, column 4: Validation failed for rule: "dengue hemorrhagic fever equivalent-to (DOID:3660)". +At immune_exposures.csv row 11, column 4: Validation failed for rule: "wheat allergy subclass-of 'has material basis in' some 'Dengue virus'". +At immune_exposures.csv row 11, column 5: Validation failed for rule: "DOID:3660 equivalent-to 'dengue hemorrhagic fever'". +At immune_exposures.csv row 11, column 5: Validation failed for rule: "DOID:12206 equivalent-to 'wheat allergy'". 
diff --git a/docs/examples/immune_exposures_2.csv b/docs/examples/immune_exposures_2.csv new file mode 100644 index 000000000..f6f03f182 --- /dev/null +++ b/docs/examples/immune_exposures_2.csv @@ -0,0 +1,14 @@ +exposure process reported,exposure material reported,exposure material id,disease reported,disease ontology id,disease stage reported +is-required; subclass-of 'exposure process';,subclass-of|equivalent-to 'material entity'; equivalent-to %3; is-required (when %1 subclass-of ('administering substance in vivo' or 'exposure to substance without evidence for disease' or 'occurrence of infectious disease' or 'occurrence of allergic disease')); is-excluded (when %1 equivalent-to 'occurrence of disease'); is-excluded (when %1 subclass-of ('occurrence of autoimmune disease' or 'occurrence of cancer' or 'no exposure' or unknown)),equivalent-to %2,subclass-of disease; subclass-of 'has material basis in' some %2 (when %1 not-subclass-of ('occurrence of autoimmune disease' or 'occurrence of cancer')); equivalent-to %5; is-required (when %1 subclass-of 'occurrence of disease'); is-excluded (when %1 equivalent-to 'administering substance in vivo'); is-excluded (when %1 subclass-of (vaccination or 'transplant or transfusion' or 'exposure to substance without evidence for disease' or 'no exposure' or unknown)),equivalent-to %4,subclass-of 'disease stage'; is-required (when %1 subclass-of 'occurrence of disease'); is-excluded (when %1 not-subclass-of 'occurrence of disease') +vaccination,Varicella-zoster virus vaccine,VO:0000669,,, +infectious challenge,Hepacivirus C,NCBITaxon:11103,,, +transplant or transfusion,blood,UBERON:0000178,,, +infectious challenge,Dengue virus,NCBITaxon:12637,dengue hemorrhagic fever,DOID:12206,Acute/Recent onset +allergic disease,Triticum aestivum,NCBITaxon:4565,wheat allergy,DOID:3660,Post +occurrence of autoimmune disease,,,acquired immunodeficiency syndrome,DOID:635,Chronic +occurrence of cancer,,,colon cancer,DOID:219,Chronic +occurrence of 
asymptomatic infection,Dengue virus,NCBITaxon:12637,,, +exposure with existing immune reactivity without evidence for disease,Dengue virus 2,NCBITaxon:11060,,, +documented exposure without evidence for disease,Dengue virus,NCBITaxon:12637,,,Post +environmental exposure to endemic/ubiquitous agent without evidence for disease,Dengue virus,NCBITaxon:12637,,, +no exposure,,,,, \ No newline at end of file diff --git a/docs/examples/immune_exposures_2.html b/docs/examples/immune_exposures_2.html new file mode 100644 index 000000000..1c9a985aa --- /dev/null +++ b/docs/examples/immune_exposures_2.html @@ -0,0 +1,130 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
exposure process reportedexposure material reportedexposure material iddisease reporteddisease ontology iddisease stage reported
is-required; subclass-of 'exposure process';subclass-of|equivalent-to 'material entity'; equivalent-to %3; is-required (when %1 subclass-of ('administering substance in vivo' or 'exposure to substance without evidence for disease' or 'occurrence of infectious disease' or 'occurrence of allergic disease')); is-excluded (when %1 equivalent-to 'occurrence of disease'); is-excluded (when %1 subclass-of ('occurrence of autoimmune disease' or 'occurrence of cancer' or 'no exposure' or unknown))equivalent-to %2subclass-of disease; subclass-of 'has material basis in' some %2 (when %1 not-subclass-of ('occurrence of autoimmune disease' or 'occurrence of cancer')); equivalent-to %5; is-required (when %1 subclass-of 'occurrence of disease'); is-excluded (when %1 equivalent-to 'administering substance in vivo'); is-excluded (when %1 subclass-of (vaccination or 'transplant or transfusion' or 'exposure to substance without evidence for disease' or 'no exposure' or unknown))equivalent-to %4subclass-of 'disease stage'; is-required (when %1 subclass-of 'occurrence of disease'); is-excluded (when %1 not-subclass-of 'occurrence of disease')
vaccinationVaricella-zoster virus vaccineVO:0000669
infectious challengeHepacivirus CNCBITaxon:11103
transplant or transfusionbloodUBERON:0000178
infectious challengeDengue virusNCBITaxon:12637dengue hemorrhagic feverDOID:12206Acute/Recent onset
allergic diseaseTriticum aestivumNCBITaxon:4565wheat allergyDOID:3660Post
occurrence of autoimmune diseaseacquired immunodeficiency syndromeDOID:635Chronic
occurrence of cancercolon cancerDOID:219Chronic
occurrence of asymptomatic infectionDengue virusNCBITaxon:12637
exposure with existing immune reactivity without evidence for diseaseDengue virus 2NCBITaxon:11060
documented exposure without evidence for diseaseDengue virusNCBITaxon:12637Post
environmental exposure to endemic/ubiquitous agent without evidence for diseaseDengue virusNCBITaxon:12637
no exposure
+ + diff --git a/docs/examples/nucleus.html b/docs/examples/nucleus.html index 4a672868e..ea61f9abc 100644 --- a/docs/examples/nucleus.html +++ b/docs/examples/nucleus.html @@ -2,77 +2,80 @@ - +
+ + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + -
LABEL SubClass Of
cellcellular_componentcellcellular_component
cell part'part of' some cell|cellular_componentcell part'part of' some cell|cellular_component
cellular_componentcellular_component
intracellularcell partintracellularcell part
intracellular membrane-bounded organelleintracellular organelle|membrane-bounded organelleintracellular membrane-bounded organelleintracellular organelle|membrane-bounded organelle
intracellular organelle'part of' some intracellular|intracellular part|organelleintracellular organelle'part of' some intracellular|intracellular part|organelle
intracellular organelle lumen'part of' some 'intracellular organelle'|intracellular organelle part|organelle lumenintracellular organelle lumen'part of' some 'intracellular organelle'|intracellular organelle part|organelle lumen
intracellular organelle part'part of' some 'intracellular organelle'|'part of' some intracellular|'part of' some organelle|intracellular part|organelle partintracellular organelle part'part of' some 'intracellular organelle'|'part of' some intracellular|'part of' some organelle|intracellular part|organelle part
intracellular part'part of' some intracellular|cell partintracellular part'part of' some intracellular|cell part
membrane-bounded organelleorganellemembrane-bounded organelleorganelle
membrane-enclosed lumencellular_componentmembrane-enclosed lumencellular_component
nuclear lumen'part of' some nucleus|intracellular organelle lumen|nuclear partnuclear lumen'part of' some nucleus|intracellular organelle lumen|nuclear part
nuclear part'part of' some nucleus|intracellular organelle partnuclear part'part of' some nucleus|intracellular organelle part
nucleusintracellular membrane-bounded organellenucleusintracellular membrane-bounded organelle
organellecellular_componentorganellecellular_component
organelle lumen'part of' some organelle|membrane-enclosed lumen|organelle partorganelle lumen'part of' some organelle|membrane-enclosed lumen|organelle part
organelle part'part of' some organelle|cellular_componentorganelle part'part of' some organelle|cellular_component
\ No newline at end of file + + diff --git a/docs/validate-profile.md b/docs/validate-profile.md new file mode 100644 index 000000000..5ec1cde04 --- /dev/null +++ b/docs/validate-profile.md @@ -0,0 +1,31 @@ +# Validate Profile + +OWL 2 has a number of profiles that strike different balances between expressive power and reasoning efficiency. ROBOT can validate an input ontology against a profile (EL, DL, RL, QL, and Full) and generate a report. For example: + + robot validate-profile --profile EL \ + --input merged.owl \ + --output results/merged-validation.txt + +## Profiles + +* EL +* RL +* QL +* DL +* Full + +--- + +## Error Messages + +### Missing Profile Error + +Occurs when a `--profile` option is not provided. + +### Invalid Profile Error + +Occurs when the argument to `--profile` is not one of the following: EL, DL, RL, QL, or Full. See the above documentation for more details. + +### Profile Violation Error + +Occurs when the `--input` ontology does not conform to the `--profile`. See the profile descriptions for more details. diff --git a/docs/validate.md b/docs/validate.md index cc41399b0..f2a6b927c 100644 --- a/docs/validate.md +++ b/docs/validate.md @@ -1,31 +1,304 @@ # Validate -OWL 2 has a number of profiles that strike different balances between expressive power and reasoning efficiency. ROBOT can validate an input ontology against a profile (EL, DL, RL, QL, and Full) and generate a report. 
For example: +- [Overview](#overview) + - [Formats](#formats) + - [Other options](#other-options) +- [Input file organisation](#input-file-organisation) + - [Validation rules](#validation-rules) + - [Comments](#comments) + - [Cell data](#cell-data) + - [Multi-value cells](#multi-value-cells) +- [Validation rule syntax](#validation-rule-syntax) + - [Rule types](#rule-types) + - [Wildcards](#wildcards) + - [When-clauses](#when-clauses) + - [Compound rule-types](#compound-rule-types) +- [Error Messages](#error-messages) - robot validate-profile --profile EL \ - --input merged.owl \ - --output results/merged-validation.txt +## Overview -## Profiles +Validates tables (CSV or TSV files) (`--table`) against an input ontology (`--input`) using the sets of rules defined (per table) in the table files, and writes the output to TXT, HTML, or XLSX files in the output directory (`--output-dir`) with the same base filename. If no output format is specified then the output is directed to STDOUT. For example: -* EL -* RL -* QL -* DL -* Full + robot validate --input immune_exposures.owl \ + --table immune_exposures.csv \ + --reasoner hermit \ + --no-fail true \ + --format TXT \ + --output-dir results/ ---- +In this case the command will generate a single file called `immune_exposures.txt` in the `results/` directory. + +One can also specify multiple table files to validate against a single input ontology. In that case there will be multiple output files corresponding to each table in the output directory. For example: + + robot validate --input immune_exposures.owl \ + --table immune_exposures.csv \ + --table immune_exposures_2.csv \ + --reasoner hermit \ + --no-fail true \ + --format HTML \ + --output-dir results/ + +In this case two files: `immune_exposures.html` and `immune_exposures_2.html` will appear in the `results/` directory. + +### Formats + +* `txt`: a list of failed validations. 
E.g.: +``` +At immune_exposures.csv row 17, column 2: Cell is empty but rule: "is-required true" does not allow this. +``` +* `html`: a [Bootstrap](https://getbootstrap.com/) HTML version of the `--table`. Cells containing bad data (failed validations) are highlighted red. Hovering over the red cells shows a tooltip with the message. These tables use CSS and JavaScript plugins from the BootstrapCDN, therefore they require an internet connection to properly view the table. See [HTML Tables](#html-tables-standalone) for more details. +* `xlsx`: an Excel spreadsheet version of the `--table`. Cells containing bad data are highlighted red and have a Comment on them containing the message. + +### Other options + +#### Exit Codes (`--no-fail`) + +If there are any invalid cells, `validate` will fail by default with exit code `1`. This is good for use in [`Makefile` workflows](/make), as it will stop the workflow when there is a non-zero exit code. You can override this with `--no-fail true` as shown in the above examples if you want to bypass failures and always exit with code `0`. + +#### Logging (`--silent`) + +`validate` will only print a summary message at the end if there were any failures (unless no `--format` is specified, in which case it will always print to STDOUT). If you would like to print all invalid data messages, include `--silent false`. + +#### Output Files (`--write-all`) + +`validate` will only write tables with failed validations to the output directory. If you wish to write _all_ tables, including those that did not have any failed validation, specify `--write-all true`. + +#### HTML Tables (`--standalone`) + +If the output format is HTML, all output tables will be written as "standalone" files. This means that they have a header containing the Bootstrap stylesheet and scripts ([for tooltips](https://getbootstrap.com/docs/4.5/components/tooltips)). 
If you want to plug the table data into an existing HTML file, you can use `--standalone false` to generate _just_ the table element. + +Note that the tooltips and styling will not work until the table is inserted into a file containing the required CSS and JavaScript from [BootstrapCDN](https://getbootstrap.com/docs/4.5/getting-started/introduction/). For offline viewing, you can also [download](https://getbootstrap.com/docs/4.5/getting-started/download/) the required files and provide a local path in the HTML header. The Bootstrap download does not include [jQuery](https://jquery.com/download/) or [Popper.js](https://cdn.jsdelivr.net/npm/popper.js@1.16.0/dist/umd/popper.min.js), which are required to enable tooltips (as well as a [small piece of JavaScript](https://getbootstrap.com/docs/4.5/components/tooltips/#example-enable-tooltips-everywhere)). + +#### Errors Table (`--errors`) + +The results that are written to the output directory contain all lines from the input tables, even if those lines don't have errors. You can choose to also output a table containing just the errors by specifying `--errors <path>`. If this path ends with `.csv`, the output will be comma-separated. Otherwise, the output will be tab-separated. + +This output will contain the following columns: +* **ID**: an ID for the error - this starts at 1 and increases with each error +* **table**: the name of the table this cell is in +* **cell**: the A1 notation for the location of the cell +* **level**: the violation level (at this time, this value will always be `error`) +* **rule ID**: a combination of `!` for the cell that the rule is written in +* **rule name**: the written text of the rule +* **value**: the value of the cell that failed +* **fix**: how to fix this error (at this time, this cell will always be empty) + +## Input file organisation + +### Validation rules + +Validation rules are read from the second row of the CSV or TSV file (`--table <path>`). 
+ +If the `--skip-row k` option is used, then the 'second row' is the second of the rows remaining in the table _after_ the kth row has been removed. For example, if you include validation rules in a [template](/robot), you might put the rules in the third row (after the template strings, which must be in the second row) and include `--skip-row 2`. + +Below is an example table. One or more validation rules can be specified for each column, separated by a semi-colon (`;`), and these rules are applied only to the data in that column. Each rule in a column will be validated independently, and if any one of those rules is violated, the data in the cell will be considered invalid (i.e., the data must pass all rules). For details on using rules as "OR" statements (i.e., the data must pass at least one rule), see [Compound rule-types](#compound-rule-types). + +|header A |header B |header C | +|--------------------------------|--------------------------------|-----------------------------| +|rule A1; rule A2; rule A3 ... |rule B1; rule B2; rule B3 ... |rule C1; rule C2; rule C3 ...| +|data |data |data | +|data |data |data | +|... | | | + +### Comments + +To comment out all of the rules for a given column, the list should be prefixed by '##'. To comment out particular rules from among the rules belonging to a given column, prefix those rules with '#'. For example: + +_To comment out all rules:_ +``` +## rule 1; rule 2; rule 3 +``` + +_To comment out rule 1 but not rule 2:_ +``` +# rule 1; rule 2 +``` + +_To comment out rule 2 but not rule 1:_ +``` +rule 1; # rule 2 +``` + +### Cell data + +Data cells must either be in the form of a named class, e.g. 'Dengue virus', a named individual, e.g. 'Dr. Smith', or a general class expression, e.g. ('Dengue virus' or 'Dengue virus 2'). IRIs or CURIEs may be used in lieu of labels if desired. + +When using labels within class expressions, the label must be enclosed in single quotes if it contains a space. 
No quotes should be used for single-word labels (e.g. `virus`). If you are just referring to a named class or individual, you do not need to enclose the label in single quotes. + +### Multi-value cells + +A data cell can contain more than one logical entity if these are separated using the pipe ('|') character. Such cells are called multi-value cells. When a rule is defined over a multi-value cell, it will be validated for each logical entity in that multi-value cell, and if the rule contains a [wildcard](#wildcards) that refers to a multi-value cell, then all possible interpretations of that rule will be validated against the current cell (which may itself be a multi-value cell). Consider, for example: + +|header 1 |header 2 | +|--------------------------------|--------------------------------| +| |subclass-of %1 | +|data1A \| data1B |data2A \| data2B | +|... | | + +In this case, the following validations will be performed: + +* data2A subclass-of data1A +* data2A subclass-of data1B +* data2B subclass-of data1A +* data2B subclass-of data1B + +## Validation rule syntax + +Individual rules must be of the form: + + [(when & ...)] + +Where: + +* `` can be one of (or a combination of -- see [Compound rule-types](#compound-rule-types)): + + * is-required + * is-excluded + * subclass-of + * direct-subclass-of + * not-subclass-of + * not-direct-subclass-of + * superclass-of + * direct-superclass-of + * not-superclass-of + * not-direct-superclass-of + * equivalent-to + * not-equivalent-to + * instance-of + * direct-instance-of + * not-instance-of + +* `` can be any of the above rule types (or a combination of -- see [Compound rule-types](#compound-rule-types)) _except_ `is-required` and `is-excluded` + +### Rule types + +* The following rule types are called _presence_ rule types. They place restrictions on whether a cell in a given column can have data or not, and may take a value of either `true` (equivalently: `t`, `yes`, `y`) or `false` (equivalently: `f`, `no`, `n`). 
If no truth value is supplied, `true` is assumed. + + * is-required + * When set to `true` (implicitly or explicitly), this indicates that cells in this column should have data, possibly conditional upon an optional when-clause. E.g. `is-required (when )` (see [When-clauses](#when-clauses)) + * is-excluded + * When set to `true` (implicitly or explicitly), this indicates that cells in this column must be empty, possibly conditional upon an optional when-clause. E.g. `is-excluded (when )` (see [When-clauses](#when-clauses)) + +* The following rule types are called _query_ rule types. They involve queries to the reasoner. Consider the example rule: ` 'vaccine'`. Replacing `` with each of the below results in the following corresponding reasoner queries: + + * subclass-of + * queries the reasoner to verify that the class represented in the current cell is a subclass of the class 'vaccine' + * direct-subclass-of + * queries the reasoner to verify that the class represented in the current cell is a direct subclass of the class 'vaccine' + * not-subclass-of + * queries the reasoner to verify that the class represented in the current cell is **not** a subclass of the class 'vaccine' + * not-direct-subclass-of + * queries the reasoner to verify that the class represented in the current cell is **not** a direct subclass of the class 'vaccine' + * superclass-of + * queries the reasoner to verify that the class represented in the current cell is a superclass of the class 'vaccine' + * direct-superclass-of + * queries the reasoner to verify that the class represented in the current cell is a direct superclass of the class 'vaccine' + * not-superclass-of + * queries the reasoner to verify that the class represented in the current cell is **not** a superclass of the class 'vaccine' + * not-direct-superclass-of + * queries the reasoner to verify that the class represented in the current cell is **not** a direct superclass of the class 'vaccine' + * equivalent-to + * queries the reasoner 
to verify that the class represented in the current cell is equivalent to the class 'vaccine' + * not-equivalent-to + * queries the reasoner to verify that the class represented in the current cell is **not** equivalent to the class 'vaccine' + * instance-of + * queries the reasoner to verify that the individual represented in the current cell is an instance of the class 'vaccine' + * direct-instance-of + * queries the reasoner to verify that the individual represented in the current cell is a direct instance of the class 'vaccine' + * not-instance-of + * queries the reasoner to verify that the individual represented in the current cell is **not** an instance of the class 'vaccine' + +#### Further notes on `` and `` + +* For the rule types: `is-required` and `is-excluded`, `` is _optional_ and if not specified defaults to _true_. + +* For other rule types, `` is _mandatory_ and must be in the form of a description logic (DL) expression query, in Manchester syntax. + +* `instance-of`, `direct-instance-of`, and `not-instance-of` may only be applied to named individuals. + +* `subclass-of`, `direct-subclass-of`, `not-subclass-of`, `superclass-of`, `direct-superclass-of`, `not-superclass-of`, and `equivalent-to` may be applied only to classes or general class expressions. + +* `` must describe an individual, a class, or a generalised class expression and can be in the form of an `rdfs:label`, an IRI, an abbreviated IRI, a general DL expression, or a wildcard. + +### Wildcards + +Wildcards of the form `%n` can be specified within ``, ``, and `` clauses, and are used to indicate the entity described by the data in the _nth_ cell of a given row. E.g.: + +``` +is-required (when %1 equivalent-to ('Dengue virus' or 'Dengue virus 2')) +``` + +requires data in the current cell whenever the class indicated in column 1 of the current row is either 'Dengue virus' or 'Dengue virus 2'. 
+ +``` +subclass-of hasBasisIn in some %2 (when %1 subclass-of ('Dengue virus' or 'Dengue virus 2')) +``` + +requires that, whenever the class indicated in column 1 of the current row is a subclass of the class consisting of the union of `'Dengue virus'` and `'Dengue virus 2'`, the data in the current cell must be a subclass of the set of classes that bear the relation `hasBasisIn` to the class indicated in column 2 of the same row. + +### When-clauses + +The optional when-clause indicates that the rule given in the main clause should be validated only when the when-clause is satisfied. If multiple when-clauses are specified (separated by `'&'`), then each when-clause must evaluate to _true_ in order for the main validation rule to execute. E.g.: + +``` +direct-subclass-of %2 (when %5 superclass-of 'exposure process' & %2 superclass-of vaccine) +``` + +indicates that the validation rule `'direct-subclass-of %2'` should only be run against the current cell when both the cell in column 5 is a superclass of `'exposure process'` and the cell in column 2 is a superclass of `vaccine`. + +### Compound rule-types + +`` and `` can take the form: `rule-type-1|rule-type-2|rule-type-3|...` + +E.g. +``` +subclass-of|equivalent-to %3 (when %4 subclass-of|equivalent-to %2) +``` + +requires that, whenever the contents of the cell in column 4 of the given row are either a subclass-of or equivalent-to the contents of the cell in column 2, then the contents of the current cell must be a subclass-of or equivalent-to the contents of the cell in column 3. ## Error Messages -### Missing Profile Error +### Malformed Rule Error + +The indicated rule could not be parsed. See: [Validation Rule Syntax](#validation-rule-syntax). + +### Invalid Presence Rule Error + +A rule of the presence type must be in the form of a truth value. If this is omitted it defaults to 'true'. For example, the following are valid: `is-required true`, `is-excluded`, `is-excluded false`. 
See: [Presence Types and Query Types](#presence-types-and-query-types). + +### Column Out of Range Error + +When a wildcard is used as part of a rule, the column number indicated must not be greater than the number of columns that are in the table data provided. See: [Wildcards](#wildcards). + +### No Main Error + +When a when-clause is specified, a main clause must also be specified, with the latter being evaluated only when the when-clause is satisfied. See: [Validation Rule Syntax](#validation-rule-syntax). + +### Malformed When Clause Error + +The indicated when-clause could not be parsed. See: [When-Clauses](#when-clauses). + +### Invalid When Type Error + +The indicated when rule type is not one of the rule types allowed in a when-clause. See: [Validation Rule Syntax](#validation-rule-syntax). + +### Unrecognized Query Type Error + +The query type indicated is not one of the recognized query types. See: [Presence Types and Query Types](#presence-types-and-query-types). + +### Unrecognized Rule Type Error + +The rule type indicated is not one of the recognized rule types. See: [Validation Rule Syntax](#validation-rule-syntax). + +### Table Not Provided Error -Occurs when a `--profile` option is not provided. +The name of a `.csv` or `.tsv` file containing the table data to validate must be supplied using the `--table` option of the `validate` command. E.g. `robot validate --input myontology.owl --table mytable.csv`. -### Invalid Profile Error +### Incorrect Table Format Error -Occurs when the argument to `--profile` is not one of the following: EL, DL, RL, QL, or Full. See the above documentation for more details. +The name of the file specified using the `--table` option must end in either `.csv` or `.tsv`. -### Profile Violation Error +### Invalid Skip Row Error -Occurs when the `--input` ontology does not conform to the `--profile`. See the profile descriptions for more details. +The value of the `--skip-row` option must be an integer. 
diff --git a/pom.xml b/pom.xml index 17da5f969..e775770df 100644 --- a/pom.xml +++ b/pom.xml @@ -158,11 +158,6 @@ slf4j-log4j12 1.7.10 - - commons-io - commons-io - 2.4 - net.sourceforge.owlapi owlapi-distribution @@ -197,11 +192,6 @@ jfact 4.0.4 - - net.sf.py4j - py4j - 0.10.8.1 - diff --git a/robot-command/pom.xml b/robot-command/pom.xml index 04a98c779..ee5b77925 100644 --- a/robot-command/pom.xml +++ b/robot-command/pom.xml @@ -81,5 +81,10 @@ commons-cli 1.2 + + commons-io + commons-io + 2.4 + diff --git a/robot-command/src/main/java/org/obolibrary/robot/CommandLineInterface.java b/robot-command/src/main/java/org/obolibrary/robot/CommandLineInterface.java index a26de9930..1b935c87d 100644 --- a/robot-command/src/main/java/org/obolibrary/robot/CommandLineInterface.java +++ b/robot-command/src/main/java/org/obolibrary/robot/CommandLineInterface.java @@ -42,6 +42,7 @@ private static CommandManager initManager() { m.addCommand("report", new ReportCommand()); m.addCommand("template", new TemplateCommand()); m.addCommand("unmerge", new UnmergeCommand()); + m.addCommand("validate", new ValidateCommand()); m.addCommand("validate-profile", new ValidateProfileCommand()); m.addCommand("verify", new VerifyCommand()); return m; diff --git a/robot-command/src/main/java/org/obolibrary/robot/ValidateCommand.java b/robot-command/src/main/java/org/obolibrary/robot/ValidateCommand.java new file mode 100644 index 000000000..edff56615 --- /dev/null +++ b/robot-command/src/main/java/org/obolibrary/robot/ValidateCommand.java @@ -0,0 +1,278 @@ +package org.obolibrary.robot; + +import java.io.File; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.Options; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.reasoner.OWLReasonerFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Handles command-line options for the {@link 
ValidateOperation}. + * + * @author Michael Cuffaro + */ +public class ValidateCommand implements Command { + /** Logger */ + private static final Logger logger = LoggerFactory.getLogger(ValidateCommand.class); + + /** Used to store the command-line options for the command. */ + private Options options; + + /** Namespace for error messages. */ + private static final String NS = "validate#"; + + private static final String tableNotProvidedError = + NS + "TABLE NOT PROVIDED ERROR a table file must be specified to run this command"; + + private static final String incorrectTableFormatError = + NS + "INCORRECT TABLE FORMAT ERROR the table file must end in either .csv or .tsv"; + + private static final String incorrectOutputFormatError = + NS + "INCORRECT OUTPUT FORMAT ERROR the output format must be one of HTML, XLSX, or TXT"; + + private static final String missingOutputDirectoryError = + NS + "MISSING OUTPUT DIRECTORY ERROR output directory required when format is specified"; + + private static final String invalidOutputDirectoryError = + NS + + "INVALID OUTPUT DIRECTORY ERROR the specified output directory does not exist or " + + "is not writable"; + + private static final String invalidSkipRowError = + NS + "INVALID SKIP ROW ERROR the specified skip-row must be an integer"; + + /** Constructor: Initialises the command with its various options. 
*/ + public ValidateCommand() { + Options o = CommandLineHelper.getCommonOptions(); + o.addOption("t", "table", true, "file containing data (in CSV or TSV format) to validate"); + o.addOption( + "k", + "skip-row", + true, + "ignore the given row from the tables to be validated " + + "(where the first row in the file is row #1); this option is ignored if the row " + + "to skip is greater than the total number of rows in a table"); + o.addOption("i", "input", true, "input file containing the ontology data to validate against"); + o.addOption( + "r", + "reasoner", + true, + "reasoner to use; must be one of: structural, hermit, jfact, " + + "emr, elk (if left unspecified, the default reasoner will be used)"); + o.addOption( + "o", + "output-dir", + true, + "directory where output files will be saved (ignored if " + + "format option is left unspecified)"); + o.addOption( + "f", + "format", + true, + "format for output file (XLSX, HTML, TXT) (if unspecified, " + + "plain text output is sent to STDOUT)"); + o.addOption( + "s", + "standalone", + true, + "If false, do not put HTML headers/script in the HTML output (this option is ignored for other formats)"); + o.addOption("n", "no-fail", true, "If true, do not fail even if there are failed validations"); + o.addOption("S", "silent", true, "If false, print all failed validations"); + o.addOption( + "w", + "write-all", + true, + "If true, write all tables to output directory - including tables with no failed validations"); + o.addOption("e", "errors", true, "Write errors-only for all tables to given path"); + options = o; + } + + /** + * Returns the name of the command + * + * @return name + */ + public String getName() { + return "validate"; + } + + /** + * Returns a brief description of the command. + * + * @return description + */ + public String getDescription() { + return "validate the data in the given file"; + } + + /** + * Returns the command-line usage for the command. 
+ * + * @return usage + */ + public String getUsage() { + return "validate --table [--table ...] [--skip-row k] --input " + + "[--reasoner ] [--format (HTML|XLSX|TXT)] [--output-dir ] " + + "[--standalone (true|false)] [--no-fail (true|false)] [--silent (true|false)]"; + } + + /** + * Returns the command-line options for the command. + * + * @return options + */ + public Options getOptions() { + return options; + } + + /** + * Handles the command-line and file operations for the ValidateOperation + * + * @param args strings to use as arguments + */ + public void main(String[] args) { + try { + execute(null, args); + } catch (Exception e) { + CommandLineHelper.handleException(e); + } + } + + /** + * Accepts an input state and command line arguments and calls ValidateOperation to validate the + * data that has been passed. Returns the final state of the command. + * + * @param state the state from the previous command, or null + * @param args the command-line arguments + * @return the input state, unchanged + * @throws Exception on any problem + */ + public CommandState execute(CommandState state, String[] args) throws Exception { + CommandLine line = CommandLineHelper.getCommandLine(getUsage(), getOptions(), args); + if (line == null) { + return null; + } + + if (state == null) { + state = new CommandState(); + } + + // Get the input ontology either from the command line directly or as part of the command + // chain: + IOHelper ioHelper = CommandLineHelper.getIOHelper(line); + state = CommandLineHelper.updateInputOntology(ioHelper, state, line); + OWLOntology ontology = state.getOntology(); + + // Get the reasoner specified by the user and if none is specified, use the default: + if (CommandLineHelper.getOptionalValue(line, "reasoner") == null) { + logger.info("No reasoner specified. 
Will use the default."); + } + OWLReasonerFactory reasonerFactory = CommandLineHelper.getReasonerFactory(line, true); + + // Override default reasoner options with command-line options + Map validateOptions = ValidateOperation.getDefaultOptions(); + for (String option : validateOptions.keySet()) { + if (line.hasOption(option)) { + validateOptions.put(option, line.getOptionValue(option)); + } + } + + String outFormat = OptionsHelper.getOption(validateOptions, "format"); + String outDir = OptionsHelper.getOption(validateOptions, "output-dir"); + + // If an output format has been specified, make sure that it is of a supported kind and that + // the output directory is valid. If it hasn't been specified it will just be passed as null to + // the validate operation. + if (outFormat != null) { + if (!(outFormat.equalsIgnoreCase("html") + || outFormat.equalsIgnoreCase("xlsx") + || outFormat.equalsIgnoreCase("txt"))) { + throw new IllegalArgumentException(incorrectOutputFormatError); + } + + if (outDir == null) { + throw new IllegalArgumentException(missingOutputDirectoryError); + } + + File d = new File(outDir); + if (!d.exists() || !d.isDirectory() || !d.canWrite()) { + throw new IllegalArgumentException(invalidOutputDirectoryError); + } + } + + // Get the paths to the tables given in the --table arguments. + List tablePaths = CommandLineHelper.getOptionalValues(line, "table"); + if (tablePaths.isEmpty()) { + throw new IllegalArgumentException(tableNotProvidedError); + } + + // Extract all of the data from each of the given table paths. Only TSV and CSV tables + // are currently supported. 
+ Map>> tables = new HashMap<>(); + for (String tablePath : tablePaths) { + List> tableData; + if (tablePath.toLowerCase().endsWith(".tsv")) { + tableData = IOHelper.readTSV(tablePath); + } else if (tablePath.toLowerCase().endsWith(".csv")) { + tableData = IOHelper.readCSV(tablePath); + } else { + throw new IllegalArgumentException(incorrectTableFormatError); + } + + // If the `--skip-row` switch has been specified, then possibly delete the specified row from + // the table: + String rowToSkipStr; + if ((rowToSkipStr = CommandLineHelper.getOptionalValue(line, "skip-row")) != null) { + try { + int rowToSkip = Integer.parseInt(rowToSkipStr); + if (rowToSkip > tableData.size() || rowToSkip < 1) { + logger.warn( + "ignoring skip-row value: {}; there are {} rows in '{}'", + rowToSkip, + tableData.size(), + tablePath); + } else { + tableData.remove(rowToSkip - 1); + } + } catch (NumberFormatException e) { + throw new IllegalArgumentException(invalidSkipRowError); + } + } + tables.put(tablePath, tableData); + } + + boolean noFail = CommandLineHelper.getBooleanValue(line, "no-fail", false); + + // Finally send everything to the validate operation: + List invalidTables = + ValidateOperation.validate(tables, ontology, ioHelper, reasonerFactory, validateOptions); + + if (!invalidTables.isEmpty() && !noFail) { + // Print last error message - a summary of tables with errors + StringBuilder sb = new StringBuilder(); + sb.append("VALIDATION FAILED - the following table(s) had one or more rule violation:"); + for (String it : invalidTables) { + sb.append("\n- ").append(it); + if (outDir != null && outFormat != null) { + // If there is an output for this (not just printed to console) + // provide the path to the output file + sb.append(" (") + .append(new File(outDir).getPath()) + .append("/") + .append(it.split("\\.")[0]) + .append(".") + .append(outFormat.toLowerCase()) + .append(")"); + } + } + System.out.println(sb.toString()); + System.exit(1); + } + return state; + } +} diff 
--git a/robot-command/src/main/java/org/obolibrary/robot/ValidateProfileCommand.java b/robot-command/src/main/java/org/obolibrary/robot/ValidateProfileCommand.java index 9dc716221..0727067d9 100644 --- a/robot-command/src/main/java/org/obolibrary/robot/ValidateProfileCommand.java +++ b/robot-command/src/main/java/org/obolibrary/robot/ValidateProfileCommand.java @@ -17,7 +17,7 @@ public class ValidateProfileCommand implements Command { private final Options options; /** Namespace for error messages. */ - private static final String NS = "validate#"; + private static final String NS = "validate-profile#"; /** Error message when a profile is not provided. */ private static final String missingProfileError = diff --git a/robot-core/pom.xml b/robot-core/pom.xml index d2b863a54..887ef75ef 100644 --- a/robot-core/pom.xml +++ b/robot-core/pom.xml @@ -82,18 +82,23 @@ org.apache.poi poi - 3.15 + 4.1.2 org.apache.poi poi-ooxml - 3.15 + 4.1.2 com.github.jsonld-java jsonld-java 0.5.1 + + com.hubspot.jinjava + jinjava + 2.5.2 + org.yaml snakeyaml @@ -168,5 +173,10 @@ gson 2.8.0 + + net.sf.py4j + py4j + 0.10.8.1 + diff --git a/robot-core/src/main/java/org/obolibrary/robot/ExportOperation.java b/robot-core/src/main/java/org/obolibrary/robot/ExportOperation.java index c72a0ae2d..f3242092e 100644 --- a/robot-core/src/main/java/org/obolibrary/robot/ExportOperation.java +++ b/robot-core/src/main/java/org/obolibrary/robot/ExportOperation.java @@ -47,8 +47,9 @@ public class ExportOperation { private static final OWLDataFactory dataFactory = OWLManager.getOWLDataFactory(); - private static final EmptyShortFormProvider emptyProvider = new EmptyShortFormProvider(); - private static final EmptyIRIShortFormProvider emptyIRIProvider = new EmptyIRIShortFormProvider(); + protected static final EmptyShortFormProvider emptyProvider = new EmptyShortFormProvider(); + protected static final EmptyIRIShortFormProvider emptyIRIProvider = + new EmptyIRIShortFormProvider(); // All synonym property IRIs 
private static final List synonymProperties = @@ -1366,7 +1367,7 @@ private static List propertyExpressionsToString( * @param object OWLObject to render * @return String rendering of OWLObject based on renderer type */ - private static String renderManchester( + protected static String renderManchester( RendererType rt, ShortFormProvider provider, OWLObject object) { ManchesterOWLSyntaxObjectRenderer renderer; StringWriter sw = new StringWriter(); diff --git a/robot-core/src/main/java/org/obolibrary/robot/IOHelper.java b/robot-core/src/main/java/org/obolibrary/robot/IOHelper.java index b495b3b82..2c92efa47 100644 --- a/robot-core/src/main/java/org/obolibrary/robot/IOHelper.java +++ b/robot-core/src/main/java/org/obolibrary/robot/IOHelper.java @@ -741,6 +741,36 @@ public Set extractTerms(String input) { return results; } + /** + * Convert a row index and column index for a cell to A1 notation. + * + * @param rowNum row index + * @param colNum column index + * @return A1 notation for cell location + */ + public static String cellToA1(int rowNum, int colNum) { + // To store result (Excel column name) + StringBuilder colLabel = new StringBuilder(); + + while (colNum > 0) { + // Find remainder + int rem = colNum % 26; + + // If remainder is 0, then a + // 'Z' must be there in output + if (rem == 0) { + colLabel.append("Z"); + colNum = (colNum / 26) - 1; + } else { + colLabel.append((char) ((rem - 1) + 'A')); + colNum = colNum / 26; + } + } + + // Reverse the string and print result + return colLabel.reverse().toString() + rowNum; + } + /** * Given a term string, use the current prefixes to create an IRI. * @@ -1245,6 +1275,21 @@ public static List> readTable(String path) throws IOException { return TemplateHelper.readTable(path); } + /** + * Write a table from a list of arrays. 
+ * + * @param table List of arrays to write + * @param path path to write to + * @throws IOException + */ + public static void writeTable(List table, String path) throws IOException { + char separator = '\t'; + if (path.endsWith(".csv")) { + separator = ','; + } + writeTable(table, new File(path), separator); + } + /** * Write a table from a list of arrays. * diff --git a/robot-core/src/main/java/org/obolibrary/robot/TableValidator.java b/robot-core/src/main/java/org/obolibrary/robot/TableValidator.java new file mode 100644 index 000000000..7a4a60810 --- /dev/null +++ b/robot-core/src/main/java/org/obolibrary/robot/TableValidator.java @@ -0,0 +1,1289 @@ +package org.obolibrary.robot; + +import com.google.common.collect.Lists; +import java.io.*; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.commons.io.FilenameUtils; +import org.apache.poi.ss.usermodel.FillPatternType; +import org.apache.poi.ss.usermodel.IndexedColors; +import org.apache.poi.ss.usermodel.Workbook; +import org.obolibrary.robot.export.*; +import org.obolibrary.robot.providers.CURIEShortFormProvider; +import org.obolibrary.robot.providers.QuotedAnnotationValueShortFormProvider; +import org.semanticweb.owlapi.apibinding.OWLManager; +import org.semanticweb.owlapi.io.OWLParserException; +import org.semanticweb.owlapi.manchestersyntax.parser.ManchesterOWLSyntaxClassExpressionParser; +import org.semanticweb.owlapi.model.*; +import org.semanticweb.owlapi.reasoner.Node; +import org.semanticweb.owlapi.reasoner.NodeSet; +import org.semanticweb.owlapi.reasoner.OWLReasoner; +import org.semanticweb.owlapi.util.ShortFormProvider; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class TableValidator { + + /** Logger */ + private static final Logger logger = LoggerFactory.getLogger(ValidateOperation.class); + + /** Namespace for error messages. 
*/ + private static final String NS = "validate#"; + + /** Error message for a rule that couldn't be parsed */ + private static final String malformedRuleError = NS + "MALFORMED RULE ERROR malformed rule: %s"; + + /** + * Error message for an invalid presence rule. Presence rules must be in the form of a truth + * value. + */ + private static final String invalidPresenceRuleError = + NS + + "INVALID PRESENCE RULE ERROR in column %d: invalid rule: \"%s\" for rule type: %s. Must be " + + "one of: true, t, 1, yes, y, false, f, 0, no, n"; + + /** Error message for invalid output format. */ + private static final String invalidFormatError = + NS + "INVALID FORMAT ERROR '%s' must be one of: html, xlsx, or txt"; + + /** + * Error reported when a wildcard in a rule specifies a column greater than the number of columns + * in the table. + */ + private static final String columnOutOfRangeError = + NS + + "COLUMN OUT OF RANGE ERROR in column %d: rule \"%s\" indicates a column number that is " + + "greater than the row length (%d)."; + + /** Error reported when a when-clause does not have a corresponding main clause */ + private static final String noMainError = + NS + "NO MAIN ERROR in column %d: rule: \"%s\" has when clause but no main clause."; + + /** Error reported when a when-clause can't be parsed */ + private static final String malformedWhenClauseError = + NS + "MALFORMED WHEN CLAUSE ERROR in column %d: unable to decompose when-clause: \"%s\"."; + + /** Error reported when a when-clause is of an invalid or inappropriate type */ + private static final String invalidWhenTypeError = + NS + + "INVALID WHEN TYPE ERROR in column %d: in clause: \"%s\": Only rules of type: %s are " + + "allowed in a when clause."; + + /** Error reported when a query type is unrecognized */ + private static final String unrecognizedQueryTypeError = + NS + + "UNRECOGNIZED QUERY TYPE ERROR in column %d: query type \"%s\" not recognized in rule " + + "\"%s\"."; + + /** Error reported when a rule 
type is not recognized */ + private static final String unrecognizedRuleTypeError = + NS + "UNRECOGNIZED RULE TYPE ERROR in column %d: unrecognized rule type \"%s\"."; + + /** Reverse map from rule types (as Strings) to RTypeEnums, populated at load time */ + private static final Map rule_type_to_rtenum_map = new HashMap<>(); + + static { + for (RTypeEnum r : RTypeEnum.values()) { + rule_type_to_rtenum_map.put(r.getRuleType(), r); + } + } + + /** + * Reverse map from rule types in the QUERY category (as Strings) to RTypeEnums, populated at load + * time + */ + private static final Map query_type_to_rtenum_map = new HashMap<>(); + + static { + for (RTypeEnum r : RTypeEnum.values()) { + if (r.getRuleCat() == RCatEnum.QUERY) { + query_type_to_rtenum_map.put(r.getRuleType(), r); + } + } + } + + private OWLOntology ontology; + private String outFormat = null; + private String outDir; + + /** The parser to use when validating class expressions */ + private ManchesterOWLSyntaxClassExpressionParser parser; + + private OWLReasoner reasoner; + + private static final OWLDataFactory dataFactory = OWLManager.getOWLDataFactory(); + + private Map iriToLabelMap; + private Map labelToIRIMap; + + private List invalidTables = new ArrayList<>(); + private List messages = new ArrayList<>(); + + private Table outTable = null; + private String currentTable; + private int colNum; + private int rowNum; + private boolean valid; + private boolean silent; + + private List errors = new ArrayList<>(); + private int errCount = 0; + + private Cell currentCell = null; + + private ShortFormProvider provider; + + public TableValidator( + OWLOntology ontology, + IOHelper ioHelper, + ManchesterOWLSyntaxClassExpressionParser parser, + OWLReasoner reasoner, + String outFormat, + String outDir) { + this.ontology = ontology; + this.parser = parser; + this.reasoner = reasoner; + if (outFormat != null) { + // Add the format and validate it + this.outFormat = outFormat.toLowerCase(); + if 
(!Lists.newArrayList("xlsx", "html", "txt").contains(this.outFormat)) { + throw new IllegalArgumentException(String.format(invalidFormatError, outFormat)); + } + } + this.outDir = outDir; + + // Extract from the ontology two convenience maps from rdfs:labels to IRIs and vice versa: + iriToLabelMap = OntologyHelper.getLabels(ontology); + labelToIRIMap = reverseIRILabelMap(iriToLabelMap); + + // Create some providers for rendering entities + ShortFormProvider oboProvider = new CURIEShortFormProvider(ioHelper.getPrefixes()); + provider = + new QuotedAnnotationValueShortFormProvider( + ontology.getOWLOntologyManager(), + oboProvider, + ioHelper.getPrefixManager(), + Collections.singletonList(OWLManager.getOWLDataFactory().getRDFSLabel()), + Collections.emptyMap()); + + errors.add( + new String[] {"ID", "table", "cell", "level", "rule ID", "rule name", "value", "fix"}); + } + + /** Turn logging on or off. */ + public void toggleLogging() { + silent = !silent; + } + + public List getErrors() { + return errors; + } + + /** + * Validate a set of tables. 
+ * + * @param tables tables to validate (map of table name to table contents) + * @param options map of validate options + * @return List of invalid tables (or empty list on success) + * @throws Exception on any problem + */ + public List validate(Map>> tables, Map options) + throws Exception { + + int skippedRow = Integer.parseInt(OptionsHelper.getOption(options, "skip-row", "0")); + + // Validate all of the tables in turn: + for (Map.Entry>> table : tables.entrySet()) { + // Reset valid for new table + valid = true; + outTable = new Table(outFormat); + String tablePath = table.getKey(); + List> data = table.getValue(); + + currentTable = + String.format( + "%s.%s", FilenameUtils.getBaseName(tablePath), FilenameUtils.getExtension(tablePath)); + if (outFormat == null) { + System.out.println(String.format("Validating %s ...", currentTable)); + } + + // Get the header and rules rows + List headerRow = data.remove(0); + List rulesRow = data.remove(0); + int ruleRowIdx = 2; + if (skippedRow < 3) { + ruleRowIdx = 3; + } + + // Add header and rules rows to Table object + for (int i = 0; i < headerRow.size(); i++) { + String rawRule = i < rulesRow.size() ? rulesRow.get(i) : ""; + // TODO - allow different providers? 
+ Column c = new Column(headerRow.get(i), parseRules(rawRule), rawRule, provider); + outTable.addColumn(c); + } + + List columns = outTable.getColumns(); + + // Validate data row by row, column by column + int add = 0; + for (rowNum = 0; rowNum < data.size(); rowNum++) { + if (rowNum + 1 == skippedRow) { + // Add 1 to each row num we report on to compensate for skipped row + add = 1; + } + List row = data.get(rowNum); + if (!hasContent(row)) { + logger.debug(String.format("Skipping empty row %d", rowNum + 1 + add)); + continue; + } + + Row outRow = null; + if (outFormat != null) { + outRow = new Row(); + } + + for (colNum = 0; colNum < columns.size(); colNum++) { + Column c = columns.get(colNum); + Map> rules = c.getRules(); + + // Get the contents of the current cell: + String cellString = colNum < row.size() ? row.get(colNum) : ""; + + // Extract all the data entries contained within the current cell: + List cellData = Lists.newArrayList(cellString.trim().split("\\|")); + + // Create the cell object + currentCell = getCell(c, cellData); + + if (rules == null || rules.isEmpty()) { + // No rules to validate, just add the cell exactly as is + if (outRow != null) { + outRow.add(currentCell); + } + continue; + } + + // For each of the rules applicable to this column, validate each entry in the cell + // against it: + for (Map.Entry> ruleEntry : rules.entrySet()) { + for (String rule : ruleEntry.getValue()) { + List interpolatedRules = interpolateRule(rule, row); + for (String interpolatedRule : interpolatedRules) { + for (String d : cellData) { + String errorMsg = validateRule(d, interpolatedRule, row, ruleEntry.getKey()); + if (errorMsg != null) { + // An error was returned, add to errors + errCount++; + errors.add( + new String[] { + String.valueOf(errCount), + currentTable, + IOHelper.cellToA1(rowNum + 3 + add, colNum + 1), + "error", + FilenameUtils.getBaseName(currentTable) + + "!" 
+ + IOHelper.cellToA1(ruleRowIdx, colNum + 1), + errorMsg, + d, + "" + }); + } + } + } + } + } + if (outRow != null) { + outRow.add(currentCell); + } + } + if (outFormat != null) { + outTable.addRow(outRow); + } + } + + if (!valid) { + invalidTables.add(currentTable); + } + + boolean standalone = OptionsHelper.optionIsTrue(options, "standalone"); + boolean writeAll = OptionsHelper.optionIsTrue(options, "write-all"); + + // Write table if: write-all is true OR table is not valid (for non-null formats) + if ((writeAll || !valid) && outFormat != null) { + String outPath = + outDir + "/" + FilenameUtils.getBaseName(tablePath) + "." + outFormat.toLowerCase(); + switch (outFormat.toLowerCase()) { + case "xlsx": + try (Workbook wb = outTable.asWorkbook("|"); + FileOutputStream fos = new FileOutputStream(outPath)) { + wb.write(fos); + } + break; + case "html": + try (PrintWriter out = new PrintWriter(outPath)) { + out.print(outTable.toHTML("|", standalone, true)); + } + break; + case "txt": + try (PrintWriter out = new PrintWriter(outPath)) { + for (String m : messages) { + out.println(m); + } + } + break; + } + } + } + return invalidTables; + } + + /** Given a map from IRIs to strings, return its inverse. */ + private static Map reverseIRILabelMap(Map source) { + HashMap target = new HashMap<>(); + for (Map.Entry entry : source.entrySet()) { + String reverseKey = entry.getValue(); + IRI reverseValue = entry.getKey(); + if (target.containsKey(reverseKey)) { + logger.warn( + "Duplicate rdfs:label \"{}\". 
Overwriting value \"{}\" with \"{}\"", + reverseKey, + target.get(reverseKey), + reverseValue); + } + target.put(reverseKey, reverseValue); + } + return target; + } + + /** + * Given an OWLClass describing a subject class from the ontology, an OWLClassExpression + * describing a rule to query that subject class against, a string representing the query types to + * use when evaluating the results of the query, and a list of strings describing a row from the + * CSV: Determine whether, for any of the given query types, the given subject is in the result + * set returned by the reasoner for that query type. Return true if it is in at least one of these + * result sets, and false if it is not. + */ + private boolean executeClassQuery( + OWLClass subjectClass, OWLClassExpression ruleCE, List row, String unsplitQueryType) + throws Exception { + + logger.debug( + String.format( + "execute_class_query(): Called with parameters: " + + "subjectClass: \"%s\", " + + "ruleCE: \"%s\", " + + "row: \"%s\", " + + "query type: \"%s\".", + subjectClass, ruleCE, row, unsplitQueryType)); + + // For each of the query types associated with the rule, check to see if the rule is satisfied + // thus interpreted. If it is, then we return true, since multiple query types are interpreted + // as a disjunction. If a query type is unrecognized, inform the user but continue on. 
+ String[] queryTypes = unsplitQueryType.split("\\|"); + for (String queryType : queryTypes) { + if (unknownRuleType(queryType)) { + throw new Exception( + String.format(unrecognizedQueryTypeError, colNum + 1, queryType, unsplitQueryType)); + } + + RTypeEnum qType = query_type_to_rtenum_map.get(queryType); + if (qType == RTypeEnum.SUB + || qType == RTypeEnum.DIRECT_SUB + || qType == RTypeEnum.NOT_SUB + || qType == RTypeEnum.NOT_DIRECT_SUB) { + // Check to see if the subjectClass is a (direct) subclass of the given rule: + // Get direct and not bools + boolean direct = false; + if (qType == RTypeEnum.DIRECT_SUB || qType == RTypeEnum.NOT_DIRECT_SUB) { + direct = true; + } + boolean not = false; + if (qType == RTypeEnum.NOT_SUB || qType == RTypeEnum.NOT_DIRECT_SUB) { + not = true; + } + NodeSet subClassesFound = reasoner.getSubClasses(ruleCE, direct); + if (not && !subClassesFound.containsEntity(subjectClass) + || !not && subClassesFound.containsEntity(subjectClass)) { + // NOT and not in set OR in set + return true; + } + + } else if (qType == RTypeEnum.SUPER + || qType == RTypeEnum.DIRECT_SUPER + || qType == RTypeEnum.NOT_SUPER + || qType == RTypeEnum.NOT_DIRECT_SUPER) { + // Check to see if the subjectClass is a (direct) superclass of the given rule: + // Get direct and not bools + boolean direct = false; + if (qType == RTypeEnum.DIRECT_SUPER || qType == RTypeEnum.NOT_DIRECT_SUPER) { + direct = true; + } + boolean not = false; + if (qType == RTypeEnum.NOT_SUPER || qType == RTypeEnum.NOT_DIRECT_SUPER) { + not = true; + } + + NodeSet superClassesFound = reasoner.getSuperClasses(ruleCE, direct); + if (not && !superClassesFound.containsEntity(subjectClass) + || !not && superClassesFound.containsEntity(subjectClass)) { + // NOT and not in set OR in set + return true; + } + + } else if (qType == RTypeEnum.EQUIV || qType == RTypeEnum.NOT_EQUIV) { + // Check to see if the subjectClass is an equivalent of the given rule: + boolean not = false; + if (qType == 
RTypeEnum.NOT_EQUIV) { + not = true; + } + Node equivClassesFound = reasoner.getEquivalentClasses(ruleCE); + if (!not && equivClassesFound.contains(subjectClass) + || not && !equivClassesFound.contains(subjectClass)) { + return true; + } + + } else { + // Spit out an error in this case but continue validating the other rules: + logger.error( + String.format( + "%s validation not possible for OWLClass %s.", qType.getRuleType(), subjectClass)); + } + } + return false; + } + + /** + * Given an OWLClassExpression describing an unnamed subject class from the ontology, an + * OWLClassExpression describing a rule to query that subject class against, a string representing + * the query types to use when evaluating the results of the query, and a list of strings + * describing a row from the CSV: Determine whether, for any of the given query types, the given + * subject is in the result set returned by the reasoner for that query type. Return true if it is + * in at least one of these result sets, and false if it is not. + */ + private boolean executeGeneralizedClassQuery( + OWLClassExpression subjectCE, + OWLClassExpression ruleCE, + List row, + String unsplitQueryType) + throws Exception { + + logger.debug( + String.format( + "execute_generalized_class_query(): Called with parameters: " + + "subjectCE: \"%s\", " + + "ruleCE: \"%s\", " + + "row: \"%s\", " + + "query type: \"%s\".", + subjectCE, ruleCE, row, unsplitQueryType)); + + // For each of the query types associated with the rule, check to see if the rule is satisfied + // thus interpreted. If it is, then we return true, since multiple query types are interpreted + // as a disjunction. If a query type is unrecognized, inform the user but continue on. 
+ String[] queryTypes = unsplitQueryType.split("\\|"); + for (String queryType : queryTypes) { + if (unknownRuleType(queryType)) { + throw new Exception( + String.format(unrecognizedQueryTypeError, colNum + 1, queryType, unsplitQueryType)); + } + + RTypeEnum qType = query_type_to_rtenum_map.get(queryType); + if (qType == RTypeEnum.SUB) { + // Check to see if the subjectClass is a subclass of the given rule: + OWLSubClassOfAxiom axiom = dataFactory.getOWLSubClassOfAxiom(subjectCE, ruleCE); + if (reasoner.isEntailed(axiom)) { + return true; + } + } else if (qType == RTypeEnum.NOT_SUB) { + // Check to see if the subjectClass is a subclass of the given rule: + OWLSubClassOfAxiom axiom = dataFactory.getOWLSubClassOfAxiom(subjectCE, ruleCE); + if (!reasoner.isEntailed(axiom)) { + return true; + } + } else if (qType == RTypeEnum.SUPER) { + // Check to see if the subjectClass is a superclass of the given rule: + OWLSubClassOfAxiom axiom = dataFactory.getOWLSubClassOfAxiom(ruleCE, subjectCE); + if (reasoner.isEntailed(axiom)) { + return true; + } + } else if (qType == RTypeEnum.NOT_SUPER) { + // Check to see if the subjectClass is a superclass of the given rule: + OWLSubClassOfAxiom axiom = dataFactory.getOWLSubClassOfAxiom(ruleCE, subjectCE); + if (!reasoner.isEntailed(axiom)) { + return true; + } + } else if (qType == RTypeEnum.EQUIV) { + OWLEquivalentClassesAxiom axiom = + dataFactory.getOWLEquivalentClassesAxiom(subjectCE, ruleCE); + if (reasoner.isEntailed(axiom)) { + return true; + } + } else if (qType == RTypeEnum.NOT_EQUIV) { + OWLEquivalentClassesAxiom axiom = + dataFactory.getOWLEquivalentClassesAxiom(subjectCE, ruleCE); + if (!reasoner.isEntailed(axiom)) { + return true; + } + } else { + // Spit out an error in this case but continue validating the other rules: + logger.error( + String.format( + "%s validation not possible for OWLClassExpression %s.", + qType.getRuleType(), subjectCE)); + } + } + return false; + } + + /** + * Given an OWLNamedIndividual 
describing a subject individual from the ontology, an + * OWLClassExpression describing a rule to query that subject individual against, a string + * representing the query types to use when evaluating the results of the query, and a list of + * strings describing a row from the CSV: Determine whether, for any of the given query types, the + * given subject is in the result set returned by the reasoner for that query type. Return true if + * it is in at least one of these result sets, and false if it is not. + */ + private boolean executeIndividualQuery( + OWLNamedIndividual subjectIndividual, + OWLClassExpression ruleCE, + List row, + String unsplitQueryType) + throws Exception { + + logger.debug( + String.format( + "execute_individual_query(): Called with parameters: " + + "subjectIndividual: \"%s\", " + + "ruleCE: \"%s\", " + + "row: \"%s\", " + + "query type: \"%s\".", + subjectIndividual, ruleCE, row, unsplitQueryType)); + + // For each of the query types associated with the rule, check to see if the rule is satisfied + // thus interpreted. If it is, then we return true, since multiple query types are interpreted + // as a disjunction. If a query type is unrecognized or not applicable to an individual, inform + // the user but continue on. 
+ String[] queryTypes = unsplitQueryType.split("\\|"); + for (String queryType : queryTypes) { + if (unknownRuleType(queryType)) { + throw new Exception( + String.format(unrecognizedQueryTypeError, colNum + 1, queryType, unsplitQueryType)); + } + + RTypeEnum qType = query_type_to_rtenum_map.get(queryType); + if (qType == RTypeEnum.INSTANCE + || qType == RTypeEnum.DIRECT_INSTANCE + || qType == RTypeEnum.NOT_INSTANCE) { + boolean not = false; + if (qType == RTypeEnum.NOT_INSTANCE) { + not = true; + } + NodeSet instancesFound = + reasoner.getInstances(ruleCE, qType == RTypeEnum.DIRECT_INSTANCE); + if (not && !instancesFound.containsEntity(subjectIndividual) + || !not && instancesFound.containsEntity(subjectIndividual)) { + return true; + } + } else { + // Spit out an error in this case but continue validating the other rules: + logger.error( + String.format( + "%s validation not possible for OWLNamedIndividual %s.", + qType.getRuleType(), subjectIndividual)); + } + } + return false; + } + + /** + * Given a string describing a subject term, a string describing a rule to query that subject term + * against, a string representing the query types to use when evaluating the results of the query, + * and a list of strings describing a row from the CSV: Determine whether, for any of the given + * query types, the given subject is in the result set returned by the reasoner for that query + * type. Return true if it is in at least one of these result sets, and false if it is not. 
+ */ + private boolean executeQuery( + String subject, String rule, List row, String unsplitQueryType) throws Exception { + logger.debug( + String.format( + "execute_query(): Called with parameters: " + + "subject: \"%s\", " + + "rule: \"%s\", " + + "row: \"%s\", " + + "query type: \"%s\".", + subject, rule, row, unsplitQueryType)); + + // Get the class expression corresponding to the rule that has been passed: + OWLClassExpression ruleCE = getClassExpression(rule); + if (ruleCE == null) { + report(String.format("Unable to parse rule \"%s %s\".", unsplitQueryType, rule)); + return false; + } + + // Try to extract the label corresponding to the subject term: + String subjectLabel = getLabelFromTerm(subject); + if (subjectLabel != null) { + // Figure out if it is an instance or a class and run the appropriate query + IRI subjectIri = labelToIRIMap.get(subjectLabel); + OWLEntity subjectEntity = OntologyHelper.getEntity(ontology, subjectIri); + try { + OWLNamedIndividual subjectIndividual = subjectEntity.asOWLNamedIndividual(); + return executeIndividualQuery(subjectIndividual, ruleCE, row, unsplitQueryType); + } catch (OWLRuntimeException e) { + try { + OWLClass subjectClass = subjectEntity.asOWLClass(); + return executeClassQuery(subjectClass, ruleCE, row, unsplitQueryType); + } catch (OWLRuntimeException ee) { + // This actually should not happen, since if the subject has a label it should either + // be a named class or a named individual: + logger.error( + String.format( + "While validating \"%s\" against \"%s %s\", encountered: %s", + subject, unsplitQueryType, rule, ee)); + return false; + } + } + } else { + // If no label corresponding to the subject term can be found, then try and parse it as a + // class expression and run a generalised query on it: + OWLClassExpression subjectCE = getClassExpression(subject); + if (subjectCE == null) { + logger.error(String.format("Unable to parse subject \"%s\".", subject)); + return false; + } + + try { + return 
executeGeneralizedClassQuery(subjectCE, ruleCE, row, unsplitQueryType); + } catch (UnsupportedOperationException e) { + logger.error("Generalized class expression queries are not supported by this reasoner."); + return false; + } + } + } + + /** + * Given a string describing a term from the ontology, parse it into a class expression expressed + * in terms of the ontology. If the parsing fails, write a warning statement to the log. + */ + private OWLClassExpression getClassExpression(String term) { + OWLClassExpression ce; + try { + ce = parser.parse(term); + } catch (OWLParserException e) { + // If the parsing fails the first time, try surrounding the term in single quotes: + try { + ce = parser.parse("'" + term + "'"); + } catch (OWLParserException ee) { + logger.warn( + String.format( + "Could not determine class expression from \"%s\".\n\t%s.", + term, e.getMessage().trim())); + return null; + } + } + return ce; + } + + /** + * Create a Cell object based on cell data from the input table. 
+ * + * @param column Column that this Cell will go into + * @param cellData list of strings from the cell + * @return Cell object for output Table + */ + private Cell getCell(Column column, List cellData) { + if (outFormat == null) { + return new Cell(column, cellData); + } + + RendererType displayRenderer = outTable.getDisplayRendererType(); + RendererType sortRenderer = outTable.getSortRendererType(); + ShortFormProvider provider = column.getShortFormProvider(); + + List display = new ArrayList<>(); + List sort = new ArrayList<>(); + + for (String val : cellData) { + // Try to get IRI based on label + IRI iri = labelToIRIMap.getOrDefault(val, null); + OWLClassExpression expr = null; + if (iri == null) { + // Try to use parser as a backup if we couldn't get the IRI + // e.g., if value provided was a CURIE or other short form + try { + expr = parser.parse(val); + } catch (Exception e) { + // Do nothing + } + if (expr == null) { + // Not a class expression + display.add(val); + sort.add(val); + continue; + } + if (!expr.isAnonymous()) { + iri = expr.asOWLClass().getIRI(); + } + } + + if (iri != null) { + // Maybe add HTML link + if (outFormat.equalsIgnoreCase("html")) { + display.add(String.format("%s", iri.toString(), val)); + } else { + display.add(val); + } + sort.add(val); + } else { + // No IRI, the expression is anonymous + // Render based on display/sort renderers and provider + display.add(ExportOperation.renderManchester(displayRenderer, provider, expr)); + sort.add(ExportOperation.renderManchester(sortRenderer, provider, expr)); + } + } + return new Cell(column, display, sort); + } + + /** + * Given a string describing one of the classes in the ontology, in either the form of an IRI, an + * abbreviated IRI, or an rdfs:label, return the rdfs:label for that class. 
+ */ + private String getLabelFromTerm(String term) { + if (term == null) { + return null; + } + + // Remove any surrounding single quotes from the term: + term = term.replaceAll("^\'|\'$", ""); + + // If the term is already a recognized label, then just send it back: + if (iriToLabelMap.containsValue(term)) { + return term; + } + + // Check to see if the term is a recognized IRI (possibly in short form), and if so return its + // corresponding label: + // TODO - short form might not work for everything, rework this + for (IRI iri : iriToLabelMap.keySet()) { + if (iri.toString().equals(term) || iri.getShortForm().equals(term)) { + return iriToLabelMap.get(iri); + } + } + + // If the label isn't recognized, just return null: + return null; + } + + /** + * Given a string in the form of a wildcard, and a list of strings representing a row of the CSV, + * return the rdfs:label contained in the position of the row indicated by the wildcard. + */ + private String getWildcardContents(String wildcard, List row) throws Exception { + if (!wildcard.startsWith("%")) { + logger.error(String.format("Invalid wildcard: \"%s\".", wildcard)); + return null; + } + + int colIndex = Integer.parseInt(wildcard.substring(1)) - 1; + if (colIndex >= row.size()) { + throw new Exception(String.format(columnOutOfRangeError, colNum + 1, wildcard, row.size())); + } + + String term = row.get(colIndex); + if (term == null || term.trim().equals("")) { + logger.info( + String.format( + "Failed to retrieve label from wildcard: %s. No term at position %d of this row.", + wildcard, colIndex + 1)); + return null; + } + + return term.trim(); + } + + /** + * Given a string specifying a list of rules of various types, return a map which contains, for + * each rule type present in the string, the list of rules of that type that have been specified. 
+ */ + private Map> parseRules(String ruleString) throws Exception { + HashMap> ruleMap = new HashMap<>(); + // Skip over empty strings and strings that start with "##". + if (!ruleString.trim().equals("") && !ruleString.trim().startsWith("##")) { + // Rules are separated by semicolons: + String[] rules = ruleString.split("\\s*;\\s*"); + for (String rule : rules) { + // Skip any rules that begin with a '#' (these are interpreted as commented out): + if (rule.trim().startsWith("#")) { + continue; + } + // Each rule is of the form: but for the PRESENCE category, if + // is left out it is implicitly understood to be "true" + String[] ruleParts = rule.trim().split("\\s+", 2); + String ruleType = ruleParts[0].trim(); + String ruleContent; + if (ruleParts.length == 2) { + ruleContent = ruleParts[1].trim(); + } else { + RTypeEnum rTypeEnum = rule_type_to_rtenum_map.get(ruleType); + if (rTypeEnum != null && rTypeEnum.getRuleCat() == RCatEnum.PRESENCE) { + ruleContent = "true"; + } else { + throw new Exception(String.format(malformedRuleError, rule.trim())); + } + } + + // Add, to the map, a new empty list for the given ruleType if we haven't seen it before: + if (!ruleMap.containsKey(ruleType)) { + ruleMap.put(ruleType, new ArrayList<>()); + } + // Add the content of the given rule to the list of rules corresponding to its ruleType: + ruleMap.get(ruleType).add(ruleContent); + } + } + return ruleMap; + } + + /** + * Given a list of strings representing a row from the table, return true if any of the cells in + * the row has non-whitespace content. 
+ */ + private boolean hasContent(List row) { + for (String cell : row) { + if (!cell.trim().equals("")) { + return true; + } + } + return false; + } + + /** + * Given a string, possibly containing wildcards, and a list of strings representing a row of the + * CSV, return a string in which all of the wildcards in the input string have been replaced by + * the rdfs:labels corresponding to the content in the positions of the row that they indicate. + */ + private List interpolateRule(String rule, List row) throws Exception { + // This is what will be returned: + List interpolatedRules = new ArrayList<>(); + + // If the rule only has whitespace in it, return an empty string back to the caller: + if (rule.trim().equals("")) { + interpolatedRules.add(""); + return interpolatedRules; + } + + // Look for wildcards within the given rule. These will be of the form %d where d is the number + // of the cell the wildcard is pointing to (e.g. %1 is the first cell). Then create a map from + // wildcard numbers to the terms that they point to, which we extract from the cell + // indicated by the wildcard number. In general the terms will be split by pipes within a + // cell. E.g. if the wildcard is %1 and the first cell contains 'term1|term2|term3' then add an + // entry to the wildcard map like: 1 -> ['term1', 'term2', 'term3']. + Matcher m = Pattern.compile("%(\\d+)").matcher(rule); + Map wildCardMap = new HashMap<>(); + while (m.find()) { + int key = Integer.parseInt(m.group(1)); + if (!wildCardMap.containsKey(key)) { + String wildcard = getWildcardContents(m.group(), row); + String[] terms = wildcard != null ? wildcard.split("\\|") : new String[] {null}; + wildCardMap.put(key, terms); + } + } + + // If the wildcard map is empty then the rule contained no wildcards. Just return it as it is: + if (wildCardMap.isEmpty()) { + interpolatedRules.add(rule); + return interpolatedRules; + } + + // Now interpolate the rule using the wildcard map. 
If any of the wildcards points to a cell + // with multiple terms, then we duplicate the rule for each term pointed to. Finally we return + // all of the rules generated. + for (int i : wildCardMap.keySet()) { + if (interpolatedRules.isEmpty()) { + // If we haven't yet interpolated anything, then base the current interpolation on the rule + // that has been passed as an argument to the function above, and generate an interpolated + // rule corresponding to every term corresponding to this key in the wildcard map. + for (String term : wildCardMap.get(i)) { + String label = getLabelFromTerm(term); + String interpolatedRule = + rule.replaceAll( + String.format("%%%d", i), label == null ? "(" + term + ")" : "'" + label + "'"); + interpolatedRules.add(interpolatedRule); + } + } else { + // If we have already interpolated some rules, then every string that has been interpolated + // thus far must be interpolated again for every term corresponding to this key in the + // wildcard map, and the list of interpolated rules is then replaced with the new list. + List tmpList = new ArrayList<>(); + for (String term : wildCardMap.get(i)) { + String label = getLabelFromTerm(term); + for (String intStr : interpolatedRules) { + String interpolatedRule = + intStr.replaceAll( + String.format("%%%d", i), label == null ? "(" + term + ")" : "'" + label + "'"); + tmpList.add(interpolatedRule); + } + } + interpolatedRules = tmpList; + } + } + + return interpolatedRules; + } + + /** + * Given the string `format` and a number of formatting variables, use the formatting variables to + * fill in the format string in the manner of C's printf function, and write the string to the + * Writer object (or XLSX workbook, or Jinja context) that belongs to ValidateOperation. If the + * parameter `showCoords` is true, then include the current row and column number in the output + * string. + */ + private void report(String format, Object... 
positionalArgs) { + // Any report of error means validation failed + valid = false; + + // Format the error message + String outStr = + String.format("At %s row %d, column %d: ", currentTable, rowNum + 1, colNum + 1); + outStr += String.format(format, positionalArgs); + if (!silent) { + // Print error if not silent + System.out.println(outStr); + } + + if (outFormat != null && !outFormat.equals("txt")) { + // We want to put formatting on cells with errors + if (outFormat.equals("xlsx")) { + // Set the style of the current cell to a red background with a white font: + currentCell.setFontColor(IndexedColors.WHITE); + currentCell.setCellPattern(FillPatternType.FINE_DOTS); + currentCell.setCellColor(IndexedColors.RED); + } else { + // Set the HTML class to bg-danger (red background with a white font) + currentCell.setHTMLClass("bg-danger"); + } + // Attach a comment to the cell + // If one for this cell already exists, add new comment to existing comment + String commentString = String.format(format, positionalArgs); + String currentComment = currentCell.getComment(); + if (currentComment != null) { + commentString = currentComment + "; " + commentString; + } + currentCell.setComment(commentString); + } else if (outFormat != null) { + // Add outStr to messages to be written to file + messages.add(outStr); + } + } + + /** + * Given a string describing a rule type, return a boolean indicating whether it is one of the + * rules recognized by ValidateOperation. + */ + private boolean unknownRuleType(String ruleType) { + return !rule_type_to_rtenum_map.containsKey(ruleType.split("\\|")[0]); + } + + /** + * Given a string describing the content of a rule and a string describing its rule type, return a + * simple map entry such that the `key` for the entry is the main clause of the rule, and the + * `value` for the entry is a list of the rule's when-clauses. 
Each when-clause is itself stored + * as an array of three strings, including the subject to which the when-clause is to be applied, + * the rule type for the when clause, and the actual axiom to be validated against the subject. + */ + private AbstractMap.SimpleEntry> separateRule(String rule, String ruleType) + throws Exception { + + // Check if there are any when clauses: + Matcher m = Pattern.compile("(\\(\\s*when\\s+.+\\))(.*)").matcher(rule); + String whenClauseStr; + if (!m.find()) { + // If there is no when clause, then just return back the rule string as it was passed with an + // empty when clause list: + logger.debug(String.format("No when-clauses found in rule: \"%s\".", rule)); + return new AbstractMap.SimpleEntry<>(rule, new ArrayList<>()); + } + + // Throw an exception if there is no main clause and this is not a PRESENCE rule: + if (m.start() == 0 && rule_type_to_rtenum_map.get(ruleType).getRuleCat() != RCatEnum.PRESENCE) { + throw new Exception(String.format(noMainError, colNum + 1, rule)); + } + + // Extract the actual content of the when-clause. + whenClauseStr = m.group(1); + whenClauseStr = whenClauseStr.substring("(when ".length(), whenClauseStr.length() - 1); + + // Don't fail just because there is some extra garbage at the end of the rule, but notify + // the user about it: + if (!m.group(2).trim().equals("")) { + logger.warn( + String.format("Ignoring string \"%s\" at end of rule \"%s\".", m.group(2).trim(), rule)); + } + + // Within each when clause, multiple subclauses separated by ampersands are allowed. Each + // subclass must be of the form: , where: is a (not + // necessarily interpolated) string describing either a label or a generalised DL class + // expression involving labels, and any label names containing spaces are enclosed within + // single quotes; is a possibly hyphenated alphanumeric string (which corresponds + // to one of the rule types defined above in RTypeEnum); and can take any form. 
+ // Here we resolve each sub-clause of the when statement into a list of such triples. + ArrayList whenClauses = new ArrayList<>(); + for (String whenClause : whenClauseStr.split("\\s*&\\s*")) { + m = + Pattern.compile( + "^([^\'\\s()]+|\'[^\'()]+\'|\\(.+?\\))" + "\\s+([a-z\\-|]+)" + "\\s+(.*)$") + .matcher(whenClause); + + if (!m.find()) { + throw new Exception(String.format(malformedWhenClauseError, colNum + 1, whenClause)); + } + // Add the triple to the list of when clauses: + whenClauses.add(new String[] {m.group(1), m.group(2), m.group(3)}); + } + + // Now get the main part of the rule (i.e. the part before the when clause): + m = Pattern.compile("^(.+)\\s+\\(when\\s").matcher(rule); + if (m.find()) { + return new AbstractMap.SimpleEntry<>(m.group(1), whenClauses); + } + + // If no main clause is found, then if this is a PRESENCE rule, implicitly assume that the main + // clause is "true": + if (rule_type_to_rtenum_map.get(ruleType).getRuleCat() == RCatEnum.PRESENCE) { + return new AbstractMap.SimpleEntry<>("true", whenClauses); + } + + // We should never get here since we have already checked for an empty main clause earlier ... + logger.error( + String.format( + "Encountered unknown error while looking for main clause of rule \"%s\".", rule)); + // Return the rule as passed with an empty when clause list: + return new AbstractMap.SimpleEntry<>(rule, new ArrayList<>()); + } + + /** + * Given a string describing a rule, a rule of the type PRESENCE, and a string representing a cell + * from the CSV, determine whether the cell satisfies the given presence rule (e.g. is-required, + * is-empty). 
+ */ + private String validatePresenceRule(String rule, RTypeEnum rType, String cell) throws Exception { + + logger.debug( + String.format( + "validate_presence_rule(): Called with parameters: " + + "rule: \"%s\", " + + "rule type: \"%s\", " + + "cell: \"%s\".", + rule, rType.getRuleType(), cell)); + + // Presence-type rules (is-required, is-excluded) must be in the form of a truth value: + if ((Arrays.asList("true", "t", "1", "yes", "y").indexOf(rule.toLowerCase()) == -1) + && (Arrays.asList("false", "f", "0", "no", "n").indexOf(rule.toLowerCase()) == -1)) { + throw new Exception( + String.format(invalidPresenceRuleError, colNum + 1, rule, rType.getRuleType())); + } + + // If the restriction isn't "true" then there is nothing to do. Just return: + if (Arrays.asList("true", "t", "1", "yes", "y").indexOf(rule.toLowerCase()) == -1) { + logger.debug( + String.format("Nothing to validate for rule: \"%s %s\"", rType.getRuleType(), rule)); + return null; + } + + String msg; + switch (rType) { + case REQUIRED: + if (cell.trim().equals("")) { + msg = + String.format( + "Cell is empty but rule: \"%s %s\" does not allow this.", + rType.getRuleType(), rule); + report(msg); + return msg; + } + break; + case EXCLUDED: + if (!cell.trim().equals("")) { + msg = + String.format( + "Cell is non-empty (\"%s\") but rule: \"%s %s\" does not allow this.", + cell, rType.getRuleType(), rule); + report(msg); + return msg; + } + break; + default: + msg = + String.format( + "%s validation of rule type: \"%s\" is not yet implemented.", + rType.getRuleCat(), rType.getRuleType()); + logger.error(msg); + return msg; + } + logger.info( + String.format("Validated \"%s %s\" against \"%s\".", rType.getRuleType(), rule, cell)); + return null; + } + + /** + * Given a string describing a cell from the CSV, a string describing a rule to be applied against + * that cell, a string describing the type of that rule, and a list of strings describing the row + * containing the given cell, validate the cell, 
indicating any validation errors via the output + * writer (or XLSX workbook). + */ + private String validateRule(String cell, String rule, List row, String ruleType) + throws Exception { + + logger.debug( + String.format( + "validate_rule(): Called with parameters: " + + "cell: \"%s\", " + + "rule: \"%s\", " + + "row: \"%s\", " + + "rule type: \"%s\".", + cell, rule, row, ruleType)); + + logger.info(String.format("Validating rule \"%s %s\" against \"%s\".", ruleType, rule, cell)); + if (unknownRuleType(ruleType)) { + throw new Exception(String.format(unrecognizedRuleTypeError, colNum + 1, ruleType)); + } + + // Separate the given rule into its main clause and optional when clauses: + AbstractMap.SimpleEntry> separatedRule = separateRule(rule, ruleType); + + // Evaluate and validate any when clauses for this rule first: + if (!validateWhenClauses(separatedRule.getValue(), row, colNum)) { + logger.debug("Not all when clauses have been satisfied. Skipping main clause"); + return null; + } + + // Once all of the when clauses have been validated, get the RTypeEnum representation of the + // primary rule type of this rule: + RTypeEnum primRType = rule_type_to_rtenum_map.get(ruleType.split("\\|")[0]); + + // If the primary rule type for this rule is not in the QUERY category, process it at this step + // and return control to the caller. The further steps below are only needed when queries are + // going to be sent to the reasoner. + if (primRType.getRuleCat() != RCatEnum.QUERY) { + return validatePresenceRule(separatedRule.getKey(), primRType, cell); + } + + // If the cell contents are empty, just return to the caller silently (if the cell is not + // expected to be empty, this will have been caught by one of the presence rules in the + // previous step, assuming such a rule is constraining the column). 
+ if (cell.trim().equals("")) return null; + + // Get the axiom that the cell will be validated against: + String axiom = separatedRule.getKey(); + + // Send the query to the reasoner: + // Comment may be null on exception, empty on success, or a non-empty String on validation + // failure + // Non-empty strings get added to the Cell + boolean result = executeQuery(cell, axiom, row, ruleType); + String msg = null; + if (!result) { + msg = String.format("Validation failed for rule: \"%s %s %s\".", cell, ruleType, axiom); + report(msg); + } else { + logger.info(String.format("Validated: \"%s %s %s\".", cell, ruleType, axiom)); + } + return msg; + } + + /** + * Given a list of String arrays describing a list of when-clauses, and a list of Strings + * describing the row to which these when-clauses belong, validate the when-clauses one by one, + * returning false if any of them fails to be satisfied, and true if they are all satisfied. + */ + private boolean validateWhenClauses(List whenClauses, List row, int colNum) + throws Exception { + + for (String[] whenClause : whenClauses) { + String subject = whenClause[0].trim(); + // If the subject term is blank, then skip this clause: + if (subject.equals("")) { + continue; + } + + // Make sure all of the rule types in the when clause are of the right category: + String whenRuleType = whenClause[1]; + for (String whenRuleSubType : whenRuleType.split("\\|")) { + RTypeEnum whenSubRType = rule_type_to_rtenum_map.get(whenRuleSubType); + if (whenSubRType == null || whenSubRType.getRuleCat() != RCatEnum.QUERY) { + throw new Exception( + String.format( + invalidWhenTypeError, + colNum + 1, + String.join(" ", whenClause), + query_type_to_rtenum_map.keySet())); + } + } + + // Get the axiom to validate and send the query to the reasoner: + String axiom = whenClause[2]; + if (!executeQuery(subject, axiom, row, whenRuleType)) { + // If any of the when clauses fail to be satisfied, then we do not need to evaluate any + // of the other 
when clauses, or the main clause, since the main clause may only be + // evaluated when all of the when clauses are satisfied. + logger.info( + String.format( + "When clause: \"%s %s %s\" is not satisfied.", subject, whenRuleType, axiom)); + return false; + } else { + logger.info( + String.format("Validated when clause \"%s %s %s\".", subject, whenRuleType, axiom)); + } + } + // If we get to here, then all of the when clauses have been satisfied, so return true: + return true; + } + + /** + * An enum representation of the different categories of rules. We distinguish between queries, + * which involve queries to a reasoner, and presence rules, which check for the existence of + * content in a cell. + */ + private enum RCatEnum { + QUERY, + PRESENCE + } + + /** + * An enum representation of the different types of rules. Each rule type belongs to larger + * category, and is identified within the CSV file by a particular string. + */ + private enum RTypeEnum { + DIRECT_SUPER("direct-superclass-of", RCatEnum.QUERY), + NOT_SUPER("not-superclass-of", RCatEnum.QUERY), + NOT_DIRECT_SUPER("not-direct-superclass-of", RCatEnum.QUERY), + SUPER("superclass-of", RCatEnum.QUERY), + EQUIV("equivalent-to", RCatEnum.QUERY), + NOT_EQUIV("not-equivalent-to", RCatEnum.QUERY), + DIRECT_SUB("direct-subclass-of", RCatEnum.QUERY), + NOT_SUB("not-subclass-of", RCatEnum.QUERY), + NOT_DIRECT_SUB("not-direct-subclass-of", RCatEnum.QUERY), + SUB("subclass-of", RCatEnum.QUERY), + DIRECT_INSTANCE("direct-instance-of", RCatEnum.QUERY), + NOT_INSTANCE("not-instance-of", RCatEnum.QUERY), + INSTANCE("instance-of", RCatEnum.QUERY), + REQUIRED("is-required", RCatEnum.PRESENCE), + EXCLUDED("is-excluded", RCatEnum.PRESENCE); + + private final String ruleType; + private final RCatEnum ruleCat; + + RTypeEnum(String ruleType, RCatEnum ruleCat) { + this.ruleType = ruleType; + this.ruleCat = ruleCat; + } + + private String getRuleType() { + return ruleType; + } + + private RCatEnum getRuleCat() { + return 
ruleCat; + } + } +} diff --git a/robot-core/src/main/java/org/obolibrary/robot/Template.java b/robot-core/src/main/java/org/obolibrary/robot/Template.java index 47e62a477..605dee27c 100644 --- a/robot-core/src/main/java/org/obolibrary/robot/Template.java +++ b/robot-core/src/main/java/org/obolibrary/robot/Template.java @@ -417,18 +417,18 @@ private void addTable(List> rows) throws Exception { } catch (IndexOutOfBoundsException e) { // Template row is longer than header row // Which means there is at least one header missing - throw new ColumnException(String.format(columnMismatchError, column + 1, name)); + throw new ColumnException(String.format(columnMismatchError, column, name)); } if (header.isEmpty()) { // Template string is not empty // Header string is empty - throw new ColumnException(String.format(columnMismatchError, column + 1, name)); + throw new ColumnException(String.format(columnMismatchError, column, name)); } // Validate the template string if (!TemplateHelper.validateTemplateString(template)) { throw new ColumnException( - String.format(unknownTemplateError, name, column + 1, headers.get(column), template)); + String.format(unknownTemplateError, name, column, headers.get(column), template)); } // Get the location of important columns @@ -505,8 +505,22 @@ private void addTable(List> rows) throws Exception { } // Add the rest of the tableRows to Template - for (int row = 2; row < rows.size(); row++) { - tableRows.add(rows.get(row)); + for (int rowNum = 2; rowNum < rows.size(); rowNum++) { + List row = rows.get(rowNum); + if (idColumn != -1) { + if (row.size() > idColumn && row.get(idColumn).trim().equals("")) { + continue; + } else if (row.size() <= idColumn) { + continue; + } + } else if (labelColumn != -1) { + if (row.size() > labelColumn && row.get(labelColumn).equals("")) { + continue; + } else if (row.size() <= labelColumn) { + continue; + } + } + tableRows.add(row); } } @@ -822,37 +836,34 @@ private void addClassAxioms(IRI iri, List row) 
throws Exception { // Subclass expression subclassExpressionColumns.put( column, - TemplateHelper.getClassExpressions(name, parser, template, value, rowNum, column + 1)); + TemplateHelper.getClassExpressions(name, parser, template, value, rowNum, column)); } else if (template.startsWith("EC")) { // Equivalent expression equivalentExpressionColumns.put( column, - TemplateHelper.getClassExpressions(name, parser, template, value, rowNum, column + 1)); + TemplateHelper.getClassExpressions(name, parser, template, value, rowNum, column)); } else if (template.startsWith("DC")) { // Disjoint expression disjointExpressionColumns.put( column, - TemplateHelper.getClassExpressions(name, parser, template, value, rowNum, column + 1)); + TemplateHelper.getClassExpressions(name, parser, template, value, rowNum, column)); } else if (template.startsWith("C") && !template.startsWith("CLASS_TYPE")) { // Use class type to determine what to do with the expression switch (classType) { case "subclass": subclassExpressionColumns.put( column, - TemplateHelper.getClassExpressions( - name, parser, template, value, rowNum, column + 1)); + TemplateHelper.getClassExpressions(name, parser, template, value, rowNum, column)); break; case "equivalent": intersectionEquivalentExpressionColumns.put( column, - TemplateHelper.getClassExpressions( - name, parser, template, value, rowNum, column + 1)); + TemplateHelper.getClassExpressions(name, parser, template, value, rowNum, column)); break; case "disjoint": disjointExpressionColumns.put( column, - TemplateHelper.getClassExpressions( - name, parser, template, value, rowNum, column + 1)); + TemplateHelper.getClassExpressions(name, parser, template, value, rowNum, column)); break; default: break; @@ -1106,7 +1117,7 @@ private void addObjectPropertyAxioms(IRI iri, List row) throws Exception // Unknown property type throw new RowParseException( String.format( - propertyTypeError, iri.getShortForm(), propertyType, rowNum, column + 1, name)); + 
propertyTypeError, iri.getShortForm(), propertyType, rowNum, column, name)); } } else if (template.startsWith("DOMAIN")) { // Handle domains @@ -1356,7 +1367,7 @@ private void addDataPropertyAxioms(IRI iri, List row) throws Exception { // Cannot use inverse with data properties throw new RowParseException( String.format( - propertyTypeError, iri.getShortForm(), propertyType, rowNum, column + 1, name)); + propertyTypeError, iri.getShortForm(), propertyType, rowNum, column, name)); } else if (template.startsWith("P ") && !template.startsWith("PROPERTY_TYPE")) { // Use property type to handle expression type Set expressions = @@ -1376,7 +1387,7 @@ private void addDataPropertyAxioms(IRI iri, List row) throws Exception { // Unknown property type throw new RowParseException( String.format( - propertyTypeError, iri.getShortForm(), propertyType, rowNum, column + 1, name)); + propertyTypeError, iri.getShortForm(), propertyType, rowNum, column, name)); } } else if (template.startsWith("DOMAIN")) { // Handle domains @@ -1832,7 +1843,7 @@ private void addIndividualAxioms(IRI iri, List row) throws Exception { template = template + " SPLIT=" + split; } Set typeExpressions = - TemplateHelper.getClassExpressions(name, parser, template, value, rowNum, column + 1); + TemplateHelper.getClassExpressions(name, parser, template, value, rowNum, column); for (OWLClassExpression ce : typeExpressions) { axioms.add(dataFactory.getOWLClassAssertionAxiom(ce, individual)); } @@ -1876,12 +1887,7 @@ private void addIndividualAxioms(IRI iri, List row) throws Exception { default: throw new RowParseException( String.format( - individualTypeError, - iri.getShortForm(), - individualType, - rowNum, - column + 1, - name)); + individualTypeError, iri.getShortForm(), individualType, rowNum, column, name)); } } } diff --git a/robot-core/src/main/java/org/obolibrary/robot/TemplateHelper.java b/robot-core/src/main/java/org/obolibrary/robot/TemplateHelper.java index 813ab8367..986d9522d 100644 --- 
a/robot-core/src/main/java/org/obolibrary/robot/TemplateHelper.java +++ b/robot-core/src/main/java/org/obolibrary/robot/TemplateHelper.java @@ -448,7 +448,7 @@ public static Set getDataPropertyExpressions( } catch (OWLParserException e) { String cause = getManchesterErrorCause(e); throw new RowParseException( - String.format(manchesterParseError, sub, rowNum, column + 1, tableName, cause)); + String.format(manchesterParseError, sub, rowNum, column, tableName, cause)); } } } @@ -788,7 +788,7 @@ public static Set getObjectPropertyExpressions( } catch (OWLParserException e) { String cause = getManchesterErrorCause(e); throw new RowParseException( - String.format(manchesterParseError, sub, rowNum, column + 1, tableName, cause)); + String.format(manchesterParseError, sub, rowNum, column, tableName, cause)); } } } @@ -1098,7 +1098,7 @@ protected static OWLClassExpression tryParse( } catch (OWLParserException e) { String cause = getManchesterErrorCause(e); throw new RowParseException( - String.format(manchesterParseError, content, rowNum, column + 1, tableName, cause)); + String.format(manchesterParseError, content, rowNum, column, tableName, cause)); } return expr; } diff --git a/robot-core/src/main/java/org/obolibrary/robot/ValidateOperation.java b/robot-core/src/main/java/org/obolibrary/robot/ValidateOperation.java new file mode 100644 index 000000000..92b32b5c2 --- /dev/null +++ b/robot-core/src/main/java/org/obolibrary/robot/ValidateOperation.java @@ -0,0 +1,100 @@ +package org.obolibrary.robot; + +import java.util.*; +import org.semanticweb.owlapi.apibinding.OWLManager; +import org.semanticweb.owlapi.manchestersyntax.parser.ManchesterOWLSyntaxClassExpressionParser; +import org.semanticweb.owlapi.model.*; +import org.semanticweb.owlapi.reasoner.OWLReasoner; +import org.semanticweb.owlapi.reasoner.OWLReasonerFactory; +import org.semanticweb.owlapi.util.SimpleShortFormProvider; + +/** + * Implements the validate operation on a collection of tables + * + * @author 
Michael E. Cuffaro + */ +public class ValidateOperation { + + /** + * Return the default Validate options. + * + * @return map of default options + */ + public static Map getDefaultOptions() { + Map options = new HashMap<>(); + options.put("format", null); + options.put("standalone", "true"); + options.put("output-dir", null); + options.put("silent", "true"); + options.put("errors", null); + options.put("skip-row", "0"); + return options; + } + + /** + * Validate tables based on an ontology. + * + * @param tables tables to validate (map of table name to table contents) + * @param ontology OWLOntology to use to validate tables + * @param ioHelper IOHelper to resolve entities + * @param reasonerFactory OWLReasonerFactory to create reasoner + * @param options map of validate options + * @throws Exception on any problem + */ + public static List validate( + Map>> tables, + OWLOntology ontology, + IOHelper ioHelper, + OWLReasonerFactory reasonerFactory, + Map options) + throws Exception { + if (options == null) { + options = getDefaultOptions(); + } + + // Robot's custom quoted entity checker will be used for parsing class expressions: + QuotedEntityChecker checker = new QuotedEntityChecker(); + // Add the class that will be used for I/O and for handling short-form IRIs by the quoted entity + // checker: + checker.setIOHelper(new IOHelper()); + checker.addProvider(new SimpleShortFormProvider()); + + // Initialise the dataFactory and use it to add rdfs:label to the list of annotation properties + // which will be looked up in the ontology by the quoted entity checker when finding names. + OWLDataFactory dataFactory = OWLManager.getOWLDataFactory(); + checker.addProperty(dataFactory.getRDFSLabel()); + checker.addAll(ontology); + + // Create the parser using the data factory and entity checker. 
+ ManchesterOWLSyntaxClassExpressionParser parser = + new ManchesterOWLSyntaxClassExpressionParser(dataFactory, checker); + + // Use the given reasonerFactory to initialise the reasoner based on the given ontology: + OWLReasoner reasoner = reasonerFactory.createReasoner(ontology); + String outFormat = options.getOrDefault("format", null); + String outDir = options.getOrDefault("output-dir", "."); + + TableValidator validator = + new TableValidator(ontology, ioHelper, parser, reasoner, outFormat, outDir); + + boolean silent = OptionsHelper.optionIsTrue(options, "silent"); + if (silent && outFormat != null) { + // Only toggle to silent if results are written to a file + validator.toggleLogging(); + } + + // Run validation over all tables + List result = validator.validate(tables, options); + + // Maybe save errors to their own table + String errorsPath = OptionsHelper.getOption(options, "errors", null); + if (errorsPath != null) { + List errors = validator.getErrors(); + if (errors.size() > 1) { + // Only one item in the errors means it is just the header + IOHelper.writeTable(errors, errorsPath); + } + } + return result; + } +} diff --git a/robot-core/src/main/java/org/obolibrary/robot/export/Cell.java b/robot-core/src/main/java/org/obolibrary/robot/export/Cell.java index 60f52c388..4a1694ff6 100644 --- a/robot-core/src/main/java/org/obolibrary/robot/export/Cell.java +++ b/robot-core/src/main/java/org/obolibrary/robot/export/Cell.java @@ -24,6 +24,9 @@ public class Cell { private FillPatternType cellPattern = null; private IndexedColors fontColor = null; + // Styles for HTML output + private String htmlClass = null; + // Comment can appear as an XLSX Comment or an HTML tooltip // This is not required and can be returned null private String comment = null; @@ -134,6 +137,15 @@ public List getDisplayValues() { return displayValues; } + /** + * Get the HTML class of a cell. 
+ * + * @return String HTML bootstrap class, or null + */ + public String getHTMLClass() { + return htmlClass; + } + /** * Get the font color for this cell in an XLSX workbook. * @@ -179,6 +191,15 @@ public void setComment(String comment) { this.comment = comment; } + /** + * Add an HTML class to this Cell. + * + * @param htmlClass String Bootstrap HTML class + */ + public void setHTMLClass(String htmlClass) { + this.htmlClass = htmlClass; + } + /** * Set the font color for this cell in an XLSX workbook. * diff --git a/robot-core/src/main/java/org/obolibrary/robot/export/Column.java b/robot-core/src/main/java/org/obolibrary/robot/export/Column.java index e1411cc01..01543730f 100644 --- a/robot-core/src/main/java/org/obolibrary/robot/export/Column.java +++ b/robot-core/src/main/java/org/obolibrary/robot/export/Column.java @@ -1,5 +1,7 @@ package org.obolibrary.robot.export; +import java.util.List; +import java.util.Map; import javax.annotation.Nonnull; import javax.annotation.Nullable; import org.semanticweb.owlapi.model.*; @@ -22,6 +24,8 @@ public class Column { private OWLAnnotationProperty annotationProperty = null; private OWLDataProperty dataProperty = null; private OWLObjectProperty objectProperty = null; + private Map> rules = null; + private String displayRule = null; // Target object (e.g., annotation) for cell values // private OWLObject targetObject; @@ -120,6 +124,25 @@ public Column( setEntitySelect(entitySelect); } + /** + * Init a new column using one or more rules for validate. + * + * @param name Column name + * @param rules Column rules + * @param displayRule String raw display rule for output + */ + public Column( + String name, + Map> rules, + String displayRule, + @Nonnull ShortFormProvider shortFormProvider) { + this.name = name; + this.displayName = name; + this.rules = rules; + this.displayRule = displayRule; + this.shortFormProvider = shortFormProvider; + } + /** * Get the display name of a column. 
* @@ -129,6 +152,15 @@ public String getDisplayName() { return displayName; } + /** + * Get the display rule of a column for validation. + * + * @return String display rule + */ + public String getDisplayRule() { + return displayRule; + } + /** * Get the name of a column. * @@ -155,14 +187,12 @@ public OWLProperty getProperty() { return null; } - /** @return */ - @Nonnull + /** @return true if including anonymous entities in this column */ public boolean getIncludeAnonymous() { return includeAnonymous; } - /** @return */ - @Nonnull + /** @return true if including named entities in this column */ public boolean getIncludeNamed() { return includeNamed; } @@ -177,6 +207,16 @@ public IRI getIRI() { return iri; } + /** + * Return the rules used in column for validate, or null. + * + * @return map of rules or null + */ + @Nullable + public Map> getRules() { + return rules; + } + /** * Return the short form provider for this column * @@ -210,7 +250,12 @@ public void setSort(int sortOrder, boolean reverseSort) { this.reverseSort = reverseSort; } - /** @param entitySelect */ + /** + * Set the entity selection values (includeNamed and includeAnonymous) based on the entity select + * string: NAMED, ANON/ANONYMOUS, or ANY. + * + * @param entitySelect entity select string + */ private void setEntitySelect(String entitySelect) { switch (entitySelect.toLowerCase()) { case "named": diff --git a/robot-core/src/main/java/org/obolibrary/robot/export/Row.java b/robot-core/src/main/java/org/obolibrary/robot/export/Row.java index 8ad0f1cb1..2e3b86985 100644 --- a/robot-core/src/main/java/org/obolibrary/robot/export/Row.java +++ b/robot-core/src/main/java/org/obolibrary/robot/export/Row.java @@ -20,6 +20,15 @@ public class Row { private static final List singles = Arrays.asList("CURIE", "ID", "IRI"); /** Init a new Row. */ + public Row() { + // empty constructor + } + + /** + * Init a new Row with a subject. 
+ * + * @param subject IRI of subject of row + */ public Row(IRI subject) { this.subject = subject; } @@ -171,11 +180,18 @@ public String[] toArray(List columns, String split) { */ public String toHTML(List columns, String split) { StringBuilder sb = new StringBuilder(); + + // Start table row sb.append("\t\n"); + + // Iterate through columns and get the cell for each for (Column c : columns) { String columnName = c.getDisplayName(); Cell cell = cells.getOrDefault(columnName, null); String value; + String htmlClass = null; + String comment = null; + if (cell != null) { List values = cell.getDisplayValues(); if (values.size() > 1) { @@ -184,11 +200,28 @@ public String toHTML(List columns, String split) { values.stream().map(x -> x.replace(split, "\\" + split)).collect(Collectors.toList()); } value = String.join(split, values); + htmlClass = cell.getHTMLClass(); + comment = cell.getComment(); } else { value = ""; } - sb.append("\t\t\n"); + + // Set default HTML class + if (htmlClass == null) { + htmlClass = "bg-light"; + } + // Write cell as HTML + sb.append("\t\t\n"); } + + // Close table row sb.append("\t\n"); return sb.toString(); } diff --git a/robot-core/src/main/java/org/obolibrary/robot/export/Table.java b/robot-core/src/main/java/org/obolibrary/robot/export/Table.java index 729cbc451..88ef7fab1 100644 --- a/robot-core/src/main/java/org/obolibrary/robot/export/Table.java +++ b/robot-core/src/main/java/org/obolibrary/robot/export/Table.java @@ -1,5 +1,6 @@ package org.obolibrary.robot.export; +import com.google.common.collect.Sets; import com.google.gson.Gson; import com.google.gson.GsonBuilder; import com.google.gson.JsonArray; @@ -31,6 +32,8 @@ public class Table { private RendererType displayRenderer = null; private RendererType sortRenderer = null; + private static final Set basicFormats = Sets.newHashSet("tsv", "csv", "json", "xlsx"); + /** * Init a new Table. 
* @@ -43,11 +46,13 @@ public Table(String format) { sortColumns = new ArrayList<>(); // Set renderer types based on format - if (format.equalsIgnoreCase("tsv") || format.equalsIgnoreCase("csv")) { + if (format == null || basicFormats.contains(format.toLowerCase())) { displayRenderer = RendererType.OBJECT_RENDERER; } else if (format.equalsIgnoreCase("html")) { displayRenderer = RendererType.OBJECT_HTML_RENDERER; sortRenderer = RendererType.OBJECT_RENDERER; + } else { + // TODO - unknown format } } @@ -83,13 +88,31 @@ public Workbook asWorkbook(String split) { Sheet sheet = wb.getSheetAt(0); org.apache.poi.ss.usermodel.Row headerRow = sheet.createRow(0); int colIdx = 0; + Map rules = new HashMap<>(); for (Column c : columns) { String name = c.getDisplayName(); Cell xlsxCell = headerRow.createCell(colIdx); xlsxCell.setCellValue(name); + + String displayRule = c.getDisplayRule(); + if (displayRule != null) { + rules.put(colIdx, displayRule); + } colIdx++; } + // Maybe add rules + if (!rules.isEmpty()) { + org.apache.poi.ss.usermodel.Row rulesRow = sheet.createRow(sheet.getLastRowNum() + 1); + for (int idx = 0; idx <= colIdx; idx++) { + if (rules.containsKey(idx)) { + String rule = rules.get(idx); + Cell xlsxCell = rulesRow.createCell(idx); + xlsxCell.setCellValue(rule); + } + } + } + // Add rows for (Row row : rows) { row.addToWorkbook(wb, columns, split); @@ -215,26 +238,81 @@ public List toList(String split) { * @return HTML string */ public String toHTML(String split) { + return toHTML(split, true, false); + } + + /** + * Render the Table as an HTML string. + * + * @param split character to split multiple cell values on + * @param standalone if true, include header + * @param includeJS if true and standalone, include JS script for tooltips + * @return HTML string + */ + public String toHTML(String split, boolean standalone, boolean includeJS) { StringBuilder sb = new StringBuilder(); - sb.append("\n") - .append( - "\t\n") - .append("\n") - .append("\n") - .append("
").append(value).append("").append(value).append("
\n") + if (standalone) { + // Add opening tags, style, and maybe js scripts + sb.append("\n") + .append("\t\n"); + if (includeJS) { + sb.append("\t\n") + .append( + "\t\n") + .append( + "\t\n"); + } + sb.append("\n").append("\n"); + } + // Table start + sb.append("
\n") + .append("\n") .append("\n"); + // Add column headers + Map rules = new HashMap<>(); + int colIdx = 0; for (Column c : columns) { sb.append("\t\n"); + String displayRule = c.getDisplayRule(); + if (displayRule != null) { + rules.put(colIdx, displayRule); + } + colIdx++; } + sb.append("\n").append("\n"); - sb.append("\n"); + // Maybe add rules + if (!rules.isEmpty()) { + sb.append("\n").append("\n"); + for (int idx = 0; idx < colIdx; idx++) { + if (rules.containsKey(idx)) { + sb.append("\t\n"); + } else { + sb.append("\t\n"); + } + } + sb.append("\n").append("\n"); + } + // Add all table rows for (Row row : rows) { sb.append(row.toHTML(columns, split)); } - sb.append("
").append(c.getDisplayName()).append("
").append(rules.get(idx)).append("
"); - sb.append(""); + sb.append("\n"); + + if (standalone) { + // Add closing tag and script to activate tooltips + sb.append("\n"); + if (includeJS) { + sb.append("\n"); + } + } return sb.toString(); } diff --git a/robot-core/src/main/resources/validate-table-template.jinja2 b/robot-core/src/main/resources/validate-table-template.jinja2 new file mode 100644 index 000000000..086965aca --- /dev/null +++ b/robot-core/src/main/resources/validate-table-template.jinja2 @@ -0,0 +1,33 @@ + + + + {% for cell in headerRow %} + + {% endfor %} + + + + + {% for cell in rulesRow %} + + {% endfor %} + + + {% for row in dataRows %} + + {% for cell in row %} + + {% endfor %} + + {% endfor %} +
+ {{ cell }} +
+ {{ cell }} +
+ {{ cell.content }} + +
diff --git a/robot-core/src/main/resources/validate-template.jinja2 b/robot-core/src/main/resources/validate-template.jinja2 new file mode 100644 index 000000000..3630d024c --- /dev/null +++ b/robot-core/src/main/resources/validate-template.jinja2 @@ -0,0 +1,51 @@ + + + + + + + + + + + + + + Validation report + + + +

Validation report

+ + {{ table }} + + + + + + + +
diff --git a/robot-core/src/test/java/org/obolibrary/robot/IOHelperTest.java b/robot-core/src/test/java/org/obolibrary/robot/IOHelperTest.java
index ce8cf6be3..6858638bc 100644
--- a/robot-core/src/test/java/org/obolibrary/robot/IOHelperTest.java
+++ b/robot-core/src/test/java/org/obolibrary/robot/IOHelperTest.java
@@ -21,6 +21,19 @@
 /** Tests for IOHelper. */
 public class IOHelperTest extends CoreTest {
 
+  /** Tests converting row and column numbers to A1 notation. */
+  @Test
+  public void testA1Notation() {
+    String a1 = IOHelper.cellToA1(1, 1);
+    assertEquals("A1", a1);
+
+    a1 = IOHelper.cellToA1(100, 200);
+    assertEquals("GR100", a1);
+
+    a1 = IOHelper.cellToA1(39, 8459);
+    assertEquals("LMI39", a1);
+  }
+
   /**
    * Test adding prefixes using the addPrefixes method
    *
diff --git a/robot-core/src/test/java/org/obolibrary/robot/ValidateOperationTest.java b/robot-core/src/test/java/org/obolibrary/robot/ValidateOperationTest.java
new file mode 100644
index 000000000..7d0dd4278
--- /dev/null
+++ b/robot-core/src/test/java/org/obolibrary/robot/ValidateOperationTest.java
@@ -0,0 +1,79 @@
+package org.obolibrary.robot;
+
+import static org.junit.Assert.*;
+
+import java.io.*;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Paths;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.commons.io.FileUtils;
+import org.junit.Test;
+import org.semanticweb.HermiT.ReasonerFactory;
+import org.semanticweb.owlapi.model.OWLOntology;
+import org.semanticweb.owlapi.reasoner.OWLReasonerFactory;
+
+/** Tests for ValidateOperation. */
+public class ValidateOperationTest extends CoreTest {
+
+  /**
+   * Test of the validation operation on a sample 'immune exposures' CSV file.
+   *
+   * <p>The table and ontology are read from the test resources, validate() is called with a null
+   * output path so that its report goes to STDOUT, and the captured report is compared with the
+   * contents of immune_exposures-result.txt.
+   *
+   * @throws Exception on any problem
+   */
+  @Test
+  public void testImmuneExposuresValidation() throws Exception {
+    IOHelper ioHelper = new IOHelper();
+
+    InputStream tableStream = this.getClass().getResourceAsStream("/immune_exposures.csv");
+    assertNotNull(tableStream);
+    List<List<String>> tableData = IOHelper.readCSV(tableStream);
+    assertNotNull(tableData);
+
+    URL res = this.getClass().getResource("/immune_exposures.csv");
+    File file = Paths.get(res.toURI()).toFile();
+    String tablePath = file.getAbsolutePath();
+
+    Map<String, List<List<String>>> tables = new HashMap<>();
+    tables.put(tablePath, tableData);
+
+    InputStream owlStream = this.getClass().getResourceAsStream("/immune_exposures.owl");
+    assertNotNull(owlStream);
+    OWLOntology ontology = ioHelper.loadOntology(owlStream);
+    assertNotNull(ontology);
+
+    // Read the expected report before touching STDOUT. The resource URL is converted through
+    // toURI() (as for the CSV above) because URL.getPath() leaves characters such as spaces
+    // percent-encoded, which breaks File lookups:
+    URL expectedRes = this.getClass().getResource("/immune_exposures-result.txt");
+    assertNotNull(expectedRes);
+    File expectedFile = Paths.get(expectedRes.toURI()).toFile();
+    String expectedResult = FileUtils.readFileToString(expectedFile, StandardCharsets.UTF_8);
+
+    // Redirect STDOUT to an OutputStream wrapped in a PrintStream, remembering the original
+    // stream so that it can be put back afterwards:
+    String encoding = StandardCharsets.UTF_8.name();
+    PrintStream originalOut = System.out;
+    ByteArrayOutputStream outStream = new ByteArrayOutputStream();
+    PrintStream prStream = new PrintStream(outStream, true, encoding);
+    System.setOut(prStream);
+    try {
+      // Call validate() with an outputPath of null to send output to STDOUT:
+      OWLReasonerFactory reasonerFactory = new ReasonerFactory();
+      ValidateOperation.validate(tables, ontology, ioHelper, reasonerFactory, null);
+    } finally {
+      // Always restore STDOUT, otherwise later tests silently write into our buffer:
+      prStream.flush();
+      System.setOut(originalOut);
+    }
+
+    // Compare the output with the expected report (JUnit order is expected, then actual):
+    assertEquals(expectedResult, outStream.toString(encoding));
+  }
+}
diff --git a/robot-core/src/test/resources/immune_exposures-result.txt b/robot-core/src/test/resources/immune_exposures-result.txt
new file mode 100644
index 000000000..ed7464f21
--- /dev/null
+++ b/robot-core/src/test/resources/immune_exposures-result.txt
@@ -0,0 +1,18 @@
+Validating immune_exposures.csv ...
+At immune_exposures.csv row 17, column 2: Cell is empty but rule: "is-required true" does not allow this. +At immune_exposures.csv row 17, column 4: Unable to parse rule "subclass-of hasMaterialBasisIn some (null)". +At immune_exposures.csv row 17, column 4: Validation failed for rule: "dengue hemorrhagic fever subclass-of hasMaterialBasisIn some (null)". +At immune_exposures.csv row 19, column 4: Cell is empty but rule: "is-required true" does not allow this. +At immune_exposures.csv row 20, column 6: Cell is empty but rule: "is-required true" does not allow this. +At immune_exposures.csv row 21, column 2: Validation failed for rule: "Dengue virus equivalent-to 'Dengue virus 2'". +At immune_exposures.csv row 21, column 3: Validation failed for rule: "NCBITaxon_11060 equivalent-to 'Dengue virus'". +At immune_exposures.csv row 22, column 4: Validation failed for rule: "Dengue virus 2 subclass-of 'disease name'". +At immune_exposures.csv row 22, column 4: Validation failed for rule: "Dengue virus 2 subclass-of hasMaterialBasisIn some 'Dengue virus'". +At immune_exposures.csv row 23, column 2: Validation failed for rule: "'Hepatovirus A' or blood equivalent-to 'blood'". +At immune_exposures.csv row 23, column 3: Validation failed for rule: "UBERON_0000178 equivalent-to ('Hepatovirus A' or blood)". +At immune_exposures.csv row 23, column 6: Cell is non-empty ("chronic") but rule: "is-excluded true" does not allow this. +At immune_exposures.csv row 24, column 4: Validation failed for rule: "wheat allergy equivalent-to 'dengue hemorrhagic fever'". +At immune_exposures.csv row 24, column 4: Validation failed for rule: "dengue hemorrhagic fever equivalent-to 'wheat allergy'". +At immune_exposures.csv row 24, column 4: Validation failed for rule: "wheat allergy subclass-of hasMaterialBasisIn some 'Dengue virus'". +At immune_exposures.csv row 24, column 5: Validation failed for rule: "DOID_3660 equivalent-to 'dengue hemorrhagic fever'". 
+At immune_exposures.csv row 24, column 5: Validation failed for rule: "DOID_12206 equivalent-to 'wheat allergy'". diff --git a/robot-core/src/test/resources/immune_exposures.csv b/robot-core/src/test/resources/immune_exposures.csv new file mode 100644 index 000000000..12fba024c --- /dev/null +++ b/robot-core/src/test/resources/immune_exposures.csv @@ -0,0 +1,27 @@ +exposure process reported,exposure material reported,exposure material id,disease reported,disease ontology id,disease stage reported,comment +is-required; subclass-of 'exposure process';,subclass-of|equivalent-to 'exposure material'; equivalent-to %3; is-required (when %1 subclass-of (administration or 'exposure without disease' or 'infectious disease' or 'allergic disease')); is-excluded (when %1 equivalent-to disease); is-excluded (when %1 subclass-of ('autoimmune disease' or cancer or 'no exposure' or unknown)),equivalent-to %2,subclass-of 'disease name'; subclass-of hasMaterialBasisIn some %2 (when %1 subclass-of not ('autoimmune disease' or cancer)); equivalent-to %5; is-required (when %1 subclass-of disease); is-excluded (when %1 equivalent-to administration); is-excluded (when %1 subclass-of (vaccination or transplant/transfusion or 'exposure without disease' or 'no exposure' or unknown)),equivalent-to %4,subclass-of 'disease stage'; is-required (when %1 subclass-of disease); is-excluded (when %1 subclass-of not disease), +vaccination,Varicella-zoster virus vaccine,VO_0000669,,,,PASS: vaccination has only material +infectious challenge,Hepacivirus C,NCBITaxon_11103,,,,PASS: infectious challenge requires only material +transplant/transfusion,blood,UBERON_0000178,,,,PASS: transplant requires only material +infectious disease,Dengue virus,NCBITaxon_12637,dengue hemorrhagic fever,DOID_12206,acute/recent onset,"PASS: infectious disease requires material, disease, stage" +allergic disease,Triticum aestivum ,NCBITaxon_4565,wheat allergy,DOID_3660,post,"PASS: allergic disease requires material, 
disease, stage" +autoimmune disease,,,acquired immunodeficiency syndrome,DOID_635,chronic,"PASS: autoimmune disease requires disease, state; excludes material" +cancer,,,colon cancer,DOID_219,chronic,"PASS: cancer requires disease, state; excludes material" +asymptomatic infection/colonization,Dengue virus,NCBITaxon_12637,,,,PASS: asymptomatic requires only material +exposure with immune reactivity,Dengue virus 2,NCBITaxon_11060,,,,PASS: exposure with immune reactivity requires only material +exposure with documentation,Dengue virus,NCBITaxon_12637,,,,PASS: exposure with documentation requires only material +exposure to endemic/ubiquitous agent,Dengue virus,NCBITaxon_12637,,,,PASS: exposure to endemic requires only material +no exposure,,,,,,PASS: none requires no fields +unknown,,,,,,PASS: unknown requires no fields +administration,Hepacivirus C,NCBITaxon_11103,,,,PASS +exposure without disease,Dengue virus,NCBITaxon_12637,,,,PASS +disease,,,,,,PASS +infectious disease,,,dengue hemorrhagic fever,DOID_12206,acute/recent onset,FAIL: material required +disease or 'infectious challenge',,,'wheat allergy' or 'dengue hemorrhagic fever',DOID_12206 or DOID_3660,'acute/recent onset' or chronic,PASS +infectious disease,Dengue virus,NCBITaxon_12637,,,acute/recent onset,FAIL: disease required +infectious disease,Dengue virus,NCBITaxon_12637,dengue hemorrhagic fever,DOID_12206,,FAIL: stage required +infectious disease,Dengue virus,NCBITaxon_11060,dengue hemorrhagic fever,DOID_12206,acute/recent onset,FAIL: exposure material must match exposure material ID +infectious disease,Dengue virus,NCBITaxon_12637,Dengue virus 2,NCBITaxon_11060,acute/recent onset,FAIL: disease value must be a disease +administration or 'exposure without disease','Hepatovirus A' or blood,UBERON_0000178,,,chronic,"FAIL: exposure material must match exposure material ID, disease must be excluded when exposure process is not a subclass of disease" +infectious disease,Dengue virus,NCBITaxon_12637,dengue 
hemorrhagic fever|wheat allergy,DOID_12206|DOID_3660,acute/recent onset,FAIL: not all combinations of the entries in the multi-cells for columns D and E satisfy the given rules +infectious disease,Dengue virus,NCBITaxon_12637,dengue hemorrhagic fever|dengue hemorrhagic fever,DOID_12206|DOID_12206,acute/recent onset,PASS diff --git a/robot-core/src/test/resources/immune_exposures.owl b/robot-core/src/test/resources/immune_exposures.owl new file mode 100644 index 000000000..c2df4f71d --- /dev/null +++ b/robot-core/src/test/resources/immune_exposures.owl @@ -0,0 +1,667 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + organism + + + + + + + + + + + + + + + dengue hemorrhagic fever + + + + + + + + + colon cancer + + + + + + + + + + + + + + + wheat allergy + + + + + + + + + acquired immunodeficiency syndrome + + + + + + + + + Dengue virus 2 + + + + + + + + + Dengue virus + + + + + + + + + Triticum aestivum + + + + + + + + + blood + + + + + + + + + anatomical entity + + + + + + + + + vaccine + + + + + + + + + Varicella-zoster virus vaccine + + + + + + + + + acute/recent onset + + + + + + + + + + + + + + + administration + + + + + + + + + + + + + + + allergic disease + + + + + + + + + asymptomatic infection/colonization + + + + + + + + + autoimmune disease + + + + + + + + + cancer + + + + + + + + + chronic + + + + + + + + + + + + + + + hepatitis A + + + + + + + + + + + + + + + hepatitis C + + + + + + + + + + + + + + + hepatitis B + + + + + + + + + + + + + + + + + + + + + disease + + + + + + + + disease stage + + + + + + + + exposure material + + + + + + + + exposure process + + + + + + + + + exposure to endemic/ubiquitous agent + + + + + + + + + exposure with documentation + + + + + + + + + exposure with immune reactivity + + + + + + + + + + + + + + + exposure without disease + + + + + + + + + infectious challenge + + + + + + + + + + + + + + + 
infectious disease + + + + + + + + + Hepatitis B virus + + + + + + + + + Hepacivirus C + + + + + + + + + Hepatovirus A + + + + + + + + + no exposure + + + + + + + + + post + + + + + + + + + transplant/transfusion + + + + + + + + + unknown + + + + + + + + + vaccination + + + + + + + + disease name + + + + + + + + + + + + + Harry Smith + + + + + + + + Jolin Wu + + + + + + + + Pele + + + + + + + + Travis Semenko + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +