@prefix IAO: <http://purl.obolibrary.org/obo/IAO_> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix dq: <https://purl.org/packagegraph/ontology/dq#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix pkg: <https://purl.org/packagegraph/ontology/core#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

# dq:detectedAt — xsd:dateTime at which a dq:DataQualityIssue was first identified.
# Declared functional: the definition refers to the *first* detection, which is
# a single instant per issue, and this keeps the property consistent with the
# other single-valued issue descriptors (dq:detectedBy, dq:field, dq:issueType,
# dq:rawValue).
# NOTE(review): confirm no producer emits more than one dq:detectedAt per issue;
# under OWL semantics two distinct timestamps on one issue would be inconsistent.
dq:detectedAt a owl:DatatypeProperty,
        owl:FunctionalProperty ;
    rdfs:label "detected at"@en ;
    IAO:0000115 "The date and time when the data quality issue was first identified."@en ;
    rdfs:comment "Timestamp when the issue was first detected"@en ;
    rdfs:domain dq:DataQualityIssue ;
    rdfs:isDefinedBy dq: ;
    rdfs:range xsd:dateTime .

# dq:detectedBy — names the ETL component that reported the issue.
# Functional datatype property: dq:DataQualityIssue -> xsd:string.
dq:detectedBy
    a owl:DatatypeProperty , owl:FunctionalProperty ;
    rdfs:isDefinedBy dq: ;
    rdfs:domain dq:DataQualityIssue ;
    rdfs:range xsd:string ;
    rdfs:label "detected by"@en ;
    rdfs:comment "The enricher, collector, or validator that found the issue"@en ;
    IAO:0000115 """Identifies which ETL component discovered the data quality
        issue, e.g., 'enrich-github-vcs', 'collect-debian',
        'validate-data'. Used to trace issues back to their detection
        source."""@en .

# dq:field — the data field (dot notation for nested fields) in which the
# issue was found. Functional datatype property: dq:DataQualityIssue -> xsd:string.
dq:field
    a owl:DatatypeProperty , owl:FunctionalProperty ;
    rdfs:isDefinedBy dq: ;
    rdfs:domain dq:DataQualityIssue ;
    rdfs:range xsd:string ;
    rdfs:label "affected field"@en ;
    rdfs:comment "The data field or property where the issue was found"@en ;
    IAO:0000115 """Identifies which data field or property contains the quality
        issue, e.g., 'commit.author.email', 'homepage', 'packageName'.
        Uses dot notation for nested fields."""@en .

# dq:hasQualityIssue — object property pointing from an affected resource to
# its dq:DataQualityIssue record. No rdfs:domain is stated, so the subject may
# be any kind of resource.
dq:hasQualityIssue
    a owl:ObjectProperty ;
    rdfs:isDefinedBy dq: ;
    rdfs:range dq:DataQualityIssue ;
    rdfs:label "has quality issue"@en ;
    rdfs:comment "Links any resource to a data quality issue affecting it"@en ;
    IAO:0000115 """Associates a resource (package, repository, maintainer, etc.)
        with a data quality issue that was discovered about it. The linked
        DataQualityIssue provides details about the issue type, affected field,
        raw value, and which component detected it."""@en .

# dq:issueType — machine-readable category string for the issue.
# Functional datatype property: dq:DataQualityIssue -> xsd:string.
dq:issueType
    a owl:DatatypeProperty , owl:FunctionalProperty ;
    rdfs:isDefinedBy dq: ;
    rdfs:domain dq:DataQualityIssue ;
    rdfs:range xsd:string ;
    rdfs:label "issue type"@en ;
    rdfs:comment "Category of the data quality issue"@en ;
    IAO:0000115 """A machine-readable category string identifying the type of
        data quality issue, e.g., 'dead-repo', 'malformed-email',
        'invalid-homepage', 'missing-field'. Used for filtering and
        aggregating issues in SPARQL queries."""@en .

# dq:rawValue — the original offending value, kept for debugging and analysis.
# Functional datatype property: dq:DataQualityIssue -> xsd:string.
dq:rawValue
    a owl:DatatypeProperty , owl:FunctionalProperty ;
    rdfs:isDefinedBy dq: ;
    rdfs:domain dq:DataQualityIssue ;
    rdfs:range xsd:string ;
    rdfs:label "raw value"@en ;
    rdfs:comment "The actual problematic value that triggered the issue"@en ;
    IAO:0000115 """The original raw value that was identified as problematic,
        preserved for debugging and analysis. Truncated to 500 characters
        for excessively long values."""@en .

# dq:severity — severity level of a dq:DataQualityIssue.
# Declared functional: the definition speaks of "the severity level" in the
# singular, and this keeps the property consistent with the other single-valued
# issue descriptors (dq:detectedBy, dq:field, dq:issueType, dq:rawValue).
# The range stays xsd:string; the level names 'info', 'warning' and 'error' are
# listed in the definition text only and are not restricted by this axiom.
# NOTE(review): if the value set should be enforced, check whether the shapes
# graph referenced from the ontology header already constrains it — TODO confirm.
dq:severity a owl:DatatypeProperty,
        owl:FunctionalProperty ;
    rdfs:label "severity"@en ;
    IAO:0000115 """The severity level of the data quality issue:
        'info' for observations that may not require action,
        'warning' for issues that degrade data quality,
        'error' for issues that produce invalid or unusable data."""@en ;
    rdfs:comment "Severity level: info, warning, error"@en ;
    rdfs:domain dq:DataQualityIssue ;
    rdfs:isDefinedBy dq: ;
    rdfs:range xsd:string .

# dq:DataQualityIssue — class of recorded data quality problems. Instances are
# the range of dq:hasQualityIssue and the domain of the dq: datatype properties
# declared in this file.
dq:DataQualityIssue
    a owl:Class ;
    rdfs:isDefinedBy dq: ;
    rdfs:label "Data Quality Issue"@en ;
    rdfs:comment "A recorded data quality issue found during collection or enrichment"@en ;
    IAO:0000115 """A structured record of a data quality problem discovered during
        ETL processing. Each issue is linked to the affected resource via
        dq:hasQualityIssue, enabling SPARQL queries to find packages,
        repositories, or other resources with specific quality problems."""@en .

# Ontology header for the Data Quality (dq:) vocabulary: descriptive metadata,
# licence, the import of the pkg: core ontology, and version bookkeeping.
# dcterms:abstract and dcterms:title carry an @en language tag so that they
# match the other natural-language literals on this subject (rdfs:label,
# rdfs:comment) and are found by language-filtered queries.
# owl:versionInfo is a version token, not prose, and is left untagged.
dq: a owl:Ontology ;
    rdfs:label "Data Quality Ontology"@en ;
    dcterms:abstract """The Data Quality (DQ) ontology provides a structured way to record
        data quality issues discovered during ETL collection and enrichment.
        Issues are stored as first-class triples linked to the affected resources,
        making them queryable via SPARQL alongside the data they describe.

        This enables workflows such as:
        - Finding packages with dead upstream repositories
        - Identifying malformed maintainer emails in commit metadata
        - Tracking invalid homepage URLs across distributions
        - Monitoring data quality trends over time"""@en ;
    dcterms:created "2026-04-14"^^xsd:date ;
    dcterms:creator <https://packagegraph.github.io/> ;
    dcterms:license <https://creativecommons.org/publicdomain/zero/1.0/> ;
    dcterms:modified "2026-04-21"^^xsd:date ;
    dcterms:title "Data Quality Ontology"@en ;
    rdfs:comment "Ontology for recording and querying data quality issues in the PackageGraph knowledge graph"@en ;
    rdfs:isDefinedBy dq: ;
    rdfs:seeAlso <https://purl.org/packagegraph/ontology/dq/shapes> ;
    owl:imports pkg: ;
    owl:priorVersion <https://purl.org/packagegraph/ontology/dq/0.6.0> ;
    owl:versionIRI <https://purl.org/packagegraph/ontology/dq/0.7.0> ;
    owl:versionInfo "0.7.0" .

