{
  "sql": {
    "name": "SQL",
    "fullName": "Structured Query Language",
    "category": "Query",
    "governance": "ANSI / ISO/IEC 9075",
    "status": "Stable since 1986; revised regularly (latest SQL:2023)",
    "judgement": "Adopt",
    "judgementReason": "The universal data query language; not a decision.",
    "description": [
      "SQL is the standard query language for relational databases. It is the oldest and most universal de-facto standard in the data world.",
      "Despite vendor-specific dialects, the ISO/IEC 9075 core defines a portable subset that every serious analytical engine implements."
    ],
    "links": [
      {
        "label": "ISO/IEC 9075 standard",
        "url": "https://www.iso.org/standard/76583.html"
      },
      {
        "label": "Wikipedia: SQL",
        "url": "https://en.wikipedia.org/wiki/SQL"
      }
    ],
    "firstReleased": 1986,
    "logo": "/media/icons/standards-map/logos/iso.png",
    "umbrella": "ISO/IEC",
    "standardization": "formal-standard"
  },
  "mdx": {
    "name": "MDX",
    "fullName": "Multidimensional Expressions",
    "category": "Query",
    "governance": "Originally Microsoft (1997); referenced by the XMLA specification",
    "status": "Legacy; ubiquitous in OLAP/cube tooling, no significant new adoption",
    "judgement": "Caution",
    "judgementReason": "Multidimensional query language; surviving only in MS Analysis Services.",
    "description": [
      "Query language for multidimensional databases — the OLAP-cube counterpart to SQL. Used to express slice, dice, and rollup operations against star/snowflake schemas exposed as cubes.",
      "Originated with Microsoft OLE DB for OLAP and adopted by Microsoft Analysis Services, Mondrian, SAP BW, AtScale, and Oracle Essbase. Still pervasive in enterprise BI estates even though greenfield analytics has largely moved to SQL plus semantic layers."
    ],
    "links": [
      {
        "label": "Microsoft MDX reference",
        "url": "https://learn.microsoft.com/en-us/analysis-services/multidimensional-models/mdx/mdx-query-the-basic-query"
      },
      {
        "label": "Wikipedia: MultiDimensional eXpressions",
        "url": "https://en.wikipedia.org/wiki/MultiDimensional_eXpressions"
      }
    ],
    "firstReleased": 1997,
    "tier": "legacy",
    "logo": "/media/icons/standards-map/logos/microsoft.svg",
    "umbrella": "Microsoft",
    "standardization": "vendor-led"
  },
  "substrait": {
    "name": "Substrait",
    "fullName": "Substrait",
    "category": "Query",
    "governance": "Substrait Project (independent; ASF-inspired PMC)",
    "status": "Stable; growing adoption across query engines",
    "judgement": "Situational",
    "judgementReason": "Cross-engine query-plan IR — pick when you're building or integrating engines that need a portable plan format; end users rarely touch it.",
    "niche": true,
    "nicheReason": "Cross-engine query-plan IR that engines exchange — not a surface that users or analysts ever write. Adoption is concentrated inside engine internals (DuckDB, DataFusion, Velox, Ibis); most data teams will never touch it directly. Listed for the engine-builders and Ibis-style frontends.",
    "description": [
      "Cross-language, cross-engine specification for representing relational query plans. Where SQL is the query language for humans, Substrait is the serialised intermediate representation engines exchange.",
      "Adopted by DuckDB, Apache DataFusion, Velox, and Ibis (which compiles its DataFrame expressions to Substrait). Aims to be the portable IR between query frontends and execution engines, similar to LLVM IR for compilers."
    ],
    "standardReason": "Substrait is published as an open specification with protobuf-defined plan types, governed by an independent, ASF-inspired PMC. Multiple independent engines consume and produce Substrait plans, which is exactly the cross-vendor interchange contract that makes it a standard rather than a project.",
    "links": [
      {
        "label": "substrait.io",
        "url": "https://substrait.io"
      },
      {
        "label": "GitHub: substrait-io/substrait",
        "url": "https://github.com/substrait-io/substrait"
      }
    ],
    "firstReleased": 2021,
    "logo": "/media/icons/standards-map/logos/substrait.svg",
    "umbrella": "LF",
    "standardization": "foundation"
  },
  "opentelemetry": {
    "name": "Open​Telemetry",
    "fullName": "OpenTelemetry",
    "category": "Observability",
    "governance": "CNCF (Cloud Native Computing Foundation), Linux Foundation",
    "status": "Stable; widely adopted across cloud-native stacks",
    "judgement": "Adopt",
    "judgementReason": "Vendor-neutral observability standard.",
    "description": [
      "Vendor-neutral framework for collecting traces, metrics, and logs from distributed systems. The result of merging OpenTracing and OpenCensus.",
      "In a data mesh context, OpenTelemetry is the natural choice for instrumenting pipelines and platform services so a federated governance team can correlate behaviour across domains."
    ],
    "links": [
      {
        "label": "opentelemetry.io",
        "url": "https://opentelemetry.io"
      },
      {
        "label": "GitHub: open-telemetry",
        "url": "https://github.com/open-telemetry"
      }
    ],
    "firstReleased": 2019,
    "logo": "/media/icons/standards-map/logos/opentelemetry.svg",
    "umbrella": "CNCF",
    "standardization": "foundation"
  },
  "oors": {
    "name": "OORS",
    "fullName": "Open Observability Results Standard",
    "category": "Observability",
    "governance": "BITOL / Linux Foundation (draft)",
    "status": "Emerging — auxiliary BITOL standard, not yet released",
    "judgement": "Assess",
    "judgementReason": "BITOL observability/quality-result standard; new and under-adopted.",
    "description": [
      "Auxiliary BITOL standard answering: what does an observability or quality-check result look like? Designed to sit alongside ODCS and ODPS so any tool can publish and consume verifiable check results.",
      "Today every tool reports check outcomes in its own shape. OORS aims to give the marketplace, governance UI, and AI agents one consistent format to ingest."
    ],
    "links": [
      {
        "label": "BITOL project",
        "url": "https://bitol.io"
      }
    ],
    "firstReleased": 2025,
    "logo": "/media/icons/standards-map/logos/bitol.svg",
    "umbrella": "BITOL @ LF",
    "standardization": "foundation"
  },
  "odcs": {
    "name": "ODCS",
    "fullName": "Open Data Contract Standard",
    "category": "Contracts",
    "highlight": true,
    "governance": "BITOL / Linux Foundation",
    "status": "v3.1 stable; v3.2 in progress",
    "judgement": "Adopt",
    "judgementReason": "Winning data-contract spec; YAML, multi-vendor.",
    "description": [
      "YAML-based open standard for defining data contracts: schema, data quality, SLAs, terms of use, team ownership, servers. Think of it as OpenAPI, but for data.",
      "Originated as PayPal's internal Data Contract Template (donated as ODCS 2.x). Starting with v3.0 the standard was generalised for any enterprise."
    ],
    "links": [
      {
        "label": "Official site",
        "url": "https://bitol.io"
      },
      {
        "label": "GitHub: open-data-contract-standard",
        "url": "https://github.com/bitol-io/open-data-contract-standard"
      },
      {
        "label": "Spec (latest)",
        "url": "https://bitol-io.github.io/open-data-contract-standard/latest/"
      },
      {
        "label": "Data Contract CLI",
        "url": "https://github.com/datacontract/datacontract-cli"
      }
    ],
    "firstReleased": 2023,
    "logo": "/media/icons/standards-map/logos/bitol.svg",
    "umbrella": "BITOL @ LF",
    "standardization": "foundation"
  },
  "openapi": {
    "name": "OpenAPI",
    "fullName": "OpenAPI Specification",
    "category": "Contracts",
    "governance": "OpenAPI Initiative, Linux Foundation",
    "status": "Stable; OAS 3.1 current",
    "judgement": "Adopt",
    "judgementReason": "De-facto REST description with tooling for almost every language.",
    "description": [
      "Originally Swagger (2011), now the de-facto standard for describing REST APIs. Tooling exists for almost every language and platform.",
      "OpenAPI is the reference success story for what an open standard can become. ODCS is essentially trying to do for data what OpenAPI did for REST APIs."
    ],
    "links": [
      {
        "label": "openapis.org",
        "url": "https://www.openapis.org"
      },
      {
        "label": "GitHub: OAI/OpenAPI-Specification",
        "url": "https://github.com/OAI/OpenAPI-Specification"
      }
    ],
    "firstReleased": 2011,
    "logo": "/media/icons/standards-map/logos/openapi.svg",
    "umbrella": "LF",
    "standardization": "foundation"
  },
  "asyncapi": {
    "name": "AsyncAPI",
    "fullName": "AsyncAPI Specification",
    "category": "Contracts",
    "governance": "AsyncAPI Initiative, Linux Foundation",
    "status": "Stable; v3 current",
    "judgement": "Adopt",
    "judgementReason": "The OpenAPI-equivalent for event APIs once you commit to documenting them.",
    "description": [
      "Open-source standard for defining event-driven and message-based APIs across protocols like Kafka, AMQP, MQTT, WebSocket, and NATS.",
      "Where OpenAPI describes synchronous request/response, AsyncAPI describes asynchronous channels. The two specifications are intentionally aligned in style."
    ],
    "links": [
      {
        "label": "asyncapi.com",
        "url": "https://www.asyncapi.com"
      },
      {
        "label": "GitHub: asyncapi",
        "url": "https://github.com/asyncapi"
      }
    ],
    "firstReleased": 2017,
    "logo": "/media/icons/standards-map/logos/asyncapi.svg",
    "umbrella": "LF",
    "standardization": "foundation"
  },
  "graphql": {
    "name": "GraphQL",
    "fullName": "GraphQL",
    "category": "Contracts",
    "governance": "GraphQL Foundation, Linux Foundation",
    "status": "Stable; widely adopted",
    "judgement": "Adopt",
    "judgementReason": "Default for client-driven aggregation; mature spec, multi-language tooling, well-established at scale.",
    "description": [
      "Query language and runtime for APIs originally created at Facebook. Lets clients ask for exactly the data they need from a typed schema.",
      "In a data mesh, GraphQL appears mostly on the consumption side — an output-port style for application teams that prefer typed graph queries over SQL."
    ],
    "links": [
      {
        "label": "graphql.org",
        "url": "https://graphql.org"
      },
      {
        "label": "Specification",
        "url": "https://spec.graphql.org"
      }
    ],
    "firstReleased": 2015,
    "logo": "/media/icons/standards-map/logos/graphql.svg",
    "umbrella": "LF",
    "standardization": "foundation"
  },
  "grpc": {
    "name": "gRPC",
    "fullName": "gRPC Remote Procedure Call",
    "category": "Contracts",
    "governance": "CNCF (Incubating), Linux Foundation",
    "status": "Stable; ubiquitous in cloud-native stacks",
    "judgement": "Situational",
    "judgementReason": "Right for service-to-service binary RPC; wrong for browser clients.",
    "description": [
      "High-performance, language-agnostic RPC framework built on HTTP/2 and Protocol Buffers. Originated at Google, donated to CNCF.",
      "In a data architecture, gRPC shows up wherever services need fast, strongly-typed binary communication — internal service mesh, streaming pipelines, ML model serving."
    ],
    "links": [
      {
        "label": "grpc.io",
        "url": "https://grpc.io"
      },
      {
        "label": "GitHub: grpc",
        "url": "https://github.com/grpc/grpc"
      }
    ],
    "firstReleased": 2015,
    "logo": "/media/icons/standards-map/logos/grpc.png",
    "umbrella": "CNCF",
    "standardization": "foundation"
  },
  "dbt": {
    "name": "dbt",
    "fullName": "data build tool",
    "category": "Processing",
    "governance": "dbt Labs (vendor-driven open source)",
    "status": "De-facto standard for analytics engineering; vendor-governed format",
    "judgement": "Adopt",
    "judgementReason": "Analytics-engineering default: SQL-first models, tests, lineage.",
    "description": [
      "SQL-first transformation framework with built-in tests, documentation, and lineage. The de-facto standard for analytics engineering — used both to model warehouse data and to express the quality checks that ride alongside the transformations.",
      "Vendor-governed by dbt Labs rather than a foundation, so it isn't an open standard in the strict sense. Included here on the de-facto criterion: its YAML and Jinja-SQL conventions are what other tools (SQLMesh, Lightdash) interoperate with."
    ],
    "standardReason": "The dbt project artefacts — the manifest.json, the model YAML, the schema.yml tests — have become the interchange format for analytics engineering. SQLMesh, Lightdash, Elementary, dbt-osmosis and many other tools read or emit dbt's project structure rather than inventing their own. Quality checks declared in dbt are part of that surface. That makes the format itself the standard, even though dbt Labs governs it.",
    "links": [
      {
        "label": "getdbt.com",
        "url": "https://www.getdbt.com"
      }
    ],
    "firstReleased": 2016,
    "logo": "/media/icons/standards-map/logos/dbt.png",
    "umbrella": "dbt Labs",
    "umbrellaSearch": "dbt Labs vendor",
    "vendor": true,
    "standardization": "vendor-led"
  },
  "great-expectations": {
    "name": "Great Expectations",
    "fullName": "Great Expectations",
    "category": "Data Quality",
    "governance": "GX Labs (vendor-driven open source)",
    "status": "Widely adopted; vendor-driven",
    "judgement": "Situational",
    "judgementReason": "Python-first DQ; powerful but heavy.",
    "description": [
      "Python framework for declaring and validating expectations on data — nulls, ranges, distributions, distinctness.",
      "Open source and useful, but the spec is governed by one company, so it sits in the same vendor-driven bucket as dbt and SodaCL."
    ],
    "standardReason": "The expectations DSL and the JSON validation-result schema GX defines have become the reference vocabulary for declarative data quality in Python. Other tools embed or interoperate with Great Expectations rather than reinvent the same primitives. The standard part is the expectation language and its result format — not the company behind it.",
    "links": [
      {
        "label": "greatexpectations.io",
        "url": "https://greatexpectations.io"
      },
      {
        "label": "GitHub: great-expectations",
        "url": "https://github.com/great-expectations/great_expectations"
      }
    ],
    "firstReleased": 2018,
    "logo": "/media/icons/standards-map/logos/great-expectations.png",
    "umbrella": "GX Labs",
    "umbrellaSearch": "GX Labs vendor",
    "vendor": true,
    "standardization": "vendor-led"
  },
  "dbt-tests": {
    "name": "dbt tests",
    "fullName": "dbt schema and data tests",
    "category": "Data Quality",
    "governance": "dbt Labs (vendor-driven open source)",
    "status": "De-facto for analytics teams already on dbt",
    "judgement": "Situational",
    "judgementReason": "Right when your transformations already live in dbt; otherwise reach for a standalone DQ tool.",
    "description": [
      "The schema-test and data-test surface inside a dbt project — `tests:` blocks in schema.yml, generic and singular tests, and the test results that downstream tools (Elementary, dbt-osmosis, dbt artifacts) consume.",
      "Treated as a separate entry from dbt itself because the test surface is the part of dbt that data-quality tooling interoperates with. Adopt-grade if your transformations live in dbt; situational elsewhere because the tests can't easily be lifted out of a dbt project."
    ],
    "standardReason": "dbt's test definitions and the manifest entries they produce are what other DQ tools (Elementary, re_data, dbt-osmosis) read. The format is the standard surface even though dbt Labs governs it.",
    "links": [
      {
        "label": "docs.getdbt.com — Tests",
        "url": "https://docs.getdbt.com/docs/build/data-tests"
      }
    ],
    "firstReleased": 2016,
    "logo": "/media/icons/standards-map/logos/dbt.png",
    "umbrella": "dbt Labs",
    "umbrellaSearch": "dbt Labs vendor",
    "vendor": true,
    "standardization": "vendor-led"
  },
  "sodacl": {
    "name": "SodaCL",
    "fullName": "Soda Checks Language",
    "category": "Data Quality",
    "governance": "Soda (vendor-driven open source)",
    "status": "Widely adopted; vendor-driven",
    "judgement": "Situational",
    "judgementReason": "YAML-first DQ; lighter than Great Expectations, smaller ecosystem.",
    "description": [
      "YAML-based DSL for expressing data quality checks against tables and columns. Belgian roots — appropriately close to home for the Leuven meetup.",
      "Used under the hood by the Data Contract CLI to actually execute the quality checks declared in an ODCS contract."
    ],
    "standardReason": "SodaCL is the YAML check language Soda publishes — and the part that other tools have standardised on. The Data Contract CLI delegates ODCS quality checks to SodaCL because expressing checks portably matters more than running them in Soda Cloud. The check syntax is the standard surface, even though Soda governs it.",
    "links": [
      {
        "label": "docs.soda.io/soda-cl",
        "url": "https://docs.soda.io/soda-cl/soda-cl-overview.html"
      },
      {
        "label": "Soda Core (open source)",
        "url": "https://github.com/sodadata/soda-core"
      }
    ],
    "firstReleased": 2022,
    "logo": "/media/icons/standards-map/logos/soda.png",
    "umbrella": "Soda",
    "umbrellaSearch": "Soda vendor",
    "vendor": true,
    "standardization": "vendor-led"
  },
  "odps": {
    "name": "ODPS",
    "fullName": "Open Data Product Standard",
    "category": "Data Products",
    "highlight": true,
    "governance": "BITOL / Linux Foundation",
    "status": "v1.0",
    "judgement": "Adopt",
    "judgementReason": "Winning data-product spec; ODCS-aligned.",
    "description": [
      "YAML standard for describing data products. Mirrors ODCS on fundamentals, ownership, and terms, and adds input ports, output ports, and management ports.",
      "Each port references an ODCS contract. Combined, ODCS + ODPS yield the full data-mesh dependency graph — consumers, products, sources — derived directly from YAML."
    ],
    "note": "Acronym collision: \"ODPS\" is also used for the Open Data Product Specification, a separate LF project. Both display as ODPS here — disambiguate by governance (BITOL vs opendataproducts.org).",
    "links": [
      {
        "label": "GitHub: open-data-product-standard",
        "url": "https://github.com/bitol-io/open-data-product-standard"
      },
      {
        "label": "BITOL",
        "url": "https://bitol.io"
      }
    ],
    "firstReleased": 2025,
    "logo": "/media/icons/standards-map/logos/bitol.svg",
    "umbrella": "BITOL @ LF",
    "standardization": "foundation"
  },
  "dpds": {
    "name": "DPDS",
    "fullName": "Data Product Descriptor Specification",
    "category": "Data Products",
    "governance": "OpenDataMesh community",
    "status": "Active; community-driven specification",
    "judgement": "Assess",
    "judgementReason": "Coexists with BITOL's ODPS but with smaller community.",
    "description": [
      "YAML-based specification for describing data products with input ports, output ports, control ports, and discovery ports. Maintained by the OpenDataMesh initiative.",
      "Distinct from BITOL's ODPS: similar problem space, different community and slightly different mental model. Worth knowing both exist before you commit to one."
    ],
    "links": [
      {
        "label": "dpds.opendatamesh.org",
        "url": "https://dpds.opendatamesh.org/concepts/data-product-descriptor/"
      },
      {
        "label": "OpenDataMesh",
        "url": "https://www.opendatamesh.org"
      }
    ],
    "firstReleased": 2022,
    "logo": "/media/icons/standards-map/logos/dpds.png",
    "umbrella": "ODM",
    "standardization": "community",
    "umbrellaSearch": "OpenDataMesh"
  },
  "odpspec": {
    "name": "ODPS",
    "fullName": "Open Data Product Specification",
    "category": "Data Products",
    "governance": "opendataproducts.org / Linux Foundation",
    "status": "Active; mature pricing & i18n features",
    "judgement": "Assess",
    "judgementReason": "LF-governed data-product spec; strong on commercial terms, modest adoption.",
    "description": [
      "Standalone open specification for describing data products, governed under the Linux Foundation umbrella. Strong on pricing plans and internationalisation, making it a natural fit for external data marketplaces with complex commercial terms.",
      "Different mental model from BITOL's ODPS: no concept of input ports, at most one linked contract at the top level. Both are valid — they solve different problems."
    ],
    "links": [
      {
        "label": "opendataproducts.org",
        "url": "https://opendataproducts.org"
      }
    ],
    "firstReleased": 2022,
    "tier": "stable",
    "logo": "/media/icons/standards-map/logos/odpspec.png",
    "umbrella": "LF",
    "standardization": "foundation",
    "umbrellaSearch": "LF opendataproducts"
  },
  "dprod": {
    "name": "DPROD",
    "fullName": "Data Product Vocabulary (DPROD)",
    "category": "Data Products",
    "governance": "EKGF / Object Management Group (OMG) — RDF/OWL vocabulary",
    "status": "Active in semantic-web circles",
    "judgement": "Assess",
    "judgementReason": "RDF/OWL data-product vocabulary; right only when your stack already lives in linked data.",
    "description": [
      "RDF/OWL vocabulary for describing data products. Composes naturally with other semantic-web vocabularies (DCAT, SKOS, FIBO, etc.) when your stack already lives in the linked-data world.",
      "Where BITOL's ODPS is YAML and strict, DPROD is RDF and graph-native. Both have their place; the format choice determines what else you can compose with."
    ],
    "links": [
      {
        "label": "DPROD specification",
        "url": "https://ekgf.github.io/dprod/"
      }
    ],
    "firstReleased": 2024,
    "logo": "/media/icons/standards-map/logos/dprod.jpg",
    "umbrella": "OMG",
    "standardization": "formal-standard"
  },
  "iceberg": {
    "name": "Iceberg",
    "fullName": "Apache Iceberg",
    "category": "Open Table Formats",
    "governance": "Apache Software Foundation",
    "status": "Stable; effectively the winner of the table-format race",
    "judgement": "Adopt",
    "judgementReason": "Default open table format for new lakes; multi-vendor catalog support and ASF governance.",
    "description": [
      "Open table format for huge analytic datasets — schema evolution, hidden partitioning, time travel, ACID semantics on object storage.",
      "Originated at Netflix; now a default in Snowflake, Databricks, AWS, and most query engines. Increasingly the substrate that other layers (catalogs, lakehouses) build on."
    ],
    "standardReason": "Iceberg's table-format specification — manifest files, metadata layout, snapshot semantics — is published openly and re-implemented by every major engine: Snowflake, Databricks, Trino, Spark, Dremio, DuckDB. The format itself is the standard; the original Netflix implementation is just one client of it.",
    "links": [
      {
        "label": "iceberg.apache.org",
        "url": "https://iceberg.apache.org"
      }
    ],
    "firstReleased": 2017,
    "logo": "/media/icons/standards-map/logos/iceberg.svg",
    "umbrella": "ASF",
    "standardization": "foundation",
    "umbrellaSearch": "ASF Apache"
  },
  "delta": {
    "name": "Delta",
    "fullName": "Delta Lake",
    "category": "Open Table Formats",
    "governance": "Linux Foundation (originally Databricks)",
    "status": "Stable; widely deployed",
    "judgement": "Situational",
    "judgementReason": "Reach-For if you're on Databricks; Situational elsewhere.",
    "description": [
      "Open-source storage layer that brings ACID transactions, schema enforcement, and time travel to data lakes. The original lakehouse format.",
      "Delta Universal Format (UniForm) now also writes Iceberg-compatible metadata, narrowing the gap between the two formats."
    ],
    "standardReason": "The Delta transaction-log protocol is published as an open spec and implemented by readers and writers beyond Databricks — delta-rs, Trino, Flink, Spark. The standard surface is the protocol, not the Databricks runtime; that is what makes a Delta table portable across engines.",
    "links": [
      {
        "label": "delta.io",
        "url": "https://delta.io"
      }
    ],
    "firstReleased": 2019,
    "logo": "/media/icons/standards-map/logos/delta.png",
    "umbrella": "LF",
    "standardization": "foundation"
  },
  "hudi": {
    "name": "Hudi",
    "fullName": "Apache Hudi",
    "category": "Open Table Formats",
    "governance": "Apache Software Foundation",
    "status": "Stable; strong streaming/upsert focus",
    "judgement": "Assess",
    "judgementReason": "Third-place table format; legitimate CDC-upsert use cases but lost the default slot.",
    "description": [
      "Transactional data lake platform that pioneered incremental processing and upserts on data lakes.",
      "Less mainstream than Iceberg or Delta today, but still the table format of choice when streaming upserts and CDC are first-class requirements."
    ],
    "standardReason": "Hudi publishes its on-disk table format and timeline protocol as an open spec, and engines beyond the Hudi runtime (Spark, Flink, Trino, Presto) read and write it directly. The format is the standard part; the platform tooling is one implementation of it.",
    "links": [
      {
        "label": "hudi.apache.org",
        "url": "https://hudi.apache.org"
      }
    ],
    "firstReleased": 2017,
    "logo": "/media/icons/standards-map/logos/hudi.svg",
    "umbrella": "ASF",
    "standardization": "foundation",
    "umbrellaSearch": "ASF Apache"
  },
  "ducklake": {
    "name": "DuckLake",
    "fullName": "DuckLake",
    "category": "Catalog APIs",
    "governance": "DuckDB Labs (vendor-driven open source)",
    "status": "Emerging; v1.0 production-ready (April 2026), adoption concentrated in the DuckDB ecosystem",
    "judgement": "Assess",
    "judgementReason": "DuckDB-Labs catalog; v1.0 production-ready, ecosystem still small.",
    "tier": "emerging",
    "description": [
      "Lakehouse catalog format from DuckDB Labs that puts the catalog inside a regular SQL database — no separate REST catalog, no manifest sprawl. Designed for simplicity and zero-infrastructure analytics."
    ],
    "standardReason": "DuckLake's catalog schema is published as an open SQL specification — any database that speaks SQL can host a DuckLake catalog. Listed on the de-facto criterion: the schema is the standard surface, even though DuckDB Labs governs it and adoption is still early.",
    "links": [
      {
        "label": "ducklake.select",
        "url": "https://ducklake.select"
      },
      {
        "label": "duckdb.org",
        "url": "https://duckdb.org"
      }
    ],
    "firstReleased": 2025,
    "logo": "/media/icons/standards-map/logos/duckdb.svg",
    "umbrella": "DuckDB Labs",
    "umbrellaSearch": "DuckDB Labs vendor",
    "vendor": true,
    "standardization": "vendor-led"
  },
  "xml-schema": {
    "name": "XML Schema",
    "fullName": "XML Schema Definition (XSD)",
    "category": "Schema",
    "governance": "W3C",
    "status": "Stable since 2001",
    "judgement": "Adopt",
    "judgementReason": "The schema language for XML — load-bearing in enterprise integration, finance, healthcare, and government with mature tooling.",
    "description": [
      "W3C standard for describing the structure, content, and constraints of XML documents.",
      "Still the schema language of record in many enterprise integration scenarios — SOAP, banking, healthcare, and government."
    ],
    "links": [
      {
        "label": "W3C: XML Schema",
        "url": "https://www.w3.org/XML/Schema"
      }
    ],
    "firstReleased": 2001,
    "logo": "/media/icons/standards-map/logos/w3c.svg",
    "umbrella": "W3C",
    "standardization": "formal-standard"
  },
  "json-schema": {
    "name": "JSON Schema",
    "fullName": "JSON Schema",
    "category": "Schema",
    "governance": "JSON Schema community / IETF",
    "status": "Latest draft 2020-12; published independently after leaving the IETF Internet-Draft track",
    "judgement": "Adopt",
    "judgementReason": "Default API-payload validation; behind OpenAPI/AsyncAPI.",
    "description": [
      "Vocabulary for validating, annotating, and documenting JSON. The schema language of choice for modern HTTP APIs and event payloads.",
      "ODCS uses JSON Schema concepts for typing complex column structures, and the OpenAPI / AsyncAPI specs are themselves anchored in JSON Schema."
    ],
    "links": [
      {
        "label": "json-schema.org",
        "url": "https://json-schema.org"
      }
    ],
    "firstReleased": 2009,
    "tier": "stable",
    "logo": "/media/icons/standards-map/logos/json-schema.png",
    "umbrella": "IETF",
    "standardization": "formal-standard"
  },
  "sql-ddl": {
    "name": "SQL DDL",
    "fullName": "SQL Data Definition Language",
    "category": "Schema",
    "governance": "ANSI / ISO/IEC 9075",
    "status": "Stable; per-vendor extensions are everywhere",
    "judgement": "Adopt",
    "judgementReason": "Universal way to describe a relational schema; portable across engines.",
    "description": [
      "The CREATE / ALTER / DROP subset of SQL — the most universal way to describe a relational schema.",
      "The Data Contract CLI imports SQL DDL into ODCS contracts and exports back to it, because every relational engine speaks at least a flavour of it."
    ],
    "links": [
      {
        "label": "ISO/IEC 9075 standard",
        "url": "https://www.iso.org/standard/76583.html"
      }
    ],
    "firstReleased": 1986,
    "logo": "/media/icons/standards-map/logos/iso.png",
    "umbrella": "ISO/IEC",
    "standardization": "formal-standard"
  },
  "avro-schema": {
    "name": "AVRO Schema",
    "fullName": "Apache Avro Schema",
    "category": "Schema",
    "governance": "Apache Software Foundation",
    "status": "Stable; ubiquitous in streaming",
    "judgement": "Adopt",
    "judgementReason": "Schema language for Kafka payloads; pairs with Schema Registry.",
    "description": [
      "JSON-based schema language used to describe Avro-encoded records. Standard in Kafka via Confluent Schema Registry.",
      "Strong support for schema evolution rules — a key reason it has stuck around as the default schema dialect for event streams."
    ],
    "links": [
      {
        "label": "avro.apache.org",
        "url": "https://avro.apache.org"
      }
    ],
    "firstReleased": 2009,
    "logo": "/media/icons/standards-map/logos/avro.svg",
    "umbrella": "ASF",
    "standardization": "foundation",
    "umbrellaSearch": "ASF Apache"
  },
  "protobuf": {
    "name": "Protobuf",
    "fullName": "Protocol Buffers",
    "category": "Schema",
    "governance": "Google (open source, vendor-driven)",
    "status": "Stable; de-facto schema for binary RPC",
    "judgement": "Adopt",
    "judgementReason": "Strong schema-evolution rules; default for gRPC.",
    "description": [
      "Google's language-neutral, platform-neutral mechanism for serialising structured data. Compact binary encoding plus strong schema evolution rules.",
      "The schema language behind gRPC and many internal RPC stacks. Open source but governed by Google, so it sits in the same vendor-driven bucket as dbt."
    ],
    "standardReason": "The .proto language and the wire format are publicly specified, with compilers and runtimes for every major language. gRPC, Schema Registry, and countless internal RPC stacks depend on the format definition rather than Google's implementation. That is the standard surface, even though Google governs it.",
    "links": [
      {
        "label": "protobuf.dev",
        "url": "https://protobuf.dev"
      },
      {
        "label": "GitHub: protobuf",
        "url": "https://github.com/protocolbuffers/protobuf"
      }
    ],
    "firstReleased": 2008,
    "logo": "/media/icons/standards-map/logos/google.svg",
    "umbrella": "Google",
    "umbrellaSearch": "Google vendor",
    "vendor": true,
    "standardization": "vendor-led"
  },
  "iceberg-catalog": {
    "name": "Iceberg Catalog",
    "fullName": "Iceberg REST Catalog",
    "category": "Catalog APIs",
    "governance": "Apache Software Foundation",
    "status": "Stable; rapidly becoming the default catalog protocol",
    "judgement": "Adopt",
    "judgementReason": "Canonical catalog API for Iceberg; multi-vendor implementations.",
    "description": [
      "Open REST API specification for Iceberg metadata catalogs. Decouples engines (Spark, Trino, Snowflake, Dremio, ...) from the catalog backend.",
      "Together with Iceberg the table format, this is what is making \"open lakehouse\" something more than marketing."
    ],
    "links": [
      {
        "label": "Iceberg REST catalog",
        "url": "https://iceberg.apache.org/terms/#catalog"
      }
    ],
    "firstReleased": 2022,
    "logo": "/media/icons/standards-map/logos/iceberg.svg",
    "umbrella": "ASF",
    "standardization": "foundation",
    "umbrellaSearch": "ASF Apache"
  },
  "unity-catalog": {
    "name": "Unity Catalog",
    "fullName": "Unity Catalog",
    "category": "Catalog APIs",
    "governance": "Linux Foundation (originally Databricks)",
    "status": "Open-sourced 2024; growing ecosystem",
    "judgement": "Situational",
    "judgementReason": "LF-open-sourced but still Databricks-tilted in practice.",
    "description": [
      "Unified governance layer for tables, files, models, and functions, originally built inside Databricks and donated to the Linux Foundation.",
      "Now competing with Iceberg REST as a catalog protocol. The two camps overlap and influence each other; expect convergence over the next couple of years."
    ],
    "standardReason": "Unity Catalog publishes a public REST API that other catalogs and engines can implement and consume. The API is the standard surface — that is what makes \"speaks Unity\" a portability claim rather than a Databricks lock-in. The reference implementation is open source under the Linux Foundation.",
    "links": [
      {
        "label": "unitycatalog.io",
        "url": "https://www.unitycatalog.io"
      },
      {
        "label": "GitHub: unitycatalog",
        "url": "https://github.com/unitycatalog/unitycatalog"
      }
    ],
    "firstReleased": 2024,
    "logo": "/media/icons/standards-map/logos/unity-catalog.svg",
    "umbrella": "LF",
    "standardization": "foundation"
  },
  "hive-metastore": {
    "name": "Hive Metastore",
    "fullName": "Apache Hive Metastore",
    "category": "Catalog APIs",
    "governance": "Apache Software Foundation",
    "status": "Stable; ubiquitous in lakehouse engines, slowly being replaced by Iceberg REST and Unity Catalog",
    "judgement": "Caution",
    "judgementReason": "The catalog Iceberg REST is displacing; maintain only.",
    "description": [
      "The metastore protocol of the Hadoop era. Long-standing de-facto standard for big-data catalogs — spoken by Spark, Trino, Presto, and most lakehouse engines.",
      "Slowly being superseded by Iceberg REST and Unity Catalog, but installed bases will keep it relevant for many years to come."
    ],
    "standardReason": "The Hive Metastore Thrift API is publicly defined and re-implemented by every major lakehouse engine. \"Speaks the Hive Metastore API\" became the portability test for a generation of big-data tools — the API is the standard, independent of the original Hive server.",
    "links": [
      {
        "label": "hive.apache.org",
        "url": "https://hive.apache.org"
      }
    ],
    "firstReleased": 2010,
    "logo": "/media/icons/standards-map/logos/hive-metastore.svg",
    "umbrella": "ASF",
    "standardization": "foundation",
    "umbrellaSearch": "ASF Apache",
    "tier": "stable"
  },
  "schema-registry": {
    "name": "Schema Registry",
    "fullName": "Confluent Schema Registry",
    "category": "Catalog APIs",
    "governance": "Confluent (open API; multiple compatible implementations)",
    "status": "De-facto standard for streaming schemas",
    "judgement": "Adopt",
    "judgementReason": "The default Kafka schema-management surface.",
    "description": [
      "A central registry for the schemas of messages flowing through Kafka topics. Producers register Avro, JSON Schema, or Protobuf schemas; consumers fetch them by ID to deserialise safely. Without it, \"what shape is this Kafka topic?\" has no answer.",
      "The REST API was designed by Confluent but the protocol is widely re-implemented: Apicurio Registry (Red Hat), AWS Glue Schema Registry, Karapace (Aiven), and Redpanda Schema Registry all speak it. That makes it the de-facto schema catalog of the streaming world, even if it is not formally standardised."
    ],
    "standardReason": "Confluent's Schema Registry REST API is publicly documented and re-implemented by Apicurio (Red Hat), AWS Glue, Karapace (Aiven), and Redpanda. Producers and consumers depend on the API contract, not on Confluent's server — which is what promotes it from a vendor product to a de-facto standard.",
    "links": [
      {
        "label": "Confluent Schema Registry",
        "url": "https://docs.confluent.io/platform/current/schema-registry/index.html"
      },
      {
        "label": "Apicurio Registry",
        "url": "https://www.apicur.io/registry/"
      }
    ],
    "firstReleased": 2014,
    "logo": "/media/icons/standards-map/logos/confluent.svg",
    "umbrella": "Confluent",
    "umbrellaSearch": "Confluent vendor",
    "vendor": true,
    "standardization": "vendor-led"
  },
  "csv": {
    "name": "CSV",
    "fullName": "Comma-Separated Values (RFC 4180)",
    "category": "File Formats",
    "governance": "IETF (RFC 4180)",
    "status": "Stable; the lowest common denominator for tabular data",
    "judgement": "Adopt",
    "judgementReason": "Universal flat file; ugly but ubiquitous.",
    "description": [
      "Plain-text tabular format. RFC 4180 specifies a baseline; in practice every tool has its own dialect for quoting, escaping, and line endings.",
      "Loved and hated in equal measure: terrible for nested data, perfect for spreadsheets, mandatory in data lake on-ramps."
    ],
    "links": [
      {
        "label": "RFC 4180",
        "url": "https://www.ietf.org/rfc/rfc4180.txt"
      }
    ],
    "firstReleased": 2005,
    "logo": "/media/icons/standards-map/logos/ietf.svg",
    "umbrella": "IETF",
    "standardization": "formal-standard"
  },
  "json": {
    "name": "JSON",
    "fullName": "JavaScript Object Notation",
    "category": "File Formats",
    "governance": "IETF RFC 8259 / ECMA-404",
    "status": "Stable; ubiquitous",
    "judgement": "Adopt",
    "judgementReason": "Universal payload format; not a choice.",
    "description": [
      "Standard text-based format for structured data. Pairs naturally with JSON Schema, OpenAPI, and AsyncAPI.",
      "In a lake, line-delimited JSON (NDJSON / JSONL) is the typical landing format before transformation into columnar formats like Parquet."
    ],
    "links": [
      {
        "label": "json.org",
        "url": "https://www.json.org"
      },
      {
        "label": "RFC 8259",
        "url": "https://www.rfc-editor.org/rfc/rfc8259"
      }
    ],
    "firstReleased": 2001,
    "logo": "/media/icons/standards-map/logos/json.png",
    "umbrella": "IETF",
    "standardization": "formal-standard"
  },
  "xml": {
    "name": "XML",
    "fullName": "Extensible Markup Language",
    "category": "File Formats",
    "governance": "W3C",
    "status": "Stable since 1998; XML 1.0 (Fifth Edition, 2008) is the de-facto current spec",
    "judgement": "Adopt",
    "judgementReason": "Load-bearing in finance, healthcare, and government; mature tooling around XSD, XSLT, XPath, XQuery.",
    "description": [
      "Tag-based markup format for structured documents and data. Verbose by modern standards, but unmatched tooling around schemas, transformations, and validation (XSD, XSLT, XPath, XQuery).",
      "Still the format of record in many enterprise integration scenarios — banking, healthcare, government, and SOAP-based B2B exchange."
    ],
    "links": [
      {
        "label": "W3C: XML",
        "url": "https://www.w3.org/XML/"
      }
    ],
    "firstReleased": 1998,
    "logo": "/media/icons/standards-map/logos/xml.svg",
    "umbrella": "W3C",
    "standardization": "formal-standard"
  },
  "yaml": {
    "name": "YAML",
    "fullName": "YAML Ain't Markup Language",
    "category": "File Formats",
    "governance": "yaml.org / community spec",
    "status": "Stable; v1.2.2 (2021)",
    "judgement": "Adopt",
    "judgementReason": "Default surface for config and contract specs.",
    "niche": true,
    "nicheReason": "Strictly speaking a metadata and configuration format — rarely used to encode the data itself; its role is describing data, not carrying it. Listed because most schema and contract specs in this landscape (OpenAPI, AsyncAPI, ODCS, DPDS, dbt project files) are authored in YAML.",
    "description": [
      "Human-readable data serialisation format. The default surface for metadata, configuration, and contract specifications — Kubernetes manifests, CI/CD pipelines, OpenAPI / AsyncAPI / ODCS / DPDS / dbt project files all live in YAML."
    ],
    "links": [
      {
        "label": "yaml.org",
        "url": "https://yaml.org"
      },
      {
        "label": "YAML spec",
        "url": "https://yaml.org/spec/"
      }
    ],
    "firstReleased": 2001,
    "logo": "/media/icons/standards-map/logos/yaml.svg",
    "umbrella": "YAML.org",
    "standardization": "community",
    "umbrellaSearch": "yaml.org"
  },
  "parquet": {
    "name": "PARQUET",
    "fullName": "Apache Parquet",
    "category": "File Formats",
    "governance": "Apache Software Foundation",
    "status": "Stable; the columnar default",
    "judgement": "Adopt",
    "judgementReason": "De-facto columnar storage; baseline for analytics.",
    "description": [
      "Columnar storage format optimised for analytics: predicate pushdown, column pruning, efficient compression, and typed schemas.",
      "Effectively the default storage format underneath every modern table format (Iceberg, Delta, Hudi)."
    ],
    "standardReason": "Parquet's on-disk format is published as an open spec with independent reader/writer implementations in C++, Java, Rust, Go, and Python. Every analytics engine and lakehouse format depends on the format spec, not on a particular library. That makes the file format itself the standard.",
    "links": [
      {
        "label": "parquet.apache.org",
        "url": "https://parquet.apache.org"
      }
    ],
    "firstReleased": 2013,
    "logo": "/media/icons/standards-map/logos/parquet.svg",
    "umbrella": "ASF",
    "standardization": "foundation",
    "umbrellaSearch": "ASF Apache"
  },
  "avro": {
    "name": "AVRO",
    "fullName": "Apache Avro",
    "category": "File Formats",
    "governance": "Apache Software Foundation",
    "status": "Stable; standard in Kafka",
    "judgement": "Adopt",
    "judgementReason": "Kafka's binary payload of choice; first-class with Schema Registry.",
    "description": [
      "Row-oriented binary format that ships its schema alongside the data. Excellent for streaming and event payloads where messages are written and read one at a time.",
      "The natural counterpart to Parquet: Avro for streams, Parquet for analytics."
    ],
    "standardReason": "The Avro container format and binary encoding are open specifications with independent implementations across Java, Python, Go, C++, and Rust. Kafka, Schema Registry, and most stream processors depend on the format spec, not on a particular Avro library — which is what makes it portable.",
    "links": [
      {
        "label": "avro.apache.org",
        "url": "https://avro.apache.org"
      }
    ],
    "firstReleased": 2009,
    "logo": "/media/icons/standards-map/logos/avro.svg",
    "umbrella": "ASF",
    "standardization": "foundation",
    "umbrellaSearch": "ASF Apache"
  },
  "orc": {
    "name": "ORC",
    "fullName": "Apache ORC (Optimized Row Columnar)",
    "category": "File Formats",
    "governance": "Apache Software Foundation",
    "status": "Stable; mostly seen in Hive-era stacks",
    "judgement": "Situational",
    "judgementReason": "Pick if you're in Hive/Tez territory; otherwise Parquet wins.",
    "description": [
      "Columnar file format created for the Hive ecosystem. Strong compression and ACID support inside Hive.",
      "Less common in greenfield projects today — Parquet has won most workloads — but still pervasive in big-data installations."
    ],
    "standardReason": "ORC's on-disk format is an open Apache spec with independent readers in Java, C++, and Rust. Hive, Spark, Trino, Presto and others read and write the same files because they target the format, not a single library. The file format is the standard surface.",
    "links": [
      {
        "label": "orc.apache.org",
        "url": "https://orc.apache.org"
      }
    ],
    "firstReleased": 2013,
    "logo": "/media/icons/standards-map/logos/orc.png",
    "umbrella": "ASF",
    "standardization": "foundation",
    "umbrellaSearch": "ASF Apache"
  },
  "lance": {
    "name": "Lance",
    "fullName": "Lance",
    "category": [
      "File Formats",
      "Open Table Formats"
    ],
    "governance": "Lance community (independent open source, LF-inspired governance)",
    "status": "Emerging; production use concentrated in AI/ML stacks",
    "judgement": "Assess",
    "judgementReason": "AI/ML-optimised columnar format; trajectory good, adoption concentrated.",
    "description": [
      "Modern columnar file format optimised for AI/ML workloads: 100x faster random access than Parquet, native support for embeddings and multimodal data (images, video, audio), and zero-copy schema evolution without rewriting files.",
      "Pairs with the Lance table format and catalog spec to form a lakehouse stack purpose-built for vector search and feature engineering alongside SQL analytics."
    ],
    "standardReason": "Lance's on-disk format is published as an open spec with independent Rust and Python implementations. LanceDB, DataFusion, DuckDB integrations, and downstream tools target the format itself rather than a single library — putting it on the de-facto standard track under an open, LF-style community governance model.",
    "links": [
      {
        "label": "lance.org",
        "url": "https://lance.org"
      },
      {
        "label": "GitHub: lance",
        "url": "https://github.com/lancedb/lance"
      }
    ],
    "firstReleased": 2022,
    "logo": "/media/icons/standards-map/logos/lance.png",
    "umbrella": "Lance",
    "standardization": "community",
    "umbrellaSearch": "Lance"
  },
  "openlineage": {
    "name": "OpenLineage",
    "fullName": "OpenLineage",
    "category": "Lineage",
    "governance": "Linux Foundation",
    "status": "Stable; growing integration list",
    "judgement": "Adopt",
    "judgementReason": "The lineage-emission standard, with column-level support.",
    "description": [
      "Open standard for collecting lineage metadata from data pipelines, including column-level lineage. Producers emit OpenLineage events; consumers (Marquez, catalogs, custom UIs) ingest them.",
      "For runtime lineage details — how the pipeline actually works — OpenLineage traces are the answer, not the data contract. Use both: contracts for design, OpenLineage for execution."
    ],
    "links": [
      {
        "label": "openlineage.io",
        "url": "https://openlineage.io"
      }
    ],
    "firstReleased": 2020,
    "logo": "/media/icons/standards-map/logos/openlineage.png",
    "umbrella": "LF",
    "standardization": "foundation"
  },
  "opa": {
    "name": "OPA",
    "fullName": "Open Policy Agent",
    "category": "Policies",
    "governance": "CNCF (Graduated)",
    "status": "Stable; de-facto policy engine for cloud-native",
    "judgement": "Adopt",
    "judgementReason": "General-purpose policy engine; mature Rego ecosystem.",
    "description": [
      "General-purpose policy engine that decouples policy decisions from your services. Policies are written in Rego and evaluated at request time.",
      "In a data mesh, OPA shows up wherever federated governance needs to enforce rules — access requests, contract gates in CI, infrastructure guard-rails — without putting the rules into application code."
    ],
    "standardReason": "Rego — the policy language OPA defines — has become the portable way to express authorization rules across cloud-native systems. Kubernetes admission control, service meshes, CI gates, and data-mesh governance all consume the same Rego policies. The language and the OPA evaluation API are the standard, with multiple compatible runtimes.",
    "links": [
      {
        "label": "openpolicyagent.org",
        "url": "https://www.openpolicyagent.org"
      }
    ],
    "firstReleased": 2016,
    "logo": "/media/icons/standards-map/logos/opa.png",
    "umbrella": "CNCF",
    "standardization": "foundation"
  },
  "mcp": {
    "name": "MCP",
    "fullName": "Model Context Protocol",
    "category": "AI Interfaces",
    "governance": "Linux Foundation (created by Anthropic; multi-vendor steering)",
    "status": "Active; rapid adoption since late 2024",
    "judgement": "Adopt",
    "judgementReason": "Became the AI tool-integration default in 2024–2025.",
    "description": [
      "Open protocol for connecting AI applications and agents to external systems: data sources, tools, and workflows. The \"USB-C for AI\": one spec, many servers and clients.",
      "For data architectures, MCP is becoming the standard way LLMs discover and query data products, semantic models, and catalogs. Created by Anthropic and now hosted at the Linux Foundation; Claude, ChatGPT, Cursor, VS Code, and many others speak it."
    ],
    "links": [
      {
        "label": "modelcontextprotocol.io",
        "url": "https://modelcontextprotocol.io"
      },
      {
        "label": "GitHub: modelcontextprotocol",
        "url": "https://github.com/modelcontextprotocol"
      }
    ],
    "firstReleased": 2024,
    "logo": "/media/icons/standards-map/logos/mcp.svg",
    "umbrella": "LF",
    "standardization": "foundation",
    "umbrellaSearch": "LF Anthropic"
  },
  "a2a": {
    "name": "A2A",
    "fullName": "Agent2Agent Protocol",
    "category": "AI Interfaces",
    "governance": "Linux Foundation (created by Google; multi-vendor steering)",
    "status": "Active; donated to LF in 2025",
    "judgement": "Situational",
    "judgementReason": "Agent-to-agent protocol; right when you have multi-agent orchestration, otherwise overkill.",
    "description": [
      "Open protocol for AI agents to discover, communicate, and collaborate with each other across vendors and frameworks. Where MCP is agent-to-tools, A2A is agent-to-agent: capability discovery, task delegation, and multi-turn dialogue between independently developed agents.",
      "Created by Google and donated to the Linux Foundation. Increasingly paired with MCP in production stacks: A2A for agent-to-agent orchestration, MCP for the tools and data each agent reaches into."
    ],
    "links": [
      {
        "label": "a2a-protocol.org",
        "url": "https://a2a-protocol.org"
      },
      {
        "label": "GitHub: a2aproject/A2A",
        "url": "https://github.com/a2aproject/A2A"
      }
    ],
    "firstReleased": 2025,
    "logo": "/media/icons/standards-map/logos/a2a.svg",
    "umbrella": "LF",
    "standardization": "foundation",
    "umbrellaSearch": "LF Google"
  },
  "s3": {
    "name": "S3",
    "fullName": "Amazon S3 API",
    "category": "Storage Systems",
    "governance": "AWS (vendor-controlled, but de-facto standard)",
    "status": "De-facto standard; implemented by everyone",
    "judgement": "Adopt",
    "judgementReason": "The object-storage API everyone implements.",
    "description": [
      "AWS's object storage API has become the lingua franca of the data lake. Every cloud, every storage vendor, every MinIO-style appliance speaks some flavour of it.",
      "Strictly speaking it is not an open standard — AWS controls it — but the practical reality is that \"talks S3\" is treated as a portability guarantee."
    ],
    "standardReason": "AWS publishes the S3 REST API, and MinIO, Cloudflare R2, Backblaze B2, Wasabi, Ceph RGW, GCS interop, and most on-prem object stores re-implement it. \"Speaks S3\" is treated as a portability guarantee precisely because the API surface — not AWS's server — is what tools depend on.",
    "links": [
      {
        "label": "AWS S3 API reference",
        "url": "https://docs.aws.amazon.com/AmazonS3/latest/API/Welcome.html"
      }
    ],
    "firstReleased": 2006,
    "logo": "/media/icons/standards-map/logos/aws.svg",
    "umbrella": "AWS",
    "umbrellaSearch": "AWS vendor",
    "vendor": true,
    "standardization": "vendor-led"
  },
  "osi": {
    "name": "OSI",
    "fullName": "Open Semantic Interchange",
    "category": "Semantics",
    "highlight": true,
    "governance": "OSI Initiative — Snowflake, Salesforce, dbt Labs, BlackRock, Databricks, Strategy (formerly MicroStrategy), and many more",
    "status": "v0.1.1; explicitly early, working groups very active",
    "judgement": "Assess",
    "judgementReason": "Emerging vendor-neutral semantic exchange; promising, adoption early.",
    "description": [
      "Vendor-agnostic open-source standard for exchanging semantic models — datasets, relationships, metrics, and AI context — between BI platforms, AI agents, and analytics tools.",
      "Overlaps with ODCS on datasets and relationships, and adds metrics (e.g. total_revenue = SUM(order_total)) and dynamic fields (e.g. full_name = first_name + \" \" + last_name). ODCS still owns terms of use, quality, and SLAs."
    ],
    "note": "Acronym collision with the OSI networking model from CS class. This OSI is the Open Semantic Interchange.",
    "links": [
      {
        "label": "opensemantic.com",
        "url": "https://opensemantic.com"
      },
      {
        "label": "GitHub: open-semantic-interchange",
        "url": "https://github.com/open-semantic-interchange/OSI"
      },
      {
        "label": "Open Semantic Editor",
        "url": "https://editor.opensemantic.com"
      }
    ],
    "firstReleased": 2025,
    "logo": "/media/icons/standards-map/logos/osi.svg",
    "umbrella": "OSI Initiative",
    "standardization": "community"
  },
  "rdf-owl": {
    "name": "RDF/OWL",
    "fullName": "Resource Description Framework / Web Ontology Language",
    "category": "Semantics",
    "governance": "W3C",
    "status": "Stable; foundation of the semantic web",
    "judgement": "Situational",
    "judgementReason": "Semantic-web foundation; right when reasoning/inference matter.",
    "description": [
      "RDF is the W3C model for representing graph data; OWL extends it for richer ontologies and reasoning. Together they are the foundation of the semantic web.",
      "YAML formats like ODCS and OSI buy simplicity at the cost of composability. RDF/OWL trade off the other way: harder to author, much easier to compose vocabularies that were never designed together."
    ],
    "links": [
      {
        "label": "W3C: RDF",
        "url": "https://www.w3.org/RDF/"
      },
      {
        "label": "W3C: OWL",
        "url": "https://www.w3.org/OWL/"
      }
    ],
    "firstReleased": 1999,
    "logo": "/media/icons/standards-map/logos/w3c.svg",
    "umbrella": "W3C",
    "standardization": "formal-standard"
  },
  "dcat": {
    "name": "DCAT",
    "fullName": "Data Catalog Vocabulary",
    "category": "Semantics",
    "governance": "W3C",
    "status": "Stable; v3 current",
    "judgement": "Situational",
    "judgementReason": "Mandatory in EU open-data and many regulatory contexts.",
    "description": [
      "RDF vocabulary designed to facilitate interoperability between data catalogs. Mandatory for many EU open-data portals.",
      "DCAT lives in the RDF / semantic-web world as a catalog vocabulary inspired by libraries and dataset offerings. Different mental model from contract-first standards like ODCS."
    ],
    "links": [
      {
        "label": "W3C: DCAT",
        "url": "https://www.w3.org/TR/vocab-dcat/"
      }
    ],
    "firstReleased": 2014,
    "logo": "/media/icons/standards-map/logos/w3c.svg",
    "umbrella": "W3C",
    "standardization": "formal-standard"
  },
  "skos": {
    "name": "SKOS",
    "fullName": "Simple Knowledge Organization System",
    "category": "Semantics",
    "governance": "W3C",
    "status": "Stable since 2009",
    "judgement": "Situational",
    "judgementReason": "Right when you actually have a taxonomy.",
    "description": [
      "W3C standard for representing thesauri, taxonomies, and controlled vocabularies in RDF.",
      "Useful for sharing classifications — PII categories, business glossaries, regulatory taxonomies — in a way that other tools can consume without bespoke mapping."
    ],
    "links": [
      {
        "label": "W3C: SKOS",
        "url": "https://www.w3.org/2004/02/skos/"
      }
    ],
    "firstReleased": 2009,
    "logo": "/media/icons/standards-map/logos/w3c.svg",
    "umbrella": "W3C",
    "standardization": "formal-standard"
  },
  "shacl": {
    "name": "SHACL",
    "fullName": "Shapes Constraint Language",
    "category": "Semantics",
    "governance": "W3C",
    "status": "Stable since 2017 (1.0); SHACL 1.2 in progress (FPWDs published 2025)",
    "judgement": "Situational",
    "judgementReason": "Validate RDF; only when you already speak RDF.",
    "description": [
      "W3C language for validating RDF graphs against a set of conditions — the JSON Schema of the RDF world.",
      "Used heavily where the semantic stack actually has to enforce rules, not just describe them."
    ],
    "links": [
      {
        "label": "W3C: SHACL",
        "url": "https://www.w3.org/TR/shacl/"
      }
    ],
    "firstReleased": 2017,
    "logo": "/media/icons/standards-map/logos/w3c.svg",
    "umbrella": "W3C",
    "standardization": "formal-standard"
  },
  "json-ld": {
    "name": "JSON-LD",
    "fullName": "JSON for Linking Data",
    "category": "Semantics",
    "governance": "W3C",
    "status": "Stable; JSON-LD 1.1 current",
    "judgement": "Situational",
    "judgementReason": "Pragmatic semantic-web on-ramp inside ordinary JSON.",
    "description": [
      "A JSON-based serialisation for linked data. Lets you embed semantic context (types, relationships, IRIs) into ordinary JSON without breaking JSON consumers that ignore it.",
      "The pragmatic on-ramp into the RDF world: how Schema.org rides on web pages, how decentralised identity (DID, Verifiable Credentials) ships its payloads, and how many APIs add semantics to existing JSON without forcing clients to learn RDF."
    ],
    "links": [
      {
        "label": "W3C: JSON-LD 1.1",
        "url": "https://www.w3.org/TR/json-ld11/"
      },
      {
        "label": "json-ld.org",
        "url": "https://json-ld.org"
      }
    ],
    "firstReleased": 2014,
    "logo": "/media/icons/standards-map/logos/json-ld.png",
    "umbrella": "W3C",
    "standardization": "formal-standard"
  },
  "sparql": {
    "name": "SPARQL",
    "fullName": "SPARQL Protocol and RDF Query Language",
    "category": "Query",
    "governance": "W3C",
    "status": "Stable; SPARQL 1.1 current, 1.2 in progress",
    "judgement": "Situational",
    "judgementReason": "Query RDF; right when your data lives in triples.",
    "description": [
      "The query language for RDF graphs. Pattern-matching syntax that walks graph triples, with federation across endpoints, updates, and a standardised HTTP protocol.",
      "What SQL is to relational tables, SPARQL is to RDF: the standard way to ask questions of an OWL, DCAT, SKOS, or SHACL-validated graph. Without it, the rest of the W3C semantic stack cannot answer queries portably."
    ],
    "links": [
      {
        "label": "W3C: SPARQL 1.1",
        "url": "https://www.w3.org/TR/sparql11-overview/"
      }
    ],
    "firstReleased": 2008,
    "logo": "/media/icons/standards-map/logos/w3c.svg",
    "umbrella": "W3C",
    "standardization": "formal-standard"
  },
  "hdfs": {
    "name": "HDFS",
    "fullName": "Hadoop Distributed File System",
    "category": "Storage Systems",
    "governance": "Apache Software Foundation",
    "status": "Stable; mature, declining in greenfield projects",
    "judgement": "Caution",
    "judgementReason": "Hadoop-era distributed FS; object storage replaced it for nearly all new builds.",
    "description": [
      "Distributed file system designed to run on commodity hardware, originally inspired by the Google File System. The storage substrate of the classic Hadoop stack.",
      "Object storage (S3 and friends) has displaced HDFS in most new architectures, but large existing deployments mean it remains an important interface to support."
    ],
    "links": [
      {
        "label": "hadoop.apache.org",
        "url": "https://hadoop.apache.org"
      }
    ],
    "firstReleased": 2006,
    "logo": "/media/icons/standards-map/logos/hadoop.png",
    "umbrella": "ASF",
    "standardization": "foundation",
    "umbrellaSearch": "ASF Apache"
  },
  "jdbc": {
    "name": "JDBC",
    "fullName": "Java Database Connectivity",
    "category": "Database Connectivity",
    "governance": "Java Community Process (JCP)",
    "status": "Stable; ubiquitous in JVM ecosystems",
    "judgement": "Adopt",
    "judgementReason": "Every JVM data tool speaks it; default for 25 years.",
    "description": [
      "Standard Java API for connecting to relational databases. Every serious analytical engine, BI tool, and JVM-based data pipeline speaks JDBC.",
      "Row-oriented by design — fine for transactional workloads, suboptimal for moving large analytical result sets where columnar transports like ADBC are faster."
    ],
    "links": [
      {
        "label": "JDBC overview",
        "url": "https://docs.oracle.com/javase/tutorial/jdbc/"
      }
    ],
    "firstReleased": 1997,
    "logo": "/media/icons/standards-map/logos/jdbc.svg",
    "umbrella": "JCP",
    "standardization": "formal-standard"
  },
  "odbc": {
    "name": "ODBC",
    "fullName": "Open Database Connectivity",
    "category": "Database Connectivity",
    "governance": "ISO/IEC 9075-3 (originally Microsoft / SQL Access Group)",
    "status": "Stable since 1992; cross-language standard",
    "judgement": "Adopt",
    "judgementReason": "Cross-language equivalent of JDBC; sits behind every BI tool.",
    "description": [
      "C-based, language-neutral API for accessing relational databases. The cross-language sibling of JDBC, and the connectivity layer behind most BI tools and Excel-style integrations.",
      "Like JDBC, ODBC is row-oriented — efficient for transactional access, less so for analytical column-heavy result sets."
    ],
    "links": [
      {
        "label": "ISO/IEC 9075-3",
        "url": "https://www.iso.org/standard/76585.html"
      },
      {
        "label": "Microsoft ODBC reference",
        "url": "https://learn.microsoft.com/en-us/sql/odbc/reference/odbc-overview"
      }
    ],
    "firstReleased": 1992,
    "logo": "/media/icons/standards-map/logos/iso.png",
    "umbrella": "ISO/IEC",
    "standardization": "formal-standard"
  },
  "xmla": {
    "name": "XMLA",
    "fullName": "XML for Analysis",
    "category": "Database Connectivity",
    "governance": "XMLA Council (Microsoft, Hyperion, SAS); de-facto industry spec",
    "status": "Legacy; still the standard wire protocol for OLAP/semantic-layer access",
    "judgement": "Caution",
    "judgementReason": "Legacy SOAP-based BI protocol; survives only inside MS Analysis Services.",
    "description": [
      "SOAP-based protocol for querying multidimensional data sources — cubes, semantic layers, and tabular models. The transport that carries MDX (and DAX) queries between BI clients and analytical engines.",
      "Implemented by Microsoft Analysis Services, Mondrian, SAP BW, AtScale, and consumed by Excel, Power BI, Tableau, and most enterprise BI tools. Greenfield analytics has moved on, but XMLA remains how a vast amount of cube-based reporting actually talks to its backend."
    ],
    "links": [
      {
        "label": "XMLA 1.1 specification",
        "url": "https://learn.microsoft.com/en-us/openspecs/sql_server_protocols/ms-ssas/c0e5c5fc-fe93-4e58-a3b6-b1d95a47d22d"
      },
      {
        "label": "Wikipedia: XML for Analysis",
        "url": "https://en.wikipedia.org/wiki/XML_for_Analysis"
      }
    ],
    "firstReleased": 2001,
    "tier": "legacy",
    "logo": "/media/icons/standards-map/logos/microsoft.svg",
    "umbrella": "Microsoft",
    "standardization": "vendor-led"
  },
  "adbc": {
    "name": "ADBC",
    "fullName": "Arrow Database Connectivity",
    "category": "Database Connectivity",
    "governance": "Apache Software Foundation (Arrow project)",
    "status": "Stable; growing driver ecosystem",
    "judgement": "Adopt",
    "judgementReason": "Arrow-native DB connectivity; the modern replacement for JDBC/ODBC on analytical workloads.",
    "description": [
      "Arrow-native database connectivity API. Where JDBC and ODBC marshal results row-by-row, ADBC moves data as Arrow record batches — zero-copy where possible, dramatically faster for analytical workloads.",
      "Designed to coexist with JDBC and ODBC: drivers can wrap them today, and engines can expose ADBC natively for the columnar fast path."
    ],
    "standardReason": "ADBC is published as an open API specification under the Apache Arrow project, with drivers contributed by multiple vendors (Snowflake, BigQuery, DuckDB, Postgres, SQLite). The driver interface and the Arrow-batch result contract are the standard surface — like JDBC/ODBC, but columnar.",
    "links": [
      {
        "label": "arrow.apache.org/adbc",
        "url": "https://arrow.apache.org/adbc/"
      }
    ],
    "firstReleased": 2023,
    "logo": "/media/icons/standards-map/logos/arrow.png",
    "umbrella": "ASF",
    "standardization": "foundation",
    "umbrellaSearch": "ASF Apache"
  },
  "quack": {
    "name": "Quack",
    "fullName": "Quack",
    "category": "Database Connectivity",
    "governance": "DuckDB Labs (vendor-driven open source)",
    "status": "Emerging; new client-server protocol from the DuckDB ecosystem",
    "judgement": "Assess",
    "judgementReason": "DuckDB's new client-server protocol; promising but early — watch adoption beyond the DuckDB ecosystem.",
    "tier": "emerging",
    "description": [
      "Client-server wire protocol from DuckDB Labs for talking to a DuckDB server. Where JDBC/ODBC/ADBC are client APIs that bind to drivers, Quack defines the protocol on the wire — so clients in any language can speak to a DuckDB server without an embedded engine.",
      "Positions DuckDB beyond the embedded use case it grew up in, and complements ADBC: ADBC standardises the columnar result contract, Quack standardises the connection."
    ],
    "standardReason": "Quack is published as an open protocol specification by DuckDB Labs. Listed on the de-facto criterion: the wire format is the standard surface, even though governance sits with a single vendor and adoption is still concentrated in the DuckDB ecosystem.",
    "links": [
      {
        "label": "duckdb.org",
        "url": "https://duckdb.org"
      }
    ],
    "firstReleased": 2025,
    "logo": "/media/icons/standards-map/logos/duckdb.svg",
    "umbrella": "DuckDB Labs",
    "umbrellaSearch": "DuckDB Labs vendor",
    "vendor": true,
    "standardization": "vendor-led"
  },
  "http": {
    "name": "HTTP",
    "fullName": "Hypertext Transfer Protocol",
    "category": "Interconnection",
    "governance": "IETF",
    "status": "Stable; HTTP/3 (RFC 9114) is current",
    "judgement": "Adopt",
    "judgementReason": "Universal application transport; nobody decides to use it.",
    "description": [
      "The application protocol of the web, and the universal transport beneath REST APIs, OpenAPI, GraphQL, gRPC (HTTP/2), Iceberg REST catalogs, and OpenLineage events.",
      "Most data interfaces today are some flavour of \"HTTP plus a schema\" — which is why HTTP is a load-bearing assumption in almost any modern data architecture."
    ],
    "links": [
      {
        "label": "RFC 9110 (HTTP semantics)",
        "url": "https://www.rfc-editor.org/rfc/rfc9110"
      }
    ],
    "firstReleased": 1991,
    "logo": "/media/icons/standards-map/logos/ietf.svg",
    "umbrella": "IETF",
    "standardization": "formal-standard"
  },
  "kafka": {
    "name": "Kafka",
    "fullName": "Apache Kafka",
    "category": "Messaging",
    "governance": "Apache Software Foundation",
    "status": "Stable; the de-facto streaming platform",
    "judgement": "Adopt",
    "judgementReason": "The streaming backbone; competitors challenge the implementation, not the category.",
    "description": [
      "Distributed log and streaming platform. Originally built at LinkedIn, donated to ASF; now the default backbone for event-driven data pipelines.",
      "The Kafka wire protocol is openly documented and re-implemented by Redpanda, WarpStream, and others, so it sits next to AMQP as a standard option for asynchronous data movement."
    ],
    "standardReason": "The Kafka wire protocol is publicly documented and re-implemented by Redpanda, WarpStream, and AutoMQ — clients written for one server work against the others. The standard surface is the protocol, not the Apache Kafka broker; that is what lets producers and consumers stay portable across vendors.",
    "links": [
      {
        "label": "kafka.apache.org",
        "url": "https://kafka.apache.org"
      }
    ],
    "firstReleased": 2011,
    "logo": "/media/icons/standards-map/logos/kafka.svg",
    "umbrella": "ASF",
    "standardization": "foundation",
    "umbrellaSearch": "ASF Apache"
  },
  "mqtt": {
    "name": "MQTT",
    "fullName": "Message Queuing Telemetry Transport",
    "category": "Messaging",
    "governance": "OASIS",
    "status": "Stable; MQTT 5.0 standardised",
    "judgement": "Situational",
    "judgementReason": "Excellent in IoT/edge, irrelevant outside it.",
    "description": [
      "Lightweight publish-subscribe messaging protocol designed for constrained devices and unreliable networks. The dominant wire protocol for IoT and edge data.",
      "Where AMQP and Kafka fit centralised broker patterns inside the data centre, MQTT is what shows up at the edge: sensors, vehicles, factory floors. Bridging MQTT into Kafka is a common ingest pattern for industrial data."
    ],
    "links": [
      {
        "label": "mqtt.org",
        "url": "https://mqtt.org"
      },
      {
        "label": "OASIS MQTT 5.0",
        "url": "https://docs.oasis-open.org/mqtt/mqtt/v5.0/mqtt-v5.0.html"
      }
    ],
    "firstReleased": 1999,
    "logo": "/media/icons/standards-map/logos/mqtt.svg",
    "umbrella": "OASIS",
    "standardization": "formal-standard"
  },
  "cloudevents": {
    "name": "CloudEvents",
    "fullName": "CloudEvents",
    "category": "Messaging",
    "governance": "CNCF (Graduated)",
    "status": "Stable; CloudEvents 1.0 graduated at CNCF",
    "judgement": "Adopt",
    "judgementReason": "Default event envelope; HTTP/Kafka/AMQP/MQTT bindings all standardised.",
    "description": [
      "Specification for describing event data in a common way. Defines a vendor-neutral envelope (id, source, type, time, data) plus protocol bindings for HTTP, Kafka, AMQP, MQTT, NATS, and others.",
      "Where Kafka/AMQP/MQTT are transports, CloudEvents is the shape of what flows over them. Increasingly the default cross-system event format in cloud-native and serverless architectures."
    ],
    "links": [
      {
        "label": "cloudevents.io",
        "url": "https://cloudevents.io"
      },
      {
        "label": "GitHub: cloudevents/spec",
        "url": "https://github.com/cloudevents/spec"
      }
    ],
    "firstReleased": 2018,
    "logo": "/media/icons/standards-map/logos/cloudevents.svg",
    "umbrella": "CNCF",
    "standardization": "foundation"
  },
  "amqp": {
    "name": "AMQP",
    "fullName": "Advanced Message Queuing Protocol",
    "category": "Messaging",
    "governance": "OASIS",
    "status": "Stable; AMQP 1.0 standardised",
    "judgement": "Situational",
    "judgementReason": "Pick when you want broker semantics (RabbitMQ-shaped problems).",
    "description": [
      "Open wire-level protocol for message-oriented middleware. Reliable, broker-mediated message exchange independent of any single vendor.",
      "Implemented by RabbitMQ, Azure Service Bus, ActiveMQ and others. Sits next to MQTT and the Kafka protocol as a standard option for asynchronous data movement."
    ],
    "links": [
      {
        "label": "amqp.org",
        "url": "https://www.amqp.org"
      },
      {
        "label": "OASIS AMQP 1.0",
        "url": "https://docs.oasis-open.org/amqp/core/v1.0/os/amqp-core-overview-v1.0-os.html"
      }
    ],
    "firstReleased": 2003,
    "logo": "/media/icons/standards-map/logos/oasis.png",
    "umbrella": "OASIS",
    "standardization": "formal-standard"
  },
  "jms": {
    "name": "JMS",
    "fullName": "Jakarta Messaging (formerly Java Message Service)",
    "category": "Messaging",
    "governance": "Eclipse Foundation (Jakarta EE); originally JCP",
    "status": "Stable; Jakarta Messaging 3.1 is current",
    "judgement": "Caution",
    "judgementReason": "Java-only messaging API; maintain if you have it, don't pick it new.",
    "description": [
      "Java API specification for producing, sending, and consuming messages between application components. Originally JSR 914 under the Java Community Process; now Jakarta Messaging under the Eclipse Foundation.",
      "Implemented by ActiveMQ, Artemis, IBM MQ, Solace, and most enterprise message brokers. JMS sits one layer above wire protocols like AMQP — it standardises the application API rather than the on-the-wire bytes."
    ],
    "links": [
      {
        "label": "Jakarta Messaging spec",
        "url": "https://jakarta.ee/specifications/messaging/"
      },
      {
        "label": "JSR 914 (original JMS 1.1)",
        "url": "https://jcp.org/en/jsr/detail?id=914"
      }
    ],
    "firstReleased": 2001,
    "logo": "/media/icons/standards-map/logos/jakarta-ee.svg",
    "umbrella": "Jakarta EE",
    "standardization": "formal-standard",
    "umbrellaSearch": "Eclipse Jakarta",
    "tier": "legacy"
  },
  "ftp": {
    "name": "FTP / SFTP",
    "fullName": "File Transfer Protocol / SSH File Transfer Protocol",
    "category": "Interconnection",
    "governance": "IETF",
    "status": "Stable; legacy but pervasive in B2B data exchange",
    "judgement": "Caution",
    "judgementReason": "Unencrypted FTP is unsafe for new work; SFTP-only as a last resort when partners can't speak HTTP APIs.",
    "description": [
      "FTP (RFC 959) is the original file transfer protocol; SFTP runs file transfer over an SSH channel and is the secure variant most enterprises actually deploy.",
      "Unsexy but unavoidable: many regulated industries — banking, insurance, public sector — still hand off bulk data between organisations as nightly (S)FTP drops."
    ],
    "links": [
      {
        "label": "RFC 959 (FTP)",
        "url": "https://www.rfc-editor.org/rfc/rfc959"
      },
      {
        "label": "SFTP draft",
        "url": "https://datatracker.ietf.org/doc/html/draft-ietf-secsh-filexfer-13"
      }
    ],
    "firstReleased": 1971,
    "logo": "/media/icons/standards-map/logos/ietf.svg",
    "umbrella": "IETF",
    "standardization": "formal-standard",
    "tier": "stable"
  },
  "deltasharing": {
    "name": "Delta Sharing",
    "fullName": "Delta Sharing Protocol",
    "category": "Interconnection",
    "governance": "Linux Foundation (donated by Databricks)",
    "status": "Stable; multi-vendor adoption",
    "judgement": "Situational",
    "judgementReason": "Cross-org sharing without copies; well-defined niche.",
    "description": [
      "Open REST protocol for sharing tabular datasets across organisations and platforms without copying data. A recipient gets short-lived signed URLs to read Parquet/Delta files directly from the provider's object store.",
      "Implemented by Databricks, plus open-source servers and clients in Python, Spark, Pandas, and Power BI. Sits next to FTP and HTTP as a modern, table-aware option for cross-organisation data exchange."
    ],
    "standardReason": "Delta Sharing is published as an open REST protocol specification under the Linux Foundation, with reference servers and clients across multiple languages and several independent implementations (Databricks, AWS, Azure, Power BI). The wire protocol is the standard surface — not the Databricks product.",
    "links": [
      {
        "label": "delta.io/sharing",
        "url": "https://delta.io/sharing/"
      },
      {
        "label": "GitHub: delta-io/delta-sharing",
        "url": "https://github.com/delta-io/delta-sharing"
      }
    ],
    "firstReleased": 2021,
    "logo": "/media/icons/standards-map/logos/delta-sharing.svg",
    "umbrella": "LF",
    "standardization": "foundation"
  },
  "dataframe": {
    "name": "DataFrame API",
    "fullName": "Python Dataframe API Standard",
    "category": "In-Memory Format",
    "governance": "Consortium for Python Data API Standards (data-apis.org)",
    "status": "Draft RFC; multi-vendor adoption underway",
    "judgement": "Adopt",
    "judgementReason": "Portable Python DataFrame spec; the way to write library code that runs on pandas, Polars, Modin, and cuDF.",
    "description": [
      "Open standard for a portable DataFrame API across Python implementations. The aim: write code once, run on pandas, Polars, Modin, cuDF, or any compliant library.",
      "Sister specification to the Array API Standard. Sponsored by data-apis.org with backing from Quansight, NVIDIA, Microsoft, Intel, and the major DataFrame projects. The standard everyone reaches for when \"which DataFrame API?\" needs to stop being a portability question."
    ],
    "links": [
      {
        "label": "data-apis.org",
        "url": "https://data-apis.org"
      },
      {
        "label": "DataFrame API draft RFC",
        "url": "https://data-apis.org/dataframe-api/draft/"
      }
    ],
    "firstReleased": 2021,
    "tier": "stable",
    "logo": "/media/icons/standards-map/logos/dataframe.png",
    "umbrella": "data-apis",
    "standardization": "community"
  },
  "arrow": {
    "name": "Apache Arrow",
    "fullName": "Apache Arrow",
    "category": "In-Memory Format",
    "governance": "Apache Software Foundation",
    "status": "Stable; broadly adopted across the analytics stack",
    "judgement": "Adopt",
    "judgementReason": "In-memory columnar standard powering Flight, ADBC, DataFusion, Polars.",
    "description": [
      "Language-agnostic columnar in-memory format for analytical data. Designed for zero-copy reads and efficient interchange between processes, languages, and engines.",
      "Foundation for ADBC, the basis of pandas 2.x, the wire format of Arrow Flight, and the in-memory layout of DuckDB, Polars, and many others. Arrow is the connective tissue of modern analytics."
    ],
    "standardReason": "Arrow's columnar memory layout and IPC format are an open specification, not a library. DuckDB, Polars, pandas 2.x, Spark, and ADBC all align on the same in-memory representation, which is what enables zero-copy interchange across engines and languages. The format spec is the standard.",
    "links": [
      {
        "label": "arrow.apache.org",
        "url": "https://arrow.apache.org"
      }
    ],
    "firstReleased": 2016,
    "logo": "/media/icons/standards-map/logos/arrow.png",
    "umbrella": "ASF",
    "standardization": "foundation",
    "umbrellaSearch": "ASF Apache"
  },
  "spark": {
    "name": "Spark",
    "fullName": "Apache Spark",
    "category": "Processing",
    "governance": "Apache Software Foundation",
    "status": "Stable; Spark 4.x current, 3.x widely deployed",
    "judgement": "Adopt",
    "judgementReason": "Default distributed batch+streaming engine.",
    "description": [
      "Distributed analytics engine with a DataFrame API across Scala, Java, Python, and R. The classic large-scale processing engine for batch and streaming workloads.",
      "Spark Connect decouples client and engine, making the DataFrame API a portable client interface — which is why it shows up here as an interface, not just an engine."
    ],
    "standardReason": "Spark's DataFrame API and the Spark Connect protocol are the standard parts. Spark Connect publishes a gRPC contract that any Spark-compatible engine can serve, and the DataFrame API is what other engines (e.g. Databricks Connect, Apache Sedona) target. The interface, not the JVM runtime, is what makes Spark a standard.",
    "links": [
      {
        "label": "spark.apache.org",
        "url": "https://spark.apache.org"
      }
    ],
    "firstReleased": 2014,
    "tier": "stable",
    "logo": "/media/icons/standards-map/logos/spark.svg",
    "umbrella": "ASF",
    "standardization": "foundation",
    "umbrellaSearch": "ASF Apache"
  },
  "beam": {
    "name": "Beam",
    "fullName": "Apache Beam",
    "category": "Processing",
    "governance": "Apache Software Foundation",
    "status": "Stable; multi-runner model spec",
    "judgement": "Situational",
    "judgementReason": "Write once, run on Flink/Spark/Dataflow; right when runner portability matters.",
    "description": [
      "Unified programming model for batch and streaming data processing. Pipelines written once in Beam can run on Flink, Spark, Google Cloud Dataflow, or the direct local runner.",
      "Originated as Google's Dataflow SDK, donated to ASF in 2016. Stronger claim to “standard” than most processing frameworks: the model itself — PCollections, transforms, windowing, triggers — is the spec, with runners as competing implementations."
    ],
    "standardReason": "Beam's value is the model, not the runner. The Beam programming model and the portability framework (Fn API, Runner API) are an open specification that multiple runners implement. Pipelines move between Flink, Spark, and Dataflow without code changes precisely because all three implement the same Beam contract — making the model the standard surface tools and teams target.",
    "links": [
      {
        "label": "beam.apache.org",
        "url": "https://beam.apache.org"
      }
    ],
    "firstReleased": 2016,
    "logo": "/media/icons/standards-map/logos/beam.svg",
    "umbrella": "ASF",
    "standardization": "foundation",
    "umbrellaSearch": "ASF Apache"
  },
  "pandas": {
    "name": "Pandas",
    "fullName": "pandas",
    "category": "Processing",
    "governance": "NumFOCUS (open source)",
    "status": "Stable; the de-facto Python DataFrame API",
    "judgement": "Adopt",
    "judgementReason": "Universal Python DataFrame; you don't choose it, you encounter it.",
    "description": [
      "The original Python DataFrame library. Whatever else a Python data practitioner uses, they almost certainly know pandas — which makes its API a de-facto interface that other libraries (Polars, Modin, cuDF) deliberately mimic.",
      "Pandas 2.x added Arrow-backed columns, narrowing the gap with newer engines while preserving the familiar API."
    ],
    "standardReason": "The pandas Python API is the de-facto DataFrame interface other libraries deliberately mimic — Modin, cuDF, Dask, and many wrappers expose pandas-compatible surfaces precisely because users already know it. The API shape, not the implementation, is what makes pandas a standard reference.",
    "links": [
      {
        "label": "pandas.pydata.org",
        "url": "https://pandas.pydata.org"
      }
    ],
    "firstReleased": 2008,
    "logo": "/media/icons/standards-map/logos/pandas.svg",
    "umbrella": "NumFOCUS",
    "standardization": "community"
  },
  "ibis": {
    "name": "Ibis",
    "fullName": "Ibis",
    "category": "Processing",
    "governance": "Ibis Project (open source)",
    "status": "Stable; multi-backend",
    "judgement": "Assess",
    "judgementReason": "Portable Python DataFrame compiling to many backends; trajectory strong, footprint small.",
    "description": [
      "Portable Python DataFrame API that compiles expressions to a backend of choice — DuckDB, BigQuery, Snowflake, Postgres, Spark, and many others.",
      "Lets analytics code be written once and executed against whichever engine the data happens to live on, without rewriting in each backend's SQL dialect."
    ],
    "standardReason": "Ibis defines a backend-neutral DataFrame expression API, with a published backend protocol that 20+ engines implement. The API and the backend contract are the standard surface — Ibis is the rare project where the standardised \"interface\" is its whole reason to exist.",
    "links": [
      {
        "label": "ibis-project.org",
        "url": "https://ibis-project.org"
      }
    ],
    "firstReleased": 2015,
    "logo": "/media/icons/standards-map/logos/ibis.png",
    "umbrella": "Ibis",
    "standardization": "community",
    "umbrellaSearch": "Ibis Project"
  },
  "sql-dml": {
    "name": "SQL DML",
    "fullName": "SQL Data Manipulation Language",
    "category": "Processing",
    "governance": "ANSI / ISO/IEC 9075",
    "status": "Stable since 1986; revised regularly (latest SQL:2023)",
    "judgement": "Adopt",
    "judgementReason": "Portable transformation language across relational/lakehouse engines.",
    "description": [
      "The processing side of SQL: CTEs and window functions for transformation logic, INSERT / UPDATE / DELETE / MERGE for state changes, and CREATE OR REPLACE TABLE / VIEW for materialising results. The portable way to express batch transformations against any relational or lakehouse engine.",
      "Tools like dbt, SQLMesh, and dataform are essentially orchestrators around this surface — the transformations themselves are SQL DML."
    ],
    "standardReason": "ISO/IEC 9075 specifies a portable DML core — CTEs (SQL:1999), window functions (SQL:2003), and MERGE (SQL:2003) — that every serious analytical engine implements. Transformations written against this core run on Snowflake, BigQuery, Databricks, DuckDB, Postgres, and Trino with minimal change. The standard surface, not any vendor's dialect, is what makes SQL the default processing language.",
    "links": [
      {
        "label": "ISO/IEC 9075 standard",
        "url": "https://www.iso.org/standard/76583.html"
      },
      {
        "label": "Wikipedia: SQL",
        "url": "https://en.wikipedia.org/wiki/SQL"
      }
    ],
    "firstReleased": 1986,
    "logo": "/media/icons/standards-map/logos/iso.png",
    "umbrella": "ISO/IEC",
    "standardization": "formal-standard"
  },
  "xslt": {
    "name": "XSLT",
    "fullName": "Extensible Stylesheet Language Transformations",
    "category": "Processing",
    "governance": "W3C",
    "status": "Legacy; XSLT 3.0 (2017) is current, but new adoption is rare",
    "judgement": "Caution",
    "judgementReason": "XML transformation; maintain if you have it, don't pick it new.",
    "description": [
      "Declarative, template-based language for transforming XML documents into other XML, HTML, or text. The processing counterpart to XML Schema — where XSD describes structure, XSLT rewrites it.",
      "Implemented by Saxon, libxslt, and every major XML toolchain. Still load-bearing in publishing, government, banking, and healthcare integrations where XML pipelines never went away."
    ],
    "links": [
      {
        "label": "W3C: XSL Transformations (XSLT) 3.0",
        "url": "https://www.w3.org/TR/xslt-30/"
      },
      {
        "label": "Wikipedia: XSLT",
        "url": "https://en.wikipedia.org/wiki/XSLT"
      }
    ],
    "firstReleased": 1999,
    "tier": "legacy",
    "logo": "/media/icons/standards-map/logos/w3c.svg",
    "umbrella": "W3C",
    "standardization": "formal-standard"
  },
  "linkml": {
    "name": "LinkML",
    "fullName": "Linked data Modeling Language",
    "category": "Schema",
    "governance": "LinkML community / Monarch Initiative",
    "status": "Stable; widely used in biomedical and life-sciences data modelling",
    "judgement": "Assess",
    "judgementReason": "Multi-output schema language; strong in biomedical, niche elsewhere.",
    "niche": true,
    "nicheReason": "Specialised modelling language with deepest adoption in biomedical and research data communities (Monarch, NIH Bridge2AI, INCLUDE). Powerful for cross-format schemas — generates JSON Schema, SHACL, RDF, Pydantic, Java, etc. from one source — but outside life-sciences its mindshare is still small.",
    "description": [
      "YAML-based schema language for modelling structured data and ontologies. One LinkML schema can be compiled to JSON Schema, SHACL, RDF/OWL, Pydantic classes, Protobuf, and SQL DDL — useful when the same domain model needs to power validation, semantics, and code generation simultaneously.",
      "Sits between schema languages (JSON Schema, AVRO Schema) and ontology languages (RDF/OWL, SHACL). Pragmatic, opinionated, and increasingly the default modelling vehicle in linked-data-friendly research consortia."
    ],
    "links": [
      {
        "label": "linkml.io",
        "url": "https://linkml.io"
      },
      {
        "label": "GitHub: linkml/linkml",
        "url": "https://github.com/linkml/linkml"
      }
    ],
    "firstReleased": 2019,
    "logo": "/media/icons/standards-map/logos/linkml.png",
    "umbrella": "LinkML",
    "standardization": "community"
  },
  "schemaorg": {
    "name": "schema.org",
    "fullName": "schema.org vocabulary",
    "category": "Semantics",
    "governance": "schema.org Community Group / W3C",
    "status": "Stable; mainstream for web structured data, niche in data-architecture contexts",
    "judgement": "Assess",
    "judgementReason": "Web SEO/structured-data vocabulary; different audience from data-platform work.",
    "niche": true,
    "nicheReason": "Mainstream on the public web — Google rich results, Bing, Yandex, and email actions all consume schema.org markup — but inside data-architecture and data-mesh contexts it rarely surfaces. Listed because anyone modelling cross-domain semantics will eventually meet it, and because it's the largest open-vocabulary effort by far.",
    "description": [
      "Open vocabulary of types and properties for structured data on the web, originated by Google, Microsoft, Yahoo, and Yandex in 2011 and now stewarded by the schema.org Community Group at W3C. Provides ~800 types (Person, Organization, Product, Dataset, Article…) and thousands of properties.",
      "Most often serialised as JSON-LD inside HTML pages so search engines can read structured data. The Dataset and DataCatalog types overlap with DCAT — and many open-data portals publish both views of the same metadata."
    ],
    "links": [
      {
        "label": "schema.org",
        "url": "https://schema.org"
      },
      {
        "label": "GitHub: schemaorg/schemaorg",
        "url": "https://github.com/schemaorg/schemaorg"
      }
    ],
    "firstReleased": 2011,
    "logo": "/media/icons/standards-map/logos/schemaorg.svg",
    "umbrella": "W3C",
    "standardization": "formal-standard"
  },
  "shex": {
    "name": "ShEx",
    "fullName": "Shape Expressions Language",
    "category": "Semantics",
    "governance": "W3C Shape Expressions Community Group",
    "status": "Stable; ShEx 2.1 current",
    "judgement": "Assess",
    "judgementReason": "RDF shape validation parallel to SHACL; smaller community.",
    "niche": true,
    "nicheReason": "RDF shape-validation language that pre-dates and parallels SHACL. Adoption is concentrated in linked-data communities (Wikidata, biomedical knowledge graphs); SHACL — being a W3C Recommendation rather than a Community Group spec — is the more common pick in mainstream contexts.",
    "description": [
      "Concise, human-friendly grammar for describing the expected shape of RDF graphs. Used to validate that linked data conforms to a target schema before publishing or ingesting it.",
      "Originated alongside SHACL during the W3C RDF Data Shapes Working Group. The two specs solve overlapping problems with different design choices: ShEx prioritises a compact ShExC syntax and recursive shape definitions; SHACL prioritises RDF-native rule expression."
    ],
    "links": [
      {
        "label": "shex.io",
        "url": "https://shex.io/"
      },
      {
        "label": "ShEx 2.1 Primer",
        "url": "https://shex.io/shex-primer/"
      }
    ],
    "firstReleased": 2017,
    "logo": "/media/icons/standards-map/logos/shex.svg",
    "umbrella": "W3C",
    "standardization": "formal-standard"
  },
  "odrl": {
    "name": "ODRL",
    "fullName": "Open Digital Rights Language",
    "category": "Policies",
    "governance": "W3C",
    "status": "W3C Recommendation; v2.2 current (since 2018)",
    "judgement": "Assess",
    "judgementReason": "Rights-expression language; relevant where data licensing matters.",
    "niche": true,
    "nicheReason": "Rights expression language designed for licensing, terms-of-use, and data-sharing agreements. Increasingly relevant for data contracts and data spaces (Gaia-X, IDSA, Eclipse Dataspace Components reference ODRL for usage policies), but adoption outside that frame is still limited compared to OPA for runtime authorisation.",
    "description": [
      "RDF/JSON-based language for expressing permissions, prohibitions, and obligations over digital assets. Born out of digital-rights management; W3C ratified the Information Model (v2.2) and Vocabulary in 2018.",
      "Sees growing use in European data-spaces work where machine-readable usage policies are part of the contract surface — ODRL policies often ship alongside ODCS-style contracts to declare what consumers may and may not do with the data."
    ],
    "links": [
      {
        "label": "ODRL Information Model 2.2",
        "url": "https://www.w3.org/TR/odrl-model/"
      },
      {
        "label": "ODRL Vocabulary 2.2",
        "url": "https://www.w3.org/TR/odrl-vocab/"
      },
      {
        "label": "ODRL Community Group",
        "url": "https://www.w3.org/community/odrl/"
      }
    ],
    "firstReleased": 2018,
    "logo": "/media/icons/standards-map/logos/w3c.svg",
    "umbrella": "W3C",
    "standardization": "formal-standard"
  },
  "prov": {
    "name": "PROV",
    "fullName": "PROV — Provenance Family",
    "category": "Lineage",
    "governance": "W3C",
    "status": "W3C Recommendation since 2013",
    "judgement": "Assess",
    "judgementReason": "W3C provenance vocabulary; predates OpenLineage by a decade, fading.",
    "niche": true,
    "nicheReason": "RDF-based provenance model pre-dates OpenLineage by nearly a decade and is the canonical reference in research and digital-preservation contexts. Mainstream data engineering tooling has converged on OpenLineage instead — PROV stays the right choice when provenance has to live in a knowledge graph, not in a pipeline observability backend.",
    "description": [
      "Family of W3C documents (PROV-DM data model, PROV-O OWL ontology, PROV-N notation, PROV-XML serialisation) for describing entities, activities, and agents involved in producing a piece of data.",
      "Common in scientific data, archival systems, and FAIR-data contexts. The conceptual model (Entity / Activity / Agent / wasDerivedFrom / wasGeneratedBy) heavily influenced later lineage standards including OpenLineage's job/run/dataset triples."
    ],
    "links": [
      {
        "label": "PROV Overview",
        "url": "https://www.w3.org/TR/prov-overview/"
      },
      {
        "label": "PROV-O ontology",
        "url": "https://www.w3.org/TR/prov-o/"
      },
      {
        "label": "PROV-DM model",
        "url": "https://www.w3.org/TR/prov-dm/"
      }
    ],
    "firstReleased": 2013,
    "logo": "/media/icons/standards-map/logos/w3c.svg",
    "umbrella": "W3C",
    "standardization": "formal-standard"
  },
  "gql": {
    "name": "GQL",
    "fullName": "Graph Query Language",
    "category": "Query",
    "governance": "ISO/IEC 39075:2024",
    "status": "Stable; ISO/IEC 39075:2024 published",
    "judgement": "Situational",
    "judgementReason": "Vendor-neutral property-graph query language — pick when your stack is already on graph databases.",
    "niche": true,
    "nicheReason": "First international query-language standard since SQL — the property-graph counterpart to SQL — but adoption is still limited compared to Cypher (Neo4j) and Gremlin. Worth tracking because it gives graph databases the same kind of vendor-neutral baseline SQL gave relational, but it isn't yet the day-to-day surface for most data work.",
    "description": [
      "ISO/IEC 39075 standard query language for property graphs, ratified in 2024. The first new ISO data query language standard since SQL; designed to do for graph databases what SQL did for relational ones.",
      "Implemented or in-progress at Neo4j (extending Cypher), TigerGraph, Oracle, Memgraph, AWS Neptune, and others. Heavily influenced by openCypher and ISO SQL/PGQ (the SQL extension for property-graph queries that shipped in SQL:2023)."
    ],
    "links": [
      {
        "label": "ISO/IEC 39075:2024",
        "url": "https://www.iso.org/standard/76120.html"
      },
      {
        "label": "GQL Standards (project site)",
        "url": "https://www.gqlstandards.org/"
      }
    ],
    "firstReleased": 2024,
    "logo": "/media/icons/standards-map/logos/iso.png",
    "umbrella": "ISO/IEC",
    "standardization": "formal-standard"
  },
  "odata": {
    "name": "OData",
    "fullName": "Open Data Protocol (v4)",
    "category": "Contracts",
    "governance": "OASIS (originally Microsoft)",
    "status": "Stable; v4.01 current (OASIS Standard since 2014)",
    "judgement": "Situational",
    "judgementReason": "Pick when you live in the Microsoft BI/Dynamics world.",
    "description": [
      "Standardised protocol for building and consuming RESTful data APIs. Defines URL conventions, query syntax (filter, select, expand, orderby, pagination), and a metadata model (CSDL) so clients can discover schema and capabilities programmatically.",
      "Heavily used inside Microsoft (Dynamics 365, Power BI, Microsoft Graph), SAP (S/4HANA, OData services), Salesforce, and Oracle Fusion. Where OpenAPI describes any REST surface, OData prescribes a uniform shape so consumers don't have to learn each API individually."
    ],
    "standardReason": "OData is an OASIS Standard with a publicly-defined wire protocol and metadata format (CSDL). Multiple independent implementations exist across Microsoft, SAP, Salesforce, Oracle, and the OData4j / Olingo open-source stacks, so \"speaks OData\" is a portability claim rather than a vendor-lock-in label.",
    "links": [
      {
        "label": "odata.org",
        "url": "https://www.odata.org/"
      },
      {
        "label": "OASIS OData TC",
        "url": "https://www.oasis-open.org/committees/odata/"
      },
      {
        "label": "OData v4.01 spec",
        "url": "https://docs.oasis-open.org/odata/odata/v4.01/"
      }
    ],
    "firstReleased": 2014,
    "logo": "/media/icons/standards-map/logos/odata.png",
    "umbrella": "OASIS",
    "standardization": "formal-standard"
  },
  "frictionless-table-schema": {
    "name": "Table Schema",
    "fullName": "Frictionless Table Schema",
    "category": "Schema",
    "governance": "Frictionless Data / Open Knowledge Foundation",
    "status": "Stable; v1.0 (2017), continually maintained",
    "judgement": "Assess",
    "judgementReason": "Lightweight Frictionless tabular spec; small ecosystem.",
    "niche": true,
    "nicheReason": "JSON-based schema spec for tabular data, part of the Frictionless Data toolkit. Adoption is concentrated in open-data publishing (government portals, research data) where it pairs with Data Package and DCAT — outside that scene most teams reach for JSON Schema, AVRO Schema, or SQL DDL instead.",
    "description": [
      "Lightweight JSON spec for describing the columns of a tabular dataset — name, type, format, constraints, primary key, foreign keys. Rendered alongside the actual CSV/JSON data inside a Data Package so consumers can validate and type-cast without out-of-band documentation.",
      "Common in open-data ecosystems: data.gov, the EU Open Data Portal, and many research-data publishers ship Frictionless Data Packages containing one Table Schema per resource."
    ],
    "links": [
      {
        "label": "Table Schema spec",
        "url": "https://specs.frictionlessdata.io/table-schema/"
      },
      {
        "label": "frictionlessdata.io",
        "url": "https://frictionlessdata.io/"
      }
    ],
    "firstReleased": 2017,
    "logo": "/media/icons/standards-map/logos/frictionless.svg",
    "umbrella": "Frictionless",
    "standardization": "community",
    "umbrellaSearch": "Frictionless OKF Open Knowledge"
  }
}
