Created
August 1, 2025 09:51
-
-
Save reknih/abc853f3c01d4babd3eabb496934508b to your computer and use it in GitHub Desktop.
PDF 1.7 containment rules
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// The role a structure element plays in a document written as PDF 1.7 or
/// below.
///
/// The variants are the predefined standard roles of PDF 1.7 and below, which
/// correspond to the `https://www.iso.org/pdf/ssn` namespace. A writer may
/// also emit roles of its own and map them to standard ones through
/// [`StructTreeRoot::role_map`] or, when producing PDF 2.0, through
/// [`Namespace::role_map_ns`]. PDF 1.4+.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub enum StructRole {
    /// The document as a whole.
    Document,
    /// A division of the document that may hold several articles or sections.
    Part,
    /// An article: a largely self-contained body of content.
    Art,
    /// A section within a larger document.
    Sect,
    /// A generic subdivision.
    Div,
    /// A quotation at paragraph level.
    BlockQuote,
    /// A caption for an image or figure.
    Caption,
    /// A table of contents.
    TOC,
    /// An entry in a table of contents.
    TOCI,
    /// An index of the key terms in the document.
    Index,
    /// An element that exists for grouping only and shall not be exported.
    NonStruct,
    /// An element reserved for use by the writer and associated products.
    Private,
    /// A paragraph.
    P,
    /// A strongly structured heading.
    StructuredHeading,
    /// A heading of the first level.
    H1,
    /// A heading of the second level.
    H2,
    /// A heading of the third level.
    H3,
    /// A heading of the fourth level.
    H4,
    /// A heading of the fifth level.
    H5,
    /// A heading of the sixth level.
    H6,
    /// A list.
    L,
    /// An item of a list.
    LI,
    /// The label of a list item.
    Lbl,
    /// The description of a list item.
    LBody,
    /// A table.
    Table,
    /// A row of a table.
    TR,
    /// A header cell of a table.
    TH,
    /// A data cell of a table.
    TD,
    /// A group of header rows of a table.
    THead,
    /// A group of data rows of a table.
    TBody,
    /// A group of footer rows of a table.
    TFoot,
    /// A generic inline element.
    Span,
    /// A quotation at inline level.
    Quote,
    /// A footnote or endnote.
    Note,
    /// A reference to another place in the document.
    Reference,
    /// A reference to an external document.
    BibEntry,
    /// Computer code.
    Code,
    /// A link.
    Link,
    /// An association between an annotation and the content it belongs to.
    /// PDF 1.5+
    Annot,
    /// A Ruby annotation for CJK text. PDF 1.5+
    Ruby,
    /// A Warichu annotation for CJK text. PDF 1.5+
    Warichu,
    /// The base text of a Ruby annotation. PDF 1.5+
    RB,
    /// The annotation text of a Ruby annotation. PDF 1.5+
    RT,
    /// The punctuation of a Ruby annotation. PDF 1.5+
    RP,
    /// The text of a Warichu annotation. PDF 1.5+
    WT,
    /// The punctuation of a Warichu annotation. PDF 1.5+
    WP,
    /// An item of graphical content.
    Figure,
    /// A mathematical formula.
    Formula,
    /// A form widget.
    Form,
}
| impl StructRole { | |
| /// Return the type of the structure element. | |
| pub fn role_type(self) -> StructRoleType { | |
| match self { | |
| Self::Document | |
| | Self::Part | |
| | Self::Art | |
| | Self::Sect | |
| | Self::Div | |
| | Self::BlockQuote | |
| | Self::Caption | |
| | Self::TOC | |
| | Self::TOCI | |
| | Self::Index | |
| | Self::NonStruct | |
| | Self::Private => StructRoleType::Grouping, | |
| Self::P | |
| | Self::StructuredHeading | |
| | Self::H1 | |
| | Self::H2 | |
| | Self::H3 | |
| | Self::H4 | |
| | Self::H5 | |
| | Self::H6 => { | |
| StructRoleType::BlockLevel(BlockLevelRoleSubtype::ParagraphLike) | |
| } | |
| Self::L | Self::LI | Self::Lbl | Self::LBody => { | |
| StructRoleType::BlockLevel(BlockLevelRoleSubtype::List) | |
| } | |
| Self::Table => StructRoleType::BlockLevel(BlockLevelRoleSubtype::Table), | |
| Self::TR | Self::TH | Self::TD | Self::THead | Self::TBody | Self::TFoot => { | |
| StructRoleType::Table | |
| } | |
| Self::Span | |
| | Self::Quote | |
| | Self::Note | |
| | Self::Reference | |
| | Self::BibEntry | |
| | Self::Code | |
| | Self::Ruby | |
| | Self::Warichu => { | |
| StructRoleType::InlineLevel(InlineLevelRoleSubtype::Generic) | |
| } | |
| Self::Link => StructRoleType::InlineLevel(InlineLevelRoleSubtype::Link), | |
| Self::Annot => { | |
| StructRoleType::InlineLevel(InlineLevelRoleSubtype::Annotation) | |
| } | |
| Self::RB | Self::RT | Self::RP | Self::WT | Self::WP => { | |
| StructRoleType::InlineLevel(InlineLevelRoleSubtype::RubyWarichu) | |
| } | |
| Self::Figure | Self::Formula | Self::Form => StructRoleType::Illustration, | |
| } | |
| } | |
| /// Return whether an element with this role can contain another | |
| /// structure element as a child. | |
| pub fn may_contain(self, other: StructRole) -> Containment { | |
| match (self, other) { | |
| (Self::TOC, Self::TOC | Self::TOCI) => Containment::Allowed, | |
| (Self::TOC, _) => Containment::Forbidden, | |
| ( | |
| Self::TOCI, | |
| Self::Lbl | Self::Reference | Self::NonStruct | Self::P | Self::TOC, | |
| ) => Containment::Allowed, | |
| (Self::TOCI, _) => Containment::Forbidden, | |
| (parent, Self::StructuredHeading) | |
| if matches!( | |
| parent.role_type(), | |
| StructRoleType::Grouping | StructRoleType::BlockLevel(_) | |
| ) => | |
| { | |
| Containment::Once | |
| } | |
| (_, Self::StructuredHeading) => Containment::Forbidden, | |
| (Self::L | Self::Table, Self::Caption) => Containment::Once, | |
| (Self::L, Self::LI) => Containment::Allowed, | |
| (Self::L, _) => Containment::Forbidden, | |
| (Self::LI, Self::Lbl | Self::LBody) => Containment::Allowed, | |
| (Self::LI, _) => Containment::Forbidden, | |
| (Self::Table, Self::TR | Self::THead | Self::TBody | Self::TFoot) => { | |
| Containment::Allowed | |
| } | |
| (Self::Table, _) => Containment::Forbidden, | |
| (Self::TR, Self::TH | Self::TD) => Containment::Allowed, | |
| (Self::TR, _) => Containment::Forbidden, | |
| (Self::THead | Self::TBody | Self::TFoot, Self::TR) => Containment::Allowed, | |
| (a, b) if a.role_type() == StructRoleType::Grouping => match b.role_type() { | |
| StructRoleType::BlockLevel(_) | StructRoleType::Grouping => { | |
| Containment::Allowed | |
| } | |
| _ => Containment::ChildMustBeBLSE, | |
| }, | |
| (Self::Note | Self::BibEntry, Self::Lbl) => Containment::Allowed, | |
| (Self::Ruby, Self::RB | Self::RT | Self::RP) => Containment::RubyRules, | |
| (_, Self::RB | Self::RT | Self::RP) => Containment::Forbidden, | |
| (Self::Ruby, _) => Containment::Forbidden, | |
| (Self::Warichu, Self::WT | Self::WP) => Containment::WarichuRules, | |
| (_, Self::WT | Self::WP) => Containment::Forbidden, | |
| (Self::Warichu, _) => Containment::Forbidden, | |
| (_, _) => Containment::Allowed, | |
| } | |
| } | |
| /// Return whether this structure element can contain a content element. | |
| pub fn may_have_content_item(self) -> bool { | |
| match self { | |
| Self::TOC | |
| | Self::TOCI | |
| | Self::L | |
| | Self::LI | |
| | Self::Table | |
| | Self::TR | |
| | Self::THead | |
| | Self::TBody | |
| | Self::TFoot => false, | |
| role if role.role_type() == StructRoleType::Grouping => { | |
| // Grouping elements can contain other elements, but not content | |
| // items. | |
| false | |
| } | |
| _ => true, | |
| } | |
| } | |
| } | |
| /// Type of the PDF 1.7 [structure element](StructRole) in the document, | |
| /// determining layout, permitted attributes, and nesting. | |
| #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] | |
| pub enum StructRoleType { | |
| /// Elements used solely to group other elements together. | |
| Grouping, | |
| /// Elements laid out across the block axis, also known as BLSE. | |
| BlockLevel(BlockLevelRoleSubtype), | |
| /// Elements laid out across the inline axis, also known as ILSE. | |
| InlineLevel(InlineLevelRoleSubtype), | |
| /// Elements whose contents consist of one or more graphics objects. | |
| Illustration, | |
| /// Elements that occur in a table, such as rows and cells. | |
| Table, | |
| } | |
/// The subtype of a block-level structure role.
///
/// The subtype determines the layout and the permitted attributes of the
/// element.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub enum BlockLevelRoleSubtype {
    /// Block-level elements that hold predominantly text content.
    ParagraphLike,
    /// Elements related to lists, such as lists and list items.
    List,
    /// Elements related to tables, such as tables and table rows.
    Table,
}
/// The subtype of an inline-level PDF 1.7 structure role.
///
/// The subtype determines the layout and the permitted attributes of the
/// element.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub enum InlineLevelRoleSubtype {
    /// Generic inline elements such as spans, quotes, and code.
    Generic,
    /// Links.
    Link,
    /// Superimposed annotations.
    Annotation,
    /// Ruby and Warichu annotations, which are used for CJK text.
    RubyWarichu,
}
/// Whether a particular structure element can contain another structure
/// element.
///
/// Created by [`StructRole::may_contain`].
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub enum Containment {
    /// The other element can occur.
    Allowed,
    /// The other element can occur only once as a child of this element.
    Once,
    /// The parent can contain this element when either condition is met:
    /// - It has two children: an RB element, followed by an RT element
    /// - It has four children: an RB element, followed by an RP element, an
    ///   RT element, and then another RP element
    RubyRules,
    /// The parent can contain this element when it has exactly three children:
    /// a WP element, followed by a WT element, and then another WP element.
    WarichuRules,
    /// This element can only contain `other` if `other` has a placement
    /// attribute promoting it to a block-level structure element (BLSE).
    ChildMustBeBLSE,
    /// The other element cannot occur as a child of this element.
    Forbidden,
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment