Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HTML API: Add support for list elements. #5539

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions phpcs.xml.dist
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,15 @@
<exclude-pattern>/wp-tests-config-sample\.php</exclude-pattern>
</rule>

<!-- Allow goto in the HTML Processor. That class mirrors algorithms which the HTML
specification itself writes with explicit jumps, and these particular algorithms are
complex and highly imperative. Rewriting them without goto would make it harder to
keep the code aligned with the standard and risks introducing subtle differences
in parsing. -->
<rule ref="Generic.PHP.DiscourageGoto.Found">
<exclude-pattern>/wp-includes/html-api/class-wp-html-processor\.php</exclude-pattern>
</rule>

<!-- Exclude sample config from modernization to prevent breaking CI workflows based on WP-CLI scaffold.
See: https://core.trac.wordpress.org/ticket/48082#comment:16 -->
<rule ref="Modernize.FunctionCalls.Dirname.FileConstant">
Expand Down
32 changes: 24 additions & 8 deletions src/wp-includes/html-api/class-wp-html-open-elements.php
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ public function has_element_in_specific_scope( $tag_name, $termination_list ) {
}

if ( in_array( $node->node_name, $termination_list, true ) ) {
return true;
return false;
}
}

Expand Down Expand Up @@ -166,18 +166,22 @@ public function has_element_in_scope( $tag_name ) {
* Returns whether a particular element is in list item scope.
*
* @since 6.4.0
* @since 6.5.0 Implemented: no longer throws on every invocation.
*
* @see https://html.spec.whatwg.org/#has-an-element-in-list-item-scope
*
* @throws WP_HTML_Unsupported_Exception Always until this function is implemented.
*
* @param string $tag_name Name of tag to check.
* @return bool Whether given element is in scope.
*/
public function has_element_in_list_item_scope( $tag_name ) {
throw new WP_HTML_Unsupported_Exception( 'Cannot process elements depending on list item scope.' );

return false; // The linter requires this unreachable code until the function is implemented and can return.
return $this->has_element_in_specific_scope(
$tag_name,
array(
// There are more elements that belong here which aren't currently supported.
'OL',
'UL',
)
);
}

/**
Expand Down Expand Up @@ -375,10 +379,22 @@ public function walk_down() {
* see WP_HTML_Open_Elements::walk_down().
*
* @since 6.4.0
* @since 6.5.0 Accepts $above_this_node to start traversal above a given node, if it exists.
*
* @param ?WP_HTML_Token $above_this_node Start traversing above this node, if provided and if the node exists.
*/
public function walk_up() {
public function walk_up( $above_this_node = null ) {
$has_found_node = null === $above_this_node;

for ( $i = count( $this->stack ) - 1; $i >= 0; $i-- ) {
yield $this->stack[ $i ];
$node = $this->stack[ $i ];

if ( ! $has_found_node ) {
$has_found_node = $node === $above_this_node;
continue;
}

yield $node;
}
}

Expand Down
115 changes: 114 additions & 1 deletion src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@
* - Formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U.
* - Heading elements: H1, H2, H3, H4, H5, H6, HGROUP.
* - Links: A.
* - Lists: DL.
* - Lists: DD, DL, DT, LI, OL, LI.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like it should be UL.

- *  - Lists: DD, DL, DT, LI, OL, LI.
+ *  - Lists: DD, DL, DT, LI, OL, UL.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you! Will fix this.

* - Media elements: AUDIO, CANVAS, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, VIDEO.
* - Paragraph: P.
* - Phrasing elements: ABBR, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR.
Expand Down Expand Up @@ -648,10 +648,12 @@ private function step_in_body() {
case '+MAIN':
case '+MENU':
case '+NAV':
case '+OL':
case '+P':
case '+SEARCH':
case '+SECTION':
case '+SUMMARY':
case '+UL':
if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
$this->close_a_p_element();
}
Expand Down Expand Up @@ -685,9 +687,11 @@ private function step_in_body() {
case '-MAIN':
case '-MENU':
case '-NAV':
case '-OL':
case '-SEARCH':
case '-SECTION':
case '-SUMMARY':
case '-UL':
if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $tag_name ) ) {
// @todo Report parse error.
// Ignore the token.
Expand Down Expand Up @@ -755,6 +759,109 @@ private function step_in_body() {
$this->state->stack_of_open_elements->pop_until( '(internal: H1 through H6 - do not use)' );
return true;

/*
* > A start tag whose tag name is "li"
* > A start tag whose tag name is one of: "dd", "dt"
*/
case '+DD':
case '+DT':
case '+LI':
$this->state->frameset_ok = false;
$node = $this->state->stack_of_open_elements->current_node();
$is_li = 'LI' === $tag_name;

in_body_list_loop:
/*
* The logic for LI and DT/DD is the same except for one point: LI elements _only_
* close other LI elements, but a DT or DD element closes _any_ open DT or DD element.
*/
if ( $is_li ? 'LI' === $node->node_name : ( 'DD' === $node->node_name || 'DT' === $node->node_name ) ) {
$node_name = $is_li ? 'LI' : $node->node_name;
$this->generate_implied_end_tags( $node_name );
if ( $node_name !== $this->state->stack_of_open_elements->current_node()->node_name ) {
// @todo Indicate a parse error once it's possible. This error does not impact the logic here.
}

$this->state->stack_of_open_elements->pop_until( $node_name );
goto in_body_list_done;
}

if (
'ADDRESS' !== $node->node_name &&
'DIV' !== $node->node_name &&
'P' !== $node->node_name &&
$this->is_special( $node->node_name )
) {
/*
* > If node is in the special category, but is not an address, div,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The > character should probably be removed here

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe the > is included to indicate it's a quote. These are quoting the HTML standard directly.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Confirming what @sirreal wrote. These are linked to the HTML5 living spec by words to make it easier to search, and the > is attempting to indicate that it's a quote. It also corresponds to every block within the step_in_body() function.

I'm happy to entertain suggestions to improve the linking, but I don't want to make it more difficult for people to cross-reference the spec by removing these quotes. Unfortunately the spec doesn't add anchors to each of these sections so there's no clear link (which is one of the original reasons I added the quotes, beyond the obvious that it puts the intention of the spec at the front in the code).

* > or p element, then jump to the step labeled done below.
*/
goto in_body_list_done;
} else {
/*
* > Otherwise, set node to the previous entry in the stack of open elements
dmsnell marked this conversation as resolved.
Show resolved Hide resolved
* > and return to the step labeled loop.
*/
foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $item ) {
$node = $item;
break;
}
goto in_body_list_loop;
}

in_body_list_done:
if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
$this->close_a_p_element();
}

$this->insert_html_element( $this->state->current_token );
return true;

/*
* > An end tag whose tag name is "li"
dmsnell marked this conversation as resolved.
Show resolved Hide resolved
* > An end tag whose tag name is one of: "dd", "dt"
*/
case '-DD':
case '-DT':
case '-LI':
if (
/*
* An end tag whose tag name is "li":
* If the stack of open elements does not have an li element in list item scope,
* then this is a parse error; ignore the token.
*/
(
'LI' === $tag_name &&
! $this->state->stack_of_open_elements->has_element_in_list_item_scope( 'LI' )
) ||
/*
* An end tag whose tag name is one of: "dd", "dt":
* If the stack of open elements does not have an element in scope that is an
* HTML element with the same tag name as that of the token, then this is a
* parse error; ignore the token.
*/
(
'LI' !== $tag_name &&
! $this->state->stack_of_open_elements->has_element_in_scope( $tag_name )
)
) {
/*
* This is a parse error, ignore the token.
*
* @todo Indicate a parse error once it's possible.
*/
return $this->step();
}

$this->generate_implied_end_tags( $tag_name );

if ( $tag_name !== $this->state->stack_of_open_elements->current_node()->node_name ) {
// @todo Indicate a parse error once it's possible. This error does not impact the logic here.
}

$this->state->stack_of_open_elements->pop_until( $tag_name );
return true;

/*
* > An end tag whose tag name is "p"
*/
Expand Down Expand Up @@ -1223,6 +1330,9 @@ private function close_a_p_element() {
*/
private function generate_implied_end_tags( $except_for_this_element = null ) {
$elements_with_implied_end_tags = array(
'DD',
'DT',
'LI',
'P',
);

Expand All @@ -1248,6 +1358,9 @@ private function generate_implied_end_tags( $except_for_this_element = null ) {
*/
private function generate_implied_end_tags_thoroughly() {
$elements_with_implied_end_tags = array(
'DD',
'DT',
'LI',
'P',
);

Expand Down
5 changes: 0 additions & 5 deletions tests/phpunit/tests/html-api/wpHtmlProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,6 @@ public function data_unsupported_special_in_body_tags() {
'CAPTION' => array( 'CAPTION' ),
'COL' => array( 'COL' ),
'COLGROUP' => array( 'COLGROUP' ),
'DD' => array( 'DD' ),
'DT' => array( 'DT' ),
'EMBED' => array( 'EMBED' ),
'FORM' => array( 'FORM' ),
'FRAME' => array( 'FRAME' ),
Expand All @@ -180,7 +178,6 @@ public function data_unsupported_special_in_body_tags() {
'IFRAME' => array( 'IFRAME' ),
'INPUT' => array( 'INPUT' ),
'KEYGEN' => array( 'KEYGEN' ),
'LI' => array( 'LI' ),
'LINK' => array( 'LINK' ),
'LISTING' => array( 'LISTING' ),
'MARQUEE' => array( 'MARQUEE' ),
Expand All @@ -191,7 +188,6 @@ public function data_unsupported_special_in_body_tags() {
'NOFRAMES' => array( 'NOFRAMES' ),
'NOSCRIPT' => array( 'NOSCRIPT' ),
'OBJECT' => array( 'OBJECT' ),
'OL' => array( 'OL' ),
'OPTGROUP' => array( 'OPTGROUP' ),
'OPTION' => array( 'OPTION' ),
'PARAM' => array( 'PARAM' ),
Expand All @@ -218,7 +214,6 @@ public function data_unsupported_special_in_body_tags() {
'TITLE' => array( 'TITLE' ),
'TR' => array( 'TR' ),
'TRACK' => array( 'TRACK' ),
'UL' => array( 'UL' ),
'WBR' => array( 'WBR' ),
'XMP' => array( 'XMP' ),
);
Expand Down
Loading
Loading