Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implement verbatim* #2133

Merged
merged 2 commits into from
Aug 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 56 additions & 35 deletions lib/LaTeXML/Package/LaTeX.pool.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -1863,8 +1863,8 @@ DefRegister('\@itemdepth' => Number(0));
#======================================================================

# NOTE: how's the best way to get verbatim material through?
DefEnvironment('{verbatim}', '<ltx:verbatim>#body</ltx:verbatim>');
DefEnvironment('{verbatim*}', '<ltx:verbatim>#body</ltx:verbatim>');
#DefEnvironment('{verbatim}', '<ltx:verbatim>#body</ltx:verbatim>');
#DefEnvironment('{verbatim*}', '<ltx:verbatim>#body</ltx:verbatim>');

# \@verbatim takes no arguments and expands to three steps:
#   \par, then \aftergroup\lx@end@verbatim (standard TeX \aftergroup: the token
#   is held until the current group closes), then \lx@@verbatim.
# NOTE(review): \lx@@verbatim is defined elsewhere in this file; presumably it
# performs the actual verbatim setup -- confirm against its definition.
DefMacroI('\@verbatim', undef,
'\par\aftergroup\lx@end@verbatim\lx@@verbatim'); # Close enough?
Expand All @@ -1888,40 +1888,58 @@ DefConstructorI('\lx@end@verbatim', undef,
# and also the usual environment capture.
DefConstructorI(T_CS('\begin{verbatim}'), undef,
"<ltx:verbatim font='#font'>#body</ltx:verbatim>",
beforeDigest => [sub { $_[0]->bgroup;
my @stuff = ();
if (my $b = LookupValue('@environment@verbatim@atbegin')) {
push(@stuff, Digest(@$b)); }
AssignValue(current_environment => 'verbatim');
DefMacroI('\@currenvir', undef, 'verbatim');
MergeFont(family => 'typewriter');
# Digest(T_CS('\par')); # NO! See beforeConstruct!
@stuff; }],
afterDigest => [sub {
my ($stomach, $whatsit) = @_;
# $stomach->egroup;
my $font = $whatsit->getFont;
my $loc = $whatsit->getLocator;
my $end = "\\end{verbatim}";
my @lines = ();
my $gullet = $stomach->getGullet;
while (defined(my $line = $gullet->readRawLine)) {
# The raw chars will still have to be decoded (but not space!!)
$line = join('', map { ($_ eq ' ' ? ' ' : FontDecodeString($_, 'OT1_typewriter')) }
split(//, $line));
if ($line =~ /^(.*?)\\end\{verbatim\}(.*?)$/) {
push(@lines, $1 . "\n"); $gullet->unread(Tokenize($2), T_CR);
last; }
push(@lines, $line . "\n"); }
pop(@lines) if $lines[-1] eq "\n";
# Note last line ends up as Whatsit's "trailer"
if (my $b = LookupValue('@environment@verbatim@atend')) {
push(@lines, ToString(Digest(@$b))); }
$stomach->egroup;
$whatsit->setBody(map { Box($_, $font, $loc, T_OTHER($_)) } @lines, $end);
return; }],
beforeDigest => [sub { beforeDigestVerbatim(0, @_); }],
afterDigest => [sub { afterDigestVerbatim(0, @_); }],
beforeConstruct => sub { $_[0]->maybeCloseElement('ltx:p'); });

# Starred verbatim environment: \begin{verbatim*} is bound as a single control
# sequence. Both digestion hooks delegate to the helpers shared with the
# unstarred form, passing 1 as the "starred" flag; before construction any
# open ltx:p is closed (if possible) so the verbatim element is not nested in it.
DefConstructorI(
  T_CS('\begin{verbatim*}'), undef,
  "<ltx:verbatim font='#font'>#body</ltx:verbatim>",
  beforeDigest    => [sub { return beforeDigestVerbatim(1, @_); }],
  afterDigest     => [sub { return afterDigestVerbatim(1, @_); }],
  beforeConstruct => sub {
    my ($doc) = @_;
    return $doc->maybeCloseElement('ltx:p'); });

# Shared beforeDigest hook for the \begin{verbatim} / \begin{verbatim*} constructors.
#   $starred : flag passed by the caller (0 = verbatim, 1 = verbatim*).
#              It is accepted for symmetry with afterDigestVerbatim but is not
#              consulted here: both variants record the environment as 'verbatim'.
#   $stomach : the stomach on which the new group is opened.
# Returns whatever digesting the '@environment@verbatim@atbegin' tokens produced
# (an empty list when no such value is stored).
sub beforeDigestVerbatim {
  my ($starred, $stomach) = @_;
  # Everything assigned below is scoped to this group.
  $stomach->bgroup;
  # Run any stored at-begin material first, before the font is switched.
  my $atbegin  = LookupValue('@environment@verbatim@atbegin');
  my @digested = ($atbegin ? Digest(@$atbegin) : ());
  # Record which environment we are in, both as state and as \@currenvir.
  AssignValue(current_environment => 'verbatim');
  DefMacroI('\@currenvir', undef, 'verbatim');
  MergeFont(family => 'typewriter');
  # Digest(T_CS('\par')); # NO! See beforeConstruct!
  return @digested; }

# Shared afterDigest hook for the \begin{verbatim} / \begin{verbatim*} constructors.
#   $starred : true for verbatim*; selects '\end{verbatim*}' as the terminator and
#              renders every space as U+2423 instead of a plain space.
#   $stomach : the stomach; supplies the gullet and closes the group that
#              beforeDigestVerbatim opened.
#   $whatsit : the whatsit under construction; receives the collected body.
# Reads raw lines from the gullet until one contains the terminator. Text before
# the terminator is kept; text after it is re-tokenized and pushed back onto the
# gullet (followed by T_CR) so normal processing resumes there.
# If the input runs out before a terminator is seen, the loop simply ends and
# whatever was read becomes the body; no error is reported here.
# Returns nothing.
sub afterDigestVerbatim {
  my ($starred, $stomach, $whatsit) = @_;
  my $font   = $whatsit->getFont;
  my $loc    = $whatsit->getLocator;
  my $end    = $starred ? '\end{verbatim*}' : '\end{verbatim}';
  my $space  = $starred ? "\x{2423}"        : ' ';
  my @lines  = ();
  my $gullet = $stomach->getGullet;
  while (defined(my $line = $gullet->readRawLine)) {
    my ($exiting, $remaining) = (0, undef);
    # \Q..\E: the terminator contains regex metacharacters (\ { } and, when starred, *).
    if ($line =~ /^(.*?)\Q$end\E(.*?)$/) {
      ($exiting, $line, $remaining) = (1, $1, $2); }
    # The raw chars will still have to be decoded (but not space!!)
    $line = join('', map { ($_ eq ' ' ? $space : FontDecodeString($_, 'OT1_typewriter')) }
        split(//, $line));
    push(@lines, $line . "\n");
    if ($exiting) {
      $gullet->unread(Tokenize($remaining), T_CR);
      last; } }
  # Drop a trailing empty line (e.g. the terminator sat at the very start of its line).
  # The @lines guard avoids comparing undef when nothing at all was read.
  pop(@lines) if @lines && ($lines[-1] eq "\n");
  # Named $atend rather than $b so the sort variable is not shadowed.
  if (my $atend = LookupValue('@environment@verbatim@atend')) {
    push(@lines, ToString(Digest(@$atend))); }
  $stomach->egroup;
  # Note last line ends up as Whatsit's "trailer"
  $whatsit->setBody(map { Box($_, $font, $loc, T_OTHER($_)) } @lines, $end);
  return; }

DefPrimitiveI('\@vobeyspaces', undef, sub {
AssignCatcode(" " => 13);
Let(T_ACTIVE(" "), '\nobreakspace');
Expand All @@ -1935,11 +1953,14 @@ DefMacroI('\verb', undef, sub {
StartSemiverbatim('%', '\\', '{', '}');
$STATE->assignCatcode(' ', CC_ACTIVE);
my $init;
my $skippedSpace = 0;
# As of texlive 2021, DO skip spaces before delimiter (even tho we've changed catcodes)
# but if we do skip spaces, * can be the delimiter
do { $init = $gullet->readToken();
$skippedSpace = 1 if (defined $init && $init->getString eq ' ');
} while (defined $init && $init->getString eq ' ');
my $starred = 0;
if (T_OTHER('*')->equals($init)) {
if (T_OTHER('*')->equals($init) && !$skippedSpace) {
$starred = 1;
do { $init = $gullet->readToken();
} while (defined $init && $init->getString eq ' '); }
Expand Down
Binary file modified t/tokenize/verb.pdf
Binary file not shown.
10 changes: 9 additions & 1 deletion t/tokenize/verb.tex
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@ \section{Verbatim Environment}
keep this too \end{verbatim} outside verbatim
More Normal {\bf Bold} stuff.

Verbatim 4:
\begin{verbatim*} keep this
AbNormal {\bf NonBold} stuff.
keep this too \end{verbatim*} outside verbatim
More Normal {\bf Bold} stuff.

% NOT allowed
%\section{Inline \verb|\verb| verbatim.}
\section{Inline verb command}
Expand All @@ -47,7 +53,7 @@ \section{Inline verb command}
Note that spaces after a control-sequence are skipped on the \emph{next} read,
by which time catcodes may have been changed.
Prior to April, 2020, a space following \verb|\verb| would -- surprisingly -- have been treated as
the delimiter! Since then, spaces are skipped and the following char is used as delimiter.
the delimiter! Since then, spaces are skipped and the following char (even *) is used as delimiter.
We'll adopt the newer approach.

\verb |a}b#c^d_e$f| stuff
Expand All @@ -56,6 +62,8 @@ \section{Inline verb command}

{\verb* Trick-roll} T }

\verb * + i n * o u t +

% Another tricky case
\makeatletter
\def\verbatimlisting#1{%
Expand Down
16 changes: 14 additions & 2 deletions t/tokenize/verb.xml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,15 @@ still inside verbatim
<verbatim font="typewriter"> keep this
AbNormal {\bf NonBold} stuff.
keep this too
</verbatim>
<p>outside verbatim
More Normal <text font="bold">Bold</text> stuff.</p>
</para>
<para xml:id="S1.p5">
<p>Verbatim 4:</p>
<verbatim font="typewriter">␣keep␣this
AbNormal␣{\bf␣NonBold}␣stuff.
keep␣this␣too␣␣␣␣
</verbatim>
<p>outside verbatim
More Normal <text font="bold">Bold</text> stuff.</p>
Expand Down Expand Up @@ -78,7 +87,7 @@ More Normal <text font="bold">Bold</text> stuff.</p>
<p>Note that spaces after a control-sequence are skipped on the <emph font="italic">next</emph> read,
by which time catcodes may have been changed.
Prior to April, 2020, a space following <verbatim font="typewriter">\verb</verbatim> would – surprisingly – have been treated as
the delimiter! Since then, spaces are skipped and the following char is used as delimiter.
the delimiter! Since then, spaces are skipped and the following char (even *) is used as delimiter.
We’ll adopt the newer approach.</p>
</para>
<para xml:id="S2.p9">
Expand All @@ -91,9 +100,12 @@ We’ll adopt the newer approach.</p>
<p><verbatim font="typewriter">rick-roll}␣</verbatim></p>
</para>
<para xml:id="S2.p12">
<p>Input snippet</p>
<p><verbatim font="typewriter">+ i n </verbatim> o u t +</p>
</para>
<para xml:id="S2.p13">
<p>Input snippet</p>
</para>
<para xml:id="S2.p14">
<verbatim font="typewriter">|a}b#c^d_e$f|
</verbatim>
<p>after.</p>
Expand Down
Loading