Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make yychar long #605

Merged
merged 19 commits into from
Nov 26, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/md/lex-specs.md
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ interested in how to interface your generated scanner with Byacc/J.

- `%char`

Turns character counting on. The `int` member variable `yychar`
Turns character counting on. The `long` member variable `yychar`
contains the number of characters (starting with 0) from the
beginning of input to the beginning of the current token.

Expand Down Expand Up @@ -1159,7 +1159,7 @@ Currently, the API consists of the following methods and member fields:
contains the current line of input (starting with 0, only active
with the `lineCounting` directive)

- `int yychar`
- `long yychar`

contains the current character count in the input (starting with 0,
only active with the `charCounting` directive)
Expand Down
4 changes: 2 additions & 2 deletions docs/pt_BR/manual-pt_BR.tex
Original file line number Diff line number Diff line change
Expand Up @@ -1176,7 +1176,7 @@ \subsubsection{Line, character and column counting\label{Counting}}
\item
{\bf \texttt{\%char}}

Turns character counting on. The \texttt{int} member variable \texttt{yychar}
Turns character counting on. The \texttt{long} member variable \texttt{yychar}
contains the number of characters (starting with 0) from the beginning
of input to the beginning of the current token.

Expand Down Expand Up @@ -1819,7 +1819,7 @@ \subsubsection{Scanner methods and fields accessible in actions (API)\label{Scan
contains the current line of input (starting with 0, only active with
the \texttt{\htmlref{\%line}{Counting}} directive)

\item \texttt{int yychar}\\
\item \texttt{long yychar}\\
contains the current character count in the input (starting with 0,
only active with the \texttt{\htmlref{\%char}{Counting}} directive)

Expand Down
5 changes: 4 additions & 1 deletion javatests/jflex/testcase/large_input/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ java_test(
java_library(
name = "large_input_scanner",
srcs = [
"NegativeYyCharException.java",
"State.java",
":gen_large_input_scanner",
],
Expand All @@ -38,7 +39,9 @@ jflex(
java_library(
name = "repeat_content_reader",
srcs = ["RepeatContentReader.java"],
deps = [],
deps = [
"//third_party/com/google/guava",
],
)

java_test(
Expand Down
16 changes: 7 additions & 9 deletions javatests/jflex/testcase/large_input/LargeInputTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import static com.google.common.truth.Truth.assertThat;
import static com.google.common.truth.Truth.assertWithMessage;
import static jflex.testing.assertion.MoreAsserts.assertThrows;

import com.google.common.io.CharSource;
import java.io.IOException;
Expand All @@ -16,21 +15,20 @@
*/
public class LargeInputTest {

/** Tests a well-formed input content larger than MAX_INT (2^32-1). */
/**
* Tests a well-formed input content larger than Integer.MAX_VALUE (2^31-1). The scanner should
* never encounter {@code yychar < 0} and hence never throw {@link NegativeYyCharException}.
*/
@Test
public void consumeLargeInput() throws Exception {
final String content = "hello foo\n";
long size = Integer.MAX_VALUE / content.length() * content.length();
size += 3 * (long) content.length(); // a few more
final String content = "One every character the `yychar` is incremented, but don't overflow!\n";
long size = (long) Integer.MAX_VALUE + 3 * (long) content.length(); // a few more
assertWithMessage("Tests an input content larger than MAX_INT (2^32-1)")
.that(size)
.isGreaterThan((long) Integer.MAX_VALUE + 1L);
Reader largeContentReader = new RepeatContentReader(size, content);
LargeInputScanner scanner = createScanner(largeContentReader);
// FIX bug #536
// This is not expected, only how JFlex < 1.8 behaves. This assertion only demonstrates the
// test reproduces the bug.
assertThrows(IllegalStateException.class, () -> readUntilEof(scanner));
readUntilEof(scanner);
}

@Test
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package jflex.testcase.large_input;

/**
 * Signals that the lexer's {@code yychar} character counter is negative, which should never
 * happen.
 */
public class NegativeYyCharException extends IllegalStateException {

  /**
   * Creates the exception for the given counter value.
   *
   * @param yychar the character count reported by the scanner; it is appended to the message
   */
  NegativeYyCharException(long yychar) {
    super(describe(yychar));
  }

  /** Builds the exception message for the given character count. */
  private static String describe(long yychar) {
    return "The scanner has a negative number of read characters: " + yychar;
  }
}
36 changes: 30 additions & 6 deletions javatests/jflex/testcase/large_input/RepeatContentReader.java
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
package jflex.testcase.large_input;

import static com.google.common.base.Preconditions.checkArgument;
import static java.lang.Math.min;

import java.io.Reader;

/** A reader that returns the same content over and over again. */
public class RepeatContentReader extends Reader {

private static final int MIN_INTERNAL_BUFFER_SIZE = 64 * 1024;
/** Size of the precomputed buffer with the repeated content. */
private static final int PREPARED_BUFFER_SIZE = 64 * 1024;

/** The size of the content that this reader will provide. */
private final long size;
Expand All @@ -15,9 +18,18 @@ public class RepeatContentReader extends Reader {
/** How many characters have been read so far. */
private long read;

/**
* A reader that returns {@code content} in a loop, until the given {@code size} characters have
* been read.
*
* @param size The number of characters that this reader will return.
* @param content The content that this reader will return. If it's smaller than size, then it
* loops and reads the given content from the beginning again.
*/
RepeatContentReader(long size, String content) {
checkArgument(size > 0);
this.size = size;
this.content = createInternalContent(content);
this.content = createInternalContent(size, content);
}

@Override
Expand All @@ -42,13 +54,25 @@ public int read(char[] cbuf, int off, int len) {
@Override
public void close() {}

private static char[] createInternalContent(String content) {
private static char[] createInternalContent(long wantedSize, String content) {
char[] givenContent = content.toCharArray();
if (givenContent.length > MIN_INTERNAL_BUFFER_SIZE) {
if (wantedSize <= givenContent.length) {
// The given buffer is larger than what we will read.
// Actually, we could even take the substring for memory efficiency. But from tests, the given
// content is always small.
return givenContent;
}
// round down the size
int size = MIN_INTERNAL_BUFFER_SIZE / givenContent.length * givenContent.length;
// wantedSize > content.length(): The reader will loop over the content.
if (givenContent.length >= PREPARED_BUFFER_SIZE) {
// The given content is large enough. Nothing to do.
return givenContent;
}
// To maximize use of the buffer, we already prepare a repeated content of PREPARED_BUFFER_SIZE.
int size = PREPARED_BUFFER_SIZE / givenContent.length * givenContent.length;
if (size > wantedSize) {
// But we don't need so much if we read less.
size = (int) (wantedSize / givenContent.length + 1) * givenContent.length;
}
char[] myContent = new char[size];
for (int destPos = 0; destPos < size; destPos += givenContent.length) {
System.arraycopy(givenContent, 0, myContent, destPos, givenContent.length);
Expand Down
2 changes: 1 addition & 1 deletion javatests/jflex/testcase/large_input/large_input.flex
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import java.util.zip.ZipException;
%%

^.+ { if (yychar < 0) {
throw new IllegalStateException("yychar must not be negative but is: " + yychar);
throw new NegativeYyCharException(yychar);
} else if (yychar <= Integer.MAX_VALUE) {
return State.BEFORE_2GB;
} else {
Expand Down
1 change: 1 addition & 0 deletions jflex/changelog.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
## [JFlex 1.8.0](https://github.com/jflex-de/jflex/milestone/11)
- Unicode 10.0, 11.0, 12.0, and 12.1 are supported (#540, #555, #556, #563)
- Unicode Emoji properties are supported for Unicode versions 8.0+ (#546)
- `yychar` type has been changed from `int` to `long` (#605)
- JFlex build now depends on JDK 1.8+; JFlex runtime still supports JDK 7+

## [JFlex 1.7.0](https://github.com/jflex-de/jflex/milestone/10) (September 21, 2018)
Expand Down
16 changes: 13 additions & 3 deletions jflex/examples/simple/src/main/java/Yytoken.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,14 @@ class Yytoken {
public int m_index;
public String m_text;
public int m_line;
public int m_charBegin;
public int m_charEnd;
public long m_charBegin;
public long m_charEnd;

Yytoken(int index, String text, int line, int charBegin, int charEnd) {
Yytoken(int index, String text, int line, long charBegin, long charEnd) {
checkArgument("index", index >= 0);
checkArgument("line", line >= 0);
checkArgument("charBegin", charBegin >= 0);
checkArgument("charEnd", charEnd > 0);
m_index = index;
m_text = text;
m_line = line;
Expand All @@ -26,4 +30,10 @@ public String toString() {
+ "\ncEnd. : "
+ m_charEnd;
}

/**
 * Rejects an argument whose precondition does not hold.
 *
 * @param argName name of the argument being validated; used as the exception message
 * @param expectation result of the precondition check
 * @throws IllegalArgumentException if {@code expectation} is {@code false}
 */
private static void checkArgument(String argName, boolean expectation) {
  if (expectation) {
    return;
  }
  throw new IllegalArgumentException(argName);
}
}
36 changes: 20 additions & 16 deletions jflex/src/main/jflex/skeleton.nested
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
private int yyline;

/** the number of characters up to the start of the matched text */
private int yychar;
private long yychar;

/**
* the number of characters from the last newline up to the start of the
Expand Down Expand Up @@ -93,7 +93,7 @@
int zzCurrentPos;
int zzMarkedPos;
int yyline;
int yychar;
long yychar;
int yycolumn;
char [] zzBuffer;
boolean zzAtBOL;
Expand All @@ -105,7 +105,8 @@
ZzFlexStreamInfo(java.io.Reader zzReader, int zzEndRead, int zzStartRead,
int zzCurrentPos, int zzMarkedPos, char [] zzBuffer,
boolean zzAtBOL, boolean zzAtEOF, boolean zzEOFDone,
int zzFinalHighSurrogate, int yyline, int yychar, int yycolumn) {
int zzFinalHighSurrogate, int yyline, long yychar,
int yycolumn) {
this.zzReader = zzReader;
this.zzEndRead = zzEndRead;
this.zzStartRead = zzStartRead;
Expand Down Expand Up @@ -223,14 +224,9 @@
zzMarkedPos, zzBuffer, zzAtBOL, zzAtEOF, zzEOFDone,
zzFinalHighSurrogate, yyline, yychar, yycolumn)
);
zzAtBOL = true;
zzAtEOF = false;
zzBuffer = new char[ZZ_BUFFERSIZE];
zzReader = reader;
zzEndRead = zzStartRead = 0;
zzCurrentPos = zzMarkedPos = 0;
zzFinalHighSurrogate = 0;
yyline = yychar = yycolumn = 0;
yyResetPosition();
}


Expand Down Expand Up @@ -291,18 +287,26 @@
*/
public final void yyreset(java.io.Reader reader) {
zzReader = reader;
zzAtBOL = true;
zzAtEOF = false;
zzEOFDone = false;
zzEndRead = zzStartRead = 0;
zzCurrentPos = zzMarkedPos = 0;
zzFinalHighSurrogate = 0;
yyline = yychar = yycolumn = 0;
yyResetPosition();
zzLexicalState = YYINITIAL;
if (zzBuffer.length > ZZ_BUFFERSIZE)
if (zzBuffer.length > ZZ_BUFFERSIZE) {
zzBuffer = new char[ZZ_BUFFERSIZE];
}
}

/**
 * Puts the scanner's position state back to its initial values.
 *
 * <p>Sets the beginning-of-line flag, clears the end-of-file flag, and zeroes the buffer
 * positions, the pending high surrogate, and the line, column and character counters. The
 * reader, the buffer itself, {@code zzEOFDone} and the lexical state are not touched here.
 */
private final void yyResetPosition() {
  // Line-start / end-of-input flags.
  zzAtEOF = false;
  zzAtBOL = true;
  // Positions inside the buffer.
  zzEndRead = zzStartRead = 0;
  zzMarkedPos = zzCurrentPos = 0;
  zzFinalHighSurrogate = 0;
  // Counters; yychar is a long, so it is reset separately from the int counters.
  yycolumn = yyline = 0;
  yychar = 0L;
}

/**
* Returns the current lexical state.
Expand Down
21 changes: 14 additions & 7 deletions jflex/src/main/resources/jflex/skeleton.default
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
private int yyline;

/** Number of characters up to the start of the matched text. */
private int yychar;
private long yychar;

/** Number of characters from the last newline up to the start of the matched text. */
private int yycolumn;
Expand Down Expand Up @@ -177,19 +177,26 @@
*/
public final void yyreset(java.io.Reader reader) {
zzReader = reader;
zzAtBOL = true;
zzAtEOF = false;
zzEOFDone = false;
zzEndRead = zzStartRead = 0;
zzCurrentPos = zzMarkedPos = 0;
zzFinalHighSurrogate = 0;
yyline = yychar = yycolumn = 0;
yyResetPosition();
zzLexicalState = YYINITIAL;
if (zzBuffer.length > ZZ_BUFFERSIZE) {
zzBuffer = new char[ZZ_BUFFERSIZE];
}
}

/**
 * Puts the scanner's position state back to its initial values.
 *
 * <p>Sets the beginning-of-line flag, clears the end-of-file flag, and zeroes the buffer
 * positions, the pending high surrogate, and the line, column and character counters. The
 * reader, the buffer itself, {@code zzEOFDone} and the lexical state are not touched here.
 */
private final void yyResetPosition() {
  // Line-start / end-of-input flags.
  zzAtEOF = false;
  zzAtBOL = true;
  // Positions inside the buffer.
  zzEndRead = zzStartRead = 0;
  zzMarkedPos = zzCurrentPos = 0;
  zzFinalHighSurrogate = 0;
  // Counters; yychar is a long, so it is reset separately from the int counters.
  yycolumn = yyline = 0;
  yychar = 0L;
}

/**
* Returns the current lexical state.
Expand Down
1 change: 0 additions & 1 deletion testsuite/testcases/src/test/cases/simple/.gitignore

This file was deleted.

19 changes: 0 additions & 19 deletions testsuite/testcases/src/test/cases/simple/Utility.java

This file was deleted.

Loading