Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lexer testing #380

Merged
merged 4 commits into from
Sep 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 93 additions & 0 deletions core/src/main/kotlin/com/strumenta/kolasu/model/Position.kt
Original file line number Diff line number Diff line change
Expand Up @@ -243,3 +243,96 @@ val Node.startLine: Int?

val Node.endLine: Int?
get() = this.position?.end?.line

/**
 * Given the specified text and start point, it produces a position covering the text, minus the whitespace.
 *
 * If a null text is specified then a null position is returned.
 *
 * If a text with no leading or trailing whitespace is provided then this will return a position:
 * - starting at the given start point
 * - ending at the end point calculated by "adding" the text to the start point
 *
 * If the text has leading whitespace, the start point will be advanced to skip such whitespace.
 * Similarly, if the text has trailing whitespace the end point will be receded to skip such whitespace.
 */
fun strippedPosition(
    text: String?,
    start: Point
): Position? =
    text?.let { content ->
        start.positionWithLength(content.length).stripPosition(content)
    }

/**
 * See strippedPosition.
 */
fun Position.stripPosition(text: String): Position {
    var stripped = this
    var content = text
    // Consume leading spaces, advancing the start point one column per space.
    while (content.startsWith(' ')) {
        stripped = stripped.advanceStart()
        content = content.substring(1)
    }
    // Consume trailing spaces, receding the end point one column per space.
    while (content.endsWith(' ')) {
        stripped = stripped.recedeEnd()
        content = content.dropLast(1)
    }
    // NOTE(review): only plain spaces (' ') are stripped, not tabs or newlines — confirm intended.
    // Cap the end point so it never extends past the stripped text itself.
    val cappedEnd = stripped.start + content
    return if (cappedEnd.isBefore(stripped.end)) Position(stripped.start, cappedEnd) else stripped
}

/**
 * Returns a copy of this position whose start point is moved one column to the right.
 * Note: only the column is changed, so this must not be used across line breaks.
 */
fun Position.advanceStart(): Position =
    Position(Point(start.line, start.column + 1), end)

/**
 * Returns a copy of this position whose end point is moved one column to the left.
 * Note: only the column is changed, so this must not be used across line breaks.
 */
fun Position.recedeEnd(): Position =
    Position(start, Point(end.line, end.column - 1))

/**
 * Creates a bounded map with least-recently-used eviction: once more than [maxEntries]
 * entries are present, the least recently accessed entry is dropped on insertion.
 *
 * Backed by an access-ordered [LinkedHashMap]; note that reads reorder entries, so the
 * returned map is NOT thread-safe.
 *
 * The return type is [MutableMap] (the original declared [Map], forcing callers into an
 * unsafe cast to mutate the cache).
 */
private fun <K, V> createLeastRecentlyUsedMap(maxEntries: Int = 100): MutableMap<K, V> {
    // Initial capacity sized so maxEntries fit under the 0.7 load factor without rehashing.
    return object : LinkedHashMap<K, V>(maxEntries * 10 / 7, 0.7f, true) {
        override fun removeEldestEntry(eldest: Map.Entry<K, V>): Boolean {
            return size > maxEntries
        }
    }
}

/**
 * Caches the line decomposition of code snippets so repeated lookups on the same code
 * do not re-split it. Lines keep their newline terminators.
 */
private object LinesSplitter {
    // LRU-bounded cache; the factory declares Map, hence the cast to its mutable backing type.
    val cache = createLeastRecentlyUsedMap<String, List<String>>() as MutableMap<String, List<String>>

    // Lookbehind boundary: split after each '\n' so each line retains its terminator
    // (the last line may lack one). Hoisted so the regex is compiled once.
    private val lineBoundary = "(?<=\n)".toRegex()

    fun getLines(code: String): List<String> =
        cache.getOrPut(code) { code.split(lineBoundary) }
}

/**
 * Given a piece of code, it extracts from it the substring at the given position.
 *
 * Lines appear to be one-based and columns zero-based in [Position] — consistent with the
 * `lines[currLine - 1]` / raw-column indexing below. For multi-line positions the
 * intermediate lines keep their newline terminators.
 *
 * @throws RuntimeException (with the underlying failure as its cause) when the position
 *         does not fit within this text
 */
fun String.codeAtPosition(position: Position): String {
    try {
        val lines = LinesSplitter.getLines(this)
        var currLine = position.start.line

        // Single-line position: the slice between the two columns on that line.
        if (position.start.line == position.end.line) {
            return lines[currLine - 1].substring(position.start.column, position.end.column)
        }

        // Multi-line position: tail of the first line, the middle lines whole,
        // then the head of the last line.
        val res = StringBuilder(lines[currLine - 1].substring(position.start.column))
        currLine++
        while (currLine <= lines.size && currLine < position.end.line) {
            res.append(lines[currLine - 1])
            currLine++
        }
        res.append(lines[currLine - 1].substring(0, position.end.column))
        return res.toString()
    } catch (t: Throwable) {
        // Chain the original failure as the cause instead of discarding it.
        throw RuntimeException("Unable to get position $position in text:\n```$this```", t)
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,9 @@ abstract class KolasuANTLRLexer<T : KolasuToken>(val tokenFactory: TokenFactory<
break
} else {
if (!onlyFromDefaultChannel || t.channel == Token.DEFAULT_CHANNEL) {
tokens.add(tokenFactory.convertToken(t))
if (t.type != Token.EOF) {
tokens.add(tokenFactory.convertToken(t))
}
last = t
}
}
Expand Down
8 changes: 7 additions & 1 deletion core/src/main/kotlin/com/strumenta/kolasu/parsing/Parsing.kt
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,12 @@ data class TokenCategory(val type: String) {
val KEYWORD = TokenCategory("Keyword")
val NUMERIC_LITERAL = TokenCategory("Numeric literal")
val STRING_LITERAL = TokenCategory("String literal")
val OTHER_LITERAL = TokenCategory("Other literal")
val PLAIN_TEXT = TokenCategory("Plain text")
val WHITESPACE = TokenCategory("Whitespace")
val IDENTIFIER = TokenCategory("Identifier")
val PUNCTUATION = TokenCategory("Punctuation")
val OPERATOR = TokenCategory("Operator")
}
}

Expand Down Expand Up @@ -224,7 +229,8 @@ interface KolasuLexer<T : KolasuToken> : Serializable {
/**
* Performs "lexing" on the given code stream, i.e., it breaks it into tokens.
*/
fun lex(file: File): LexingResult<T> = BufferedInputStream(FileInputStream(file)).use { lex(it) }
fun lex(file: File, charset: Charset = Charsets.UTF_8, onlyFromDefaultChannel: Boolean = true): LexingResult<T> =
BufferedInputStream(FileInputStream(file)).use { lex(it, charset, onlyFromDefaultChannel) }
}

fun Lexer.injectErrorCollectorInLexer(issues: MutableList<Issue>) {
Expand Down
89 changes: 89 additions & 0 deletions core/src/main/kotlin/com/strumenta/kolasu/testing/LexerTesting.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
package com.strumenta.kolasu.testing

import com.strumenta.kolasu.model.START_POINT
import com.strumenta.kolasu.model.codeAtPosition
import com.strumenta.kolasu.parsing.KolasuLexer
import com.strumenta.kolasu.parsing.KolasuToken
import java.io.File
import java.io.InputStream
import java.nio.charset.Charset
import kotlin.test.assertEquals

/**
 * Reads [file] with [charset], tokenizes it with [lexer], and verifies that the produced
 * tokens exactly cover the text (see checkTokensAreCoveringText).
 *
 * @return the tokens produced by the lexer
 * @throws IllegalArgumentException when the file is missing, not a regular file, or unreadable
 */
fun <T : KolasuToken> checkFileTokenization(
    file: File,
    lexer: KolasuLexer<T>,
    charset: Charset = Charsets.UTF_8
): List<T> {
    // Explicit messages so a failing precondition identifies the offending file.
    require(file.exists()) { "File not found: $file" }
    require(file.isFile()) { "Not a regular file: $file" }
    require(file.canRead()) { "File not readable: $file" }
    val code = file.readText(charset = charset)
    return checkTokenization(code, lexer)
}

/**
 * Tokenizes the whole content of [inputStream] (decoded with [charset]) and verifies
 * that the produced tokens exactly cover it. The stream is closed after reading.
 *
 * @return the tokens produced by the lexer
 */
fun <T : KolasuToken> checkTokenization(
    inputStream: InputStream,
    lexer: KolasuLexer<T>,
    charset: Charset = Charsets.UTF_8
): List<T> =
    checkTokenization(inputStream.bufferedReader(charset = charset).use { it.readText() }, lexer)

/**
 * Tokenizes [code] (including tokens from non-default channels) and verifies that the
 * produced tokens exactly cover it.
 *
 * @return the tokens produced by the lexer
 * @throws IllegalArgumentException when lexing reports any issue
 */
fun <T : KolasuToken> checkTokenization(code: String, lexer: KolasuLexer<T>): List<T> {
    val result = lexer.lex(code, onlyFromDefaultChannel = false)
    require(result.issues.isEmpty()) { "Lexing issues occurred: ${result.issues}" }
    checkTokensAreCoveringText(code, result.tokens)
    return result.tokens
}

/**
 * Verifies that [tokens] are ordered and cover [code] exactly: the first token starts at
 * [START_POINT], each subsequent token starts where the previous one ends, each token's
 * text matches both its declared position extent and the corresponding slice of [code],
 * and the last token ends at the very end of [code].
 */
fun <T : KolasuToken> checkTokensAreCoveringText(code: String, tokens: List<T>) {
    require(code.isEmpty() == tokens.isEmpty()) {
        "Empty code must produce no tokens, and non-empty code at least one"
    }
    if (code.isEmpty()) {
        return
    }

    // Tokens should be in order and they should cover without gaps or overlaps
    // the text from the very start to the very end of the code

    var prevToken: KolasuToken? = null
    tokens.forEach { token ->
        if (prevToken == null) {
            // This is the first token, so we should start at the very beginning.
            // Note: kotlin.test.assertEquals takes the EXPECTED value first.
            assertEquals(
                START_POINT,
                token.position.start,
                "The first token is expected to be at the start position $START_POINT while it is at " +
                    "${token.position.start}"
            )
        } else {
            assertEquals(
                prevToken!!.position.end,
                token.position.start,
                "Token $token does not immediately follow $prevToken"
            )
        }

        // The text specified in tokens should be as long as the position indicated
        assertEquals(
            token.position.end,
            token.position.start + token.text,
            "We have a token with position ${token.position} and text '${token.text}'"
        )

        // The text specified in the tokens should correspond to the corresponding code
        val expectedText = code.codeAtPosition(token.position)
        assertEquals(
            expectedText,
            token.text,
            "At position ${token.position} we found '${token.text}' while we expected '$expectedText'"
        )

        prevToken = token
    }

    val codeEnd = START_POINT + code
    assertEquals(codeEnd, prevToken!!.position.end, "The last token should end at the very end of the code")
}
Loading