-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Basic Lexer and Start work on language backend
- Loading branch information
Showing
13 changed files
with
256 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
#include "Lexer.h" | ||
|
||
#include <iostream> | ||
#include <cwctype> | ||
|
||
namespace Iridis | ||
{ | ||
namespace Lexer | ||
{ | ||
std::vector<Token> Tokenize(std::wstring& source) | ||
{ | ||
std::vector<std::wstring> stringTokens; | ||
std::wstring buffer; | ||
|
||
for (wchar_t character : source) | ||
{ | ||
if (std::iswspace(character) || std::iswpunct(character)) | ||
{ | ||
if (!buffer.empty()) | ||
{ | ||
stringTokens.push_back(buffer); | ||
buffer.clear(); | ||
} | ||
|
||
if (std::iswpunct(character)) | ||
stringTokens.push_back(std::wstring(1, character)); | ||
} | ||
else | ||
buffer += character; | ||
} | ||
|
||
if (!buffer.empty()) | ||
stringTokens.push_back(buffer); | ||
|
||
std::vector<Token> tokens; | ||
for (std::wstring stringToken : stringTokens) | ||
tokens.push_back(Token::ToToken(stringToken)); | ||
|
||
return tokens; | ||
} | ||
}; | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#pragma once | ||
|
||
#include "Token.h" | ||
|
||
#include <vector> | ||
|
||
namespace Iridis | ||
{ | ||
namespace Lexer | ||
{ | ||
// Splits `source` into tokens: whitespace separates words and is dropped,
// each punctuation character becomes its own token, and every piece is
// converted with Token::ToToken. Tokens are returned in source order.
std::vector<Token> Tokenize(std::wstring& source); | ||
}; | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
#include "Token.h" | ||
|
||
#include <iostream> | ||
#include <string> | ||
|
||
namespace Iridis | ||
{ | ||
// Converts one already-split piece of source text into a Token.
//
// `buffer` is compared against the fixed spellings below (delimiters,
// operators, keywords). Anything that matches none of them is returned as an
// Identifier token carrying `buffer` as its name.
Token Token::ToToken(const std::wstring& buffer)
{
    // Fixed spellings and the token type each one produces.
    struct Spelling
    {
        const wchar_t* text;
        Type type;
    };

    static const Spelling spellings[] =
    {
        // Opening delimiters are the left-hand (L*) ones, closing the right-hand (R*).
        { L"{", Type::LCurlyBrace },
        { L"}", Type::RCurlyBrace },
        { L"[", Type::LBracket },
        { L"]", Type::RBracket },
        { L"(", Type::LParen },
        { L")", Type::RParen },

        { L":", Type::Colon },
        { L";", Type::SemiColon },
        { L"^", Type::Caret },
        { L"&", Type::Ampersand },
        { L"!", Type::Exclamation },

        { L"+", Type::Plus },
        { L"-", Type::Minus },
        { L"*", Type::Asterisk },
        { L"/", Type::Slash },
        { L"=", Type::Equal },

        { L"proc", Type::Procedure },
        { L"struct", Type::Structure },
        { L"enum", Type::Enum },
    };

    for (const Spelling& spelling : spellings)
    {
        if (buffer == spelling.text)
            return Token(spelling.type);
    }

    // TODO(Hachem): Strings and Numbers
    return Token(Type::Identifier, buffer);
}
|
||
std::wstring Token::ToString() | ||
{ | ||
if (type == Type::RCurlyBrace) return L"RCurlyBrace"; | ||
else if (type == Type::LCurlyBrace) return L"LCurlyBrace"; | ||
else if (type == Type::RBracket) return L"RBracket"; | ||
else if (type == Type::LBracket) return L"LBracket"; | ||
else if (type == Type::RParen) return L"RParen"; | ||
else if (type == Type::LParen) return L"LParen"; | ||
|
||
else if (type == Type::Colon) return L"Colon"; | ||
else if (type == Type::SemiColon) return L"SemiColon"; | ||
else if (type == Type::Caret) return L"Caret"; | ||
else if (type == Type::Ampersand) return L"Ampersand"; | ||
else if (type == Type::Exclamation) return L"Exclamation"; | ||
|
||
else if (type == Type::Plus) return L"Plus"; | ||
else if (type == Type::Minus) return L"Minus"; | ||
else if (type == Type::Asterisk) return L"Asterisk"; | ||
else if (type == Type::Slash) return L"Slash"; | ||
else if (type == Type::Equal) return L"Equal"; | ||
|
||
else if (type == Type::Procedure) return L"Procedure"; | ||
else if (type == Type::Structure) return L"Structure"; | ||
else if (type == Type::Enum) return L"Enum"; | ||
|
||
else if (type == Type::Identifier) | ||
return L"Identifier(" + identifier + L")"; | ||
|
||
else return L"Unknown Token"; | ||
} | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
#pragma once | ||
|
||
#include <string> | ||
#include <optional> | ||
|
||
namespace Iridis | ||
{ | ||
// A single lexical token: a Type tag plus, depending on the type, an
// identifier name, a string value, or an integer value.
class Token
{
public:
    // NOTE(Hachem): This enum is going to grow indefinitely, for now I'm only going to support basic Token types.
    // TODO(Hachem): Implement the other types.
    enum class Type
    {
        Identifier,
        Procedure,
        Structure,
        Enum,

        RCurlyBrace,
        LCurlyBrace,
        RParen,
        LParen,
        RBracket,
        LBracket,

        Colon, // NOTE(Hachem): Haha colon
        SemiColon,
        Caret,
        Ampersand,
        Exclamation,
        Equal,

        String,
        Number,
    };

    // Builds a Number token holding `number`.
    Token(const int& number)
        : type(Type::Number),
          numberValue(number) { }

    // Builds a token of `type` with a text payload. `value` is stored as the
    // identifier name for Identifier tokens and as the string value for
    // String tokens; for any other type it is ignored.
    Token(const Type& type, const std::wstring& value)
        : type(type)
    {
        if (type == Type::Identifier)
            identifier = value;
        else if (type == Type::String)
            stringValue = value;
    }

    // Builds a payload-free token of `type`.
    Token(const Type& type)
        : type(type) { }

    // Returns the kind of this token.
    inline const Type GetType() const { return type; }

    // Returns the integer value for Number tokens, empty otherwise.
    inline std::optional<int> GetNumber() const
    {
        if (type == Type::Number)
            return numberValue;

        return std::nullopt;
    }

    // Returns the name for Identifier tokens, empty otherwise.
    inline std::optional<std::wstring> GetIdentifier() const
    {
        if (type == Type::Identifier)
            return identifier;

        return std::nullopt;
    }

    // Returns the text for String tokens, empty otherwise.
    inline std::optional<std::wstring> GetStringValue() const
    {
        // String payloads live in `stringValue`, not `identifier`.
        if (type == Type::String)
            return stringValue;

        return std::nullopt;
    }

    // Converts one piece of source text into a Token (defined in Token.cpp).
    static Token ToToken(const std::wstring& buffer);
    // Returns a printable name for this token (defined in Token.cpp).
    std::wstring ToString();
private:
    Type type;

    // Payloads; only the one matching `type` is meaningful.
    int numberValue = 0;
    std::wstring identifier = L"";
    std::wstring stringValue = L"";
};
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters