Basic Lexer and Start work on language backend

Hachem-H · Jul 31, 2023 · 37a4298 · 37a4298
1 parent 5d9bab9
commit 37a4298
Show file tree

Hide file tree

Showing 13 changed files with 256 additions and 9 deletions.
diff --git a/meson.build b/meson.build
@@ -1,9 +1,12 @@
 project('Iridis', 'cpp')
 
 iridisSourceFiles = [
-    'src/CMDInterface.cpp',
-    'src/Application.cpp',
-    'src/Log.cpp',
+    'src/Core/CMDInterface.cpp',
+    'src/Core/Application.cpp',
+    'src/Core/Log.cpp',
+
+    'src/Backend/Token.cpp',
+    'src/Backend/Lexer.cpp',
 
     'src/main.cpp'
 ]

diff --git a/src/Backend/Lexer.cpp b/src/Backend/Lexer.cpp
@@ -0,0 +1,42 @@
+#include "Lexer.h"
+
+#include <iostream>
+#include <cwctype>
+
+namespace Iridis
+{
+    namespace Lexer
+    {
+        /**
+         * Splits raw source text into a flat list of tokens.
+         *
+         * Whitespace separates lexemes and is discarded. Every punctuation
+         * character (as reported by std::iswpunct) becomes its own
+         * single-character lexeme, except '_', which is kept as part of the
+         * surrounding word so that identifiers such as `my_var` are not split
+         * into three tokens. Each lexeme is classified by Token::ToToken.
+         *
+         * @param source Source text to tokenize; it is only read, never modified.
+         * @return The tokens in the order in which they appear in `source`.
+         */
+        std::vector<Token> Tokenize(std::wstring& source)
+        {
+            std::vector<Token> tokens;
+            std::wstring word;
+
+            // Emits the pending word (if any) as one token and resets it.
+            const auto flushWord = [&tokens, &word]()
+            {
+                if (word.empty())
+                    return;
+
+                tokens.push_back(Token::ToToken(word));
+                word.clear();
+            };
+
+            for (const wchar_t character : source)
+            {
+                // iswpunct() reports '_' as punctuation, but here it is a word character.
+                const bool isPunctuation = character != L'_' && std::iswpunct(character);
+
+                if (isPunctuation)
+                {
+                    flushWord();
+                    tokens.push_back(Token::ToToken(std::wstring(1, character)));
+                }
+                else if (std::iswspace(character))
+                    flushWord();
+                else
+                    word += character;
+            }
+
+            // The source may end in the middle of a word.
+            flushWord();
+
+            return tokens;
+        }
+    };
+};
diff --git a/src/Backend/Lexer.h b/src/Backend/Lexer.h
@@ -0,0 +1,13 @@
+#pragma once
+
+#include "Token.h"
+
+#include <vector>
+
+namespace Iridis
+{
+    namespace Lexer
+    {
+        // Converts `source` into a vector of tokens.
+        // Takes the text by non-const reference; the definition lives in Lexer.cpp.
+        std::vector<Token> Tokenize(std::wstring& source);
+    };
+};
diff --git a/src/Backend/Token.cpp b/src/Backend/Token.cpp
@@ -0,0 +1,67 @@
+#include "Token.h"
+
+#include <iostream>
+#include <string>
+
+namespace Iridis
+{
+    /**
+     * Classifies a single lexeme.
+     *
+     * Delimiters, operators and keywords are matched exactly against a fixed
+     * table; opening delimiters ('{', '[', '(') map to the L* types and
+     * closing delimiters ('}', ']', ')') to the R* types. Any other text is
+     * returned as an Identifier token carrying `buffer`.
+     *
+     * @param buffer The lexeme text to classify.
+     * @return The matching fixed token, or Identifier(buffer) if none matches.
+     */
+    Token Token::ToToken(const std::wstring& buffer)
+    {
+        struct FixedToken
+        {
+            const wchar_t* text;
+            Type type;
+        };
+
+        static const FixedToken fixedTokens[] =
+        {
+            { L"{",      Type::LCurlyBrace },
+            { L"}",      Type::RCurlyBrace },
+            { L"[",      Type::LBracket    },
+            { L"]",      Type::RBracket    },
+            { L"(",      Type::LParen      },
+            { L")",      Type::RParen      },
+
+            { L":",      Type::Colon       },
+            { L";",      Type::SemiColon   },
+            { L"^",      Type::Caret       },
+            { L"&",      Type::Ampersand   },
+            { L"!",      Type::Exclamation },
+
+            { L"+",      Type::Plus        },
+            { L"-",      Type::Minus       },
+            { L"*",      Type::Asterisk    },
+            { L"/",      Type::Slash       },
+            { L"=",      Type::Equal       },
+
+            { L"proc",   Type::Procedure   },
+            { L"struct", Type::Structure   },
+            { L"enum",   Type::Enum        },
+        };
+
+        for (const FixedToken& fixedToken : fixedTokens)
+        {
+            if (buffer == fixedToken.text)
+                return Token(fixedToken.type);
+        }
+
+        // TODO(Hachem): Strings and Numbers
+        return Token(Type::Identifier, buffer);
+    }
+
+    // Returns a printable name for this token's type. Identifier tokens also
+    // include their text, e.g. "Identifier(foo)". Types without a dedicated
+    // name produce "Unknown Token".
+    std::wstring Token::ToString()
+    {
+        switch (type)
+        {
+            case Type::RCurlyBrace: return L"RCurlyBrace";
+            case Type::LCurlyBrace: return L"LCurlyBrace";
+            case Type::RBracket:    return L"RBracket";
+            case Type::LBracket:    return L"LBracket";
+            case Type::RParen:      return L"RParen";
+            case Type::LParen:      return L"LParen";
+
+            case Type::Colon:       return L"Colon";
+            case Type::SemiColon:   return L"SemiColon";
+            case Type::Caret:       return L"Caret";
+            case Type::Ampersand:   return L"Ampersand";
+            case Type::Exclamation: return L"Exclamation";
+
+            case Type::Plus:        return L"Plus";
+            case Type::Minus:       return L"Minus";
+            case Type::Asterisk:    return L"Asterisk";
+            case Type::Slash:       return L"Slash";
+            case Type::Equal:       return L"Equal";
+
+            case Type::Procedure:   return L"Procedure";
+            case Type::Structure:   return L"Structure";
+            case Type::Enum:        return L"Enum";
+
+            case Type::Identifier:  return L"Identifier(" + identifier + L")";
+
+            default:                return L"Unknown Token";
+        }
+    }
+};
diff --git a/src/Backend/Token.h b/src/Backend/Token.h
@@ -0,0 +1,95 @@
+#pragma once
+
+#include <string>
+#include <optional>
+
+namespace Iridis
+{
+    /**
+     * A single lexical token: a type tag plus, for some types, a payload.
+     *
+     * Only Identifier, String and Number tokens carry a payload; every other
+     * type is fully described by its Type.
+     */
+    class Token
+    {
+    public:
+        // NOTE(Hachem): This enum is going to grow indefinitely, for now I'm only going to support basic Token types.
+        // TODO(Hachem): Implement the other types.
+        enum class Type
+        {
+            Identifier,
+            Procedure,
+            Structure,
+            Enum,
+
+            RCurlyBrace,
+            LCurlyBrace,
+            RParen,
+            LParen,
+            RBracket,
+            LBracket,
+
+            Colon,      // NOTE(Hachem): Haha colon
+            SemiColon,
+            Caret,
+            Ampersand,
+            Exclamation,
+            Equal,
+
+            Plus,
+            Minus,
+            Asterisk,
+            Slash,
+
+            String,
+            Number,
+        };
+
+        // Creates a Number token holding `number`.
+        Token(const int& number)
+            : type(Type::Number),
+              numberValue(number) { }
+
+        // Creates a token of `type`. `value` is stored only for Identifier and
+        // String tokens; for every other type it is ignored.
+        Token(const Type& type, const std::wstring& value)
+            : type(type)
+        {
+            if (type == Type::Identifier)
+                identifier = value;
+            if (type == Type::String)
+                stringValue = value;
+        }
+
+        // Creates a token of `type` without a payload.
+        Token(const Type& type)
+            : type(type) { }
+
+
+        // Returns the type tag of this token.
+        Type GetType() const { return type; }
+
+        // Returns the numeric payload, or an empty optional if this is not a Number token.
+        std::optional<int> GetNumber() const
+        {
+            if (type == Type::Number)
+                return numberValue;
+
+            return std::nullopt;
+        }
+
+        // Returns the identifier text, or an empty optional if this is not an Identifier token.
+        std::optional<std::wstring> GetIdentifier() const
+        {
+            if (type == Type::Identifier)
+                return identifier;
+
+            return std::nullopt;
+        }
+
+        // Returns the string payload, or an empty optional if this is not a String token.
+        std::optional<std::wstring> GetStringValue() const
+        {
+            // Must read stringValue: String tokens never populate `identifier`.
+            if (type == Type::String)
+                return stringValue;
+
+            return std::nullopt;
+        }
+
+        // Classifies a lexeme; defined in Token.cpp.
+        static Token ToToken(const std::wstring& buffer);
+        // Returns a printable description of this token; defined in Token.cpp.
+        std::wstring ToString();
+    private:
+        Type type;
+
+        int numberValue          = 0;
+        std::wstring identifier  = L"";
+        std::wstring stringValue = L"";
+    };
+};
diff --git a/src/Application.cpp → src/Core/Application.cpp b/src/Application.cpp → src/Core/Application.cpp
@@ -1,6 +1,9 @@
 #include "Application.h"
 #include "Log.h"
 
+#include "Backend/Lexer.h"
+#include "Backend/Token.h"
+
 #include <filesystem>
 #include <iostream>
 #include <fstream>
@@ -100,8 +103,18 @@ namespace Iridis
 
     int Application::CompileFile(const std::string& path, const CompileOptions& compileOptions)
     {
-        // TODO(Hachem): Implement Compiler
-        std::this_thread::sleep_for(std::chrono::milliseconds(500 + rand() % 3500));
+        std::optional<std::wstring> file = ReadFile(&path[0]);
+
+        if (!file)
+        {
+            IRIDIS_CORE_ERROR("Could not load file");
+            return -1;
+        }
+
+        std::vector<Token> tokens =  Lexer::Tokenize(*file);
+        for (Token& token : tokens)
+            std::wcout << token.ToString() << std::endl;
+
         return 0;
     }
 

diff --git a/src/Application.h → src/Core/Application.h b/src/Application.h → src/Core/Application.h
diff --git a/src/CMDInterface.cpp → src/Core/CMDInterface.cpp b/src/CMDInterface.cpp → src/Core/CMDInterface.cpp
@@ -65,8 +65,21 @@ namespace Iridis
             return (int)Application::CompileProject(path, options);
         }
 
+        // Expects exactly three arguments and treats argv[2] as the path of the
+        // file to compile. On any other argument count it logs an error, prints
+        // the compile usage text and returns -1; otherwise it returns the result
+        // of Application::CompileFile called with value-initialized options.
+        int Compile(int argc, char* argv[])
+        {
+            const bool hasExactlyOnePath = (argc == 3);
+            if (!hasExactlyOnePath)
+            {
+                IRIDIS_ERROR("File expected!\n");
+                Usages::PrintCompileHelp();
+                return -1;
+            }
+
+            CompileOptions defaultOptions = {};
+            const char* filePath = argv[2];
+            return Application::CompileFile(filePath, defaultOptions);
+        }
+
+        // Stub: ignores its arguments and always returns 0.
         int Run(int argc, char* argv[])     { return 0 ; }
-        int Compile(int argc, char* argv[]) { return 0 ; }
+        // Stub: ignores its arguments and always returns 0.
         int GenBind(int argc, char* argv[]) { return 0 ; }
     };
 

diff --git a/src/CMDInterface.h → src/Core/CMDInterface.h b/src/CMDInterface.h → src/Core/CMDInterface.h
diff --git a/src/Log.cpp → src/Core/Log.cpp b/src/Log.cpp → src/Core/Log.cpp
diff --git a/src/Log.h → src/Core/Log.h b/src/Log.h → src/Core/Log.h
@@ -39,6 +39,7 @@ namespace Iridis
 #define IRIDIS_ERROR(...) ::Iridis::Logger::GetGlobalLogger()->error(__VA_ARGS__)
 #define IRIDIS_WARN(...)  ::Iridis::Logger::GetGlobalLogger()->warn(__VA_ARGS__)
 #define IRIDIS_INFO(...)  ::Iridis::Logger::GetGlobalLogger()->info(__VA_ARGS__)
+// Forwards its arguments to debug() on the global logger.
+#define IRIDIS_DEBUG(...)  ::Iridis::Logger::GetGlobalLogger()->debug(__VA_ARGS__)
 
 #define IRIDIS_CORE_ERROR(...) ::Iridis::Logger::GetInternalLogger()->error(__VA_ARGS__) 
 #define IRIDIS_CORE_WARN(...)  ::Iridis::Logger::GetInternalLogger()->warn(__VA_ARGS__) 

diff --git a/src/Platform.h → src/Core/Platform.h b/src/Platform.h → src/Core/Platform.h
diff --git a/src/main.cpp b/src/main.cpp
@@ -1,6 +1,6 @@
-#include "CMDInterface.h"
-#include "Application.h"
-#include "Log.h"
+#include "Core/CMDInterface.h"
+#include "Core/Application.h"
+#include "Core/Log.h"
 
 #include <unordered_map>
 #include <string>