From 202c1d2ca2ff332a06b43df9752605ee21ef821b Mon Sep 17 00:00:00 2001 From: Clemens-Dautermann Date: Thu, 20 Aug 2020 17:11:43 +0200 Subject: [PATCH] lexing and parsing unary operators --- .../.idea.Compiler/.idea/contentModel.xml | 2 + .../.idea/.idea.Compiler/.idea/workspace.xml | 175 +++++++++--------- Compiler/Compiler.cs | 111 ++++++++--- Compiler/Lexer/Lexer.cs | 7 + Compiler/Lexer/TokenType.cs | 5 + Compiler/Parser/Nodes/NodeType.cs | 3 +- Compiler/Parser/Nodes/OperatorType.cs | 10 + Compiler/Parser/Nodes/UnaryOperatorNode.cs | 14 ++ Compiler/Parser/Parser.cs | 57 ++++-- grammar | 3 +- stage_2/invalid/missing_const.c | 3 + stage_2/invalid/missing_semicolon.c | 3 + stage_2/invalid/nested_missing_const.c | 3 + stage_2/invalid/wrong_order.c | 3 + stage_2/valid/bitwise.c | 3 + stage_2/valid/bitwise_zero.c | 3 + stage_2/valid/neg.c | 3 + stage_2/valid/nested_ops.c | 3 + stage_2/valid/nested_ops_2.c | 3 + stage_2/valid/not_five.c | 3 + stage_2/valid/not_zero.c | 3 + 21 files changed, 290 insertions(+), 130 deletions(-) create mode 100644 Compiler/Parser/Nodes/OperatorType.cs create mode 100644 Compiler/Parser/Nodes/UnaryOperatorNode.cs create mode 100644 stage_2/invalid/missing_const.c create mode 100644 stage_2/invalid/missing_semicolon.c create mode 100644 stage_2/invalid/nested_missing_const.c create mode 100644 stage_2/invalid/wrong_order.c create mode 100644 stage_2/valid/bitwise.c create mode 100644 stage_2/valid/bitwise_zero.c create mode 100644 stage_2/valid/neg.c create mode 100644 stage_2/valid/nested_ops.c create mode 100644 stage_2/valid/nested_ops_2.c create mode 100644 stage_2/valid/not_five.c create mode 100644 stage_2/valid/not_zero.c diff --git a/Compiler/.idea/.idea.Compiler/.idea/contentModel.xml b/Compiler/.idea/.idea.Compiler/.idea/contentModel.xml index 89a15d2..ecd131e 100644 --- a/Compiler/.idea/.idea.Compiler/.idea/contentModel.xml +++ b/Compiler/.idea/.idea.Compiler/.idea/contentModel.xml @@ -29,8 +29,10 @@ + + diff --git a/Compiler/.idea/.idea.Compiler/.idea/workspace.xml b/Compiler/.idea/.idea.Compiler/.idea/workspace.xml index a539953..8cb404b 100644 --- a/Compiler/.idea/.idea.Compiler/.idea/workspace.xml +++ b/Compiler/.idea/.idea.Compiler/.idea/workspace.xml @@ -20,33 +20,27 @@ - - - - - - - - - - - - - + + + + + + + + + + + + + + - - - - - - - - - - - - + + + + + @@ -144,12 +142,12 @@ - + @@ -213,106 +212,110 @@ - + - + + + + + - + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - + - + - - + + - + @@ -325,14 +328,18 @@ - - - - - + - + + + + + + + + + diff --git a/Compiler/Compiler.cs b/Compiler/Compiler.cs index 2dbe8b3..c715648 100644 --- a/Compiler/Compiler.cs +++ b/Compiler/Compiler.cs @@ -19,44 +19,79 @@ namespace Compiler } else { - bool debug = false; - string inputFileName = args[0].Split("/").Last(); - string outputPath = args[0].Substring(0, args[0].LastIndexOf("/")); - - if (args.Length == 2) + if (args[0] != "--dev-mode--") { - if (args[2] == "-v") - { - debug = true; - } + NonDevMode(args); + } + else + { + DevMode(); + } + } + } + + static void DevMode() + { + for (int i = 1; i <= 2; i++) + { + Console.WriteLine($"---------------------valid, stage {i}-------------------------------"); + foreach (string file in Directory.GetFiles($"/home/clemens/repositorys/lcc/stage_{i}/valid")) + { + Console.WriteLine("-------------"); + List tokens = TestLexer(file, 0); + TestParser(tokens, file, 1); } - Compile(args[0], $"{outputPath}/assembly.s", debug); - if (debug) + + Console.WriteLine($"---------------------invalid, stage {i}-------------------------------"); + foreach (string file in Directory.GetFiles($"/home/clemens/repositorys/lcc/stage_{i}/invalid")) { - Console.WriteLine($"Compiled to {outputPath}/assembly.s"); + Console.WriteLine("-------------"); + List tokens = TestLexer(file, 0); + TestParser(tokens, file, 1); } + } + } - ProcessStartInfo startInfo = new ProcessStartInfo() - { - FileName = "gcc", - Arguments = $"{outputPath}/assembly.s -o {outputPath}/{inputFileName.Replace(".c", "")}" - }; - Process proc = new Process() {StartInfo = startInfo,}; - proc.Start(); + static void NonDevMode(string[] args) + { + bool debug = false; + string inputFileName = args[0].Split("/").Last(); + string outputPath = args[0].Substring(0, args[0].LastIndexOf("/")); - while (!proc.HasExited) + if (args.Length == 2) + { + if (args[2] == "-v") { - Thread.Sleep(1); + debug = true; } + } - File.Delete($"{outputPath}/assembly.s"); + Compile(args[0], $"{outputPath}/assembly.s", debug); + if (debug) + { + Console.WriteLine($"Compiled to {outputPath}/assembly.s"); + } - if (debug) - { - Console.WriteLine($"Assembled to {outputPath}/program"); - Console.WriteLine("Deleted assembly.s file. Done!"); - } + ProcessStartInfo startInfo = new ProcessStartInfo() + { + FileName = "gcc", + Arguments = $"{outputPath}/assembly.s -o {outputPath}/{inputFileName.Replace(".c", "")}" + }; + Process proc = new Process() {StartInfo = startInfo,}; + proc.Start(); + + while (!proc.HasExited) + { + Thread.Sleep(1); + } + + File.Delete($"{outputPath}/assembly.s"); + + if (debug) + { + Console.WriteLine($"Assembled to {outputPath}/program"); + Console.WriteLine("Deleted assembly.s file. Done!"); } } @@ -102,7 +137,27 @@ namespace Compiler static void PrettyPrint(Node root, string indent) { - Console.WriteLine(indent + root.NodeType); + switch (root.NodeType) + { + case NodeType.ExpressionNode: + if (root is UnaryOperatorNode) + { + Console.WriteLine(indent + root.NodeType + ":" + ((UnaryOperatorNode) root).OperatorType); + } + + if (root is ConstantNode) + { + Console.WriteLine(indent + root.NodeType + ":" + ((ConstantNode) root).value); + } + break; + case NodeType.FunctionNode: + Console.WriteLine(indent + root.NodeType + ":" + ((FunctionNode) root).Name); + break; + case NodeType.ProgramNode: + case NodeType.ReturnStatementNode: + break; + } + foreach (Node child in root.Children) { PrettyPrint(child, indent + " "); diff --git a/Compiler/Lexer/Lexer.cs b/Compiler/Lexer/Lexer.cs index 4d2e4d3..3bcef61 100644 --- a/Compiler/Lexer/Lexer.cs +++ b/Compiler/Lexer/Lexer.cs @@ -47,6 +47,9 @@ namespace Compiler.Lexer patterns.Add(new Pattern(@"^;", TokenType.SemicolonToken)); patterns.Add(new Pattern(@"^[a-zA-Z]\w*", TokenType.IdentifierToken)); patterns.Add(new Pattern(@"^[0-9]+", TokenType.IntegerLiteralToken)); + patterns.Add(new Pattern(@"^-", TokenType.NegationToken)); + patterns.Add(new Pattern(@"^~", TokenType.BitwiseComplementToken)); + patterns.Add(new Pattern(@"^!", TokenType.LogicalNegationToken)); //try each pattern do determine if it is the one matching at the beginning //TODO: There sure is room for optimization here @@ -83,6 +86,10 @@ namespace Compiler.Lexer case TokenType.CloseBraceToken: case TokenType.IntToken: case TokenType.SemicolonToken: + case TokenType.NegationToken: + case TokenType.BitwiseComplementToken: + case TokenType.LogicalNegationToken: + case TokenType.InvalidToken: break; default: t.TokenType = TokenType.InvalidToken; diff --git a/Compiler/Lexer/TokenType.cs b/Compiler/Lexer/TokenType.cs index 4774217..94fed3b 100644 --- a/Compiler/Lexer/TokenType.cs +++ b/Compiler/Lexer/TokenType.cs @@ -11,6 +11,11 @@ namespace Compiler.Lexer SemicolonToken, IdentifierToken, IntegerLiteralToken, + + //unary operator tokens + NegationToken, + BitwiseComplementToken, + LogicalNegationToken, //special Token to represent invalid matches InvalidToken, diff --git a/Compiler/Parser/Nodes/NodeType.cs b/Compiler/Parser/Nodes/NodeType.cs index a60cebd..d760db2 100644 --- a/Compiler/Parser/Nodes/NodeType.cs +++ b/Compiler/Parser/Nodes/NodeType.cs @@ -5,6 +5,7 @@ namespace Compiler.Parser.Nodes ProgramNode, FunctionNode, ReturnStatementNode, - ExpressionNode + ExpressionNode, + } } \ No newline at end of file diff --git a/Compiler/Parser/Nodes/OperatorType.cs b/Compiler/Parser/Nodes/OperatorType.cs new file mode 100644 index 0000000..5b765f0 --- /dev/null +++ b/Compiler/Parser/Nodes/OperatorType.cs @@ -0,0 +1,10 @@ +namespace Compiler.Parser.Nodes +{ + public enum OperatorType + { + //unary operators + Negation, + BitwiseComplement, + LogicalNegation, + } +} \ No newline at end of file diff --git a/Compiler/Parser/Nodes/UnaryOperatorNode.cs b/Compiler/Parser/Nodes/UnaryOperatorNode.cs new file mode 100644 index 0000000..bdb6266 --- /dev/null +++ b/Compiler/Parser/Nodes/UnaryOperatorNode.cs @@ -0,0 +1,14 @@ +namespace Compiler.Parser.Nodes +{ + public sealed class UnaryOperatorNode : Node + { + public override NodeType NodeType { get; set; } + public OperatorType OperatorType { get; set; } + + public UnaryOperatorNode(OperatorType operatorType) + { + OperatorType = operatorType; + NodeType = NodeType.ExpressionNode; + } + } +} \ No newline at end of file diff --git a/Compiler/Parser/Parser.cs b/Compiler/Parser/Parser.cs index ee3a5d5..ebb58ed 100644 --- a/Compiler/Parser/Parser.cs +++ b/Compiler/Parser/Parser.cs @@ -112,26 +112,51 @@ namespace Compiler.Parser throw new MissingTokenException(TokenType.IntegerLiteralToken); } - Token constantToken = _tokenList[0]; + Token expressionToken = _tokenList[0]; - //check if TokenType is right - if (constantToken.TokenType != TokenType.IntegerLiteralToken) + //the next token might be a constant or any operator + switch (expressionToken.TokenType) { - throw new UnexpectedTokenException(TokenType.IntToken, constantToken.TokenType); - } - else - { - //remove int literal token - _tokenList.RemoveAt(0); + case TokenType.IntegerLiteralToken: + //remove int literal token + _tokenList.RemoveAt(0); - //check if value Type is right - if (constantToken.Value.GetType() != typeof(int)) - { - throw new WrongTypeException(typeof(int), constantToken.Value.GetType()); - } + //check if value Type is right + if (expressionToken.Value.GetType() != typeof(int)) + { + throw new WrongTypeException(typeof(int), expressionToken.Value.GetType()); + } - //return final constant node to end recursion - n = new ConstantNode((int) constantToken.Value); + //return final constant node to end recursion + n = new ConstantNode((int) expressionToken.Value); + break; + case TokenType.NegationToken: + _tokenList.RemoveAt(0); + n = new UnaryOperatorNode(OperatorType.Negation); + n.Children.Add(Parse(NodeType.ExpressionNode)); + break; + case TokenType.BitwiseComplementToken: + _tokenList.RemoveAt(0); + n = new UnaryOperatorNode(OperatorType.BitwiseComplement); + n.Children.Add(Parse(NodeType.ExpressionNode)); + break; + case TokenType.LogicalNegationToken: + _tokenList.RemoveAt(0); + n = new UnaryOperatorNode(OperatorType.LogicalNegation); + n.Children.Add(Parse(NodeType.ExpressionNode)); + break; + case TokenType.IntToken: + case TokenType.OpenParenthesisToken: + case TokenType.CloseParenthesisToken: + case TokenType.OpenBraceToken: + case TokenType.CloseBraceToken: + case TokenType.ReturnToken: + case TokenType.SemicolonToken: + case TokenType.IdentifierToken: + case TokenType.InvalidToken: + throw new UnexpectedTokenException(TokenType.IntToken, expressionToken.TokenType); + default: + throw new UnexpectedTokenException(TokenType.IntToken, expressionToken.TokenType); } break; diff --git a/grammar b/grammar index 24ea767..f498b97 100644 --- a/grammar +++ b/grammar @@ -1,4 +1,5 @@ ::= ::= "int" "(" ")" "{" "}" ::= "return" ";" - ::= + ::= | + ::= "!" | "~" | "-" \ No newline at end of file diff --git a/stage_2/invalid/missing_const.c b/stage_2/invalid/missing_const.c new file mode 100644 index 0000000..6dd069e --- /dev/null +++ b/stage_2/invalid/missing_const.c @@ -0,0 +1,3 @@ +int main() { + return !; +} \ No newline at end of file diff --git a/stage_2/invalid/missing_semicolon.c b/stage_2/invalid/missing_semicolon.c new file mode 100644 index 0000000..5570d64 --- /dev/null +++ b/stage_2/invalid/missing_semicolon.c @@ -0,0 +1,3 @@ +int main() { + return !5 +} \ No newline at end of file diff --git a/stage_2/invalid/nested_missing_const.c b/stage_2/invalid/nested_missing_const.c new file mode 100644 index 0000000..43b7097 --- /dev/null +++ b/stage_2/invalid/nested_missing_const.c @@ -0,0 +1,3 @@ +int main() { + return !~; +} \ No newline at end of file diff --git a/stage_2/invalid/wrong_order.c b/stage_2/invalid/wrong_order.c new file mode 100644 index 0000000..27a9f02 --- /dev/null +++ b/stage_2/invalid/wrong_order.c @@ -0,0 +1,3 @@ +int main() { + return 4-; +} \ No newline at end of file diff --git a/stage_2/valid/bitwise.c b/stage_2/valid/bitwise.c new file mode 100644 index 0000000..0eee672 --- /dev/null +++ b/stage_2/valid/bitwise.c @@ -0,0 +1,3 @@ +int main() { + return !12; +} \ No newline at end of file diff --git a/stage_2/valid/bitwise_zero.c b/stage_2/valid/bitwise_zero.c new file mode 100644 index 0000000..2c2ed2e --- /dev/null +++ b/stage_2/valid/bitwise_zero.c @@ -0,0 +1,3 @@ +int main() { + return ~0; +} \ No newline at end of file diff --git a/stage_2/valid/neg.c b/stage_2/valid/neg.c new file mode 100644 index 0000000..b7ac431 --- /dev/null +++ b/stage_2/valid/neg.c @@ -0,0 +1,3 @@ +int main() { + return -5; +} \ No newline at end of file diff --git a/stage_2/valid/nested_ops.c b/stage_2/valid/nested_ops.c new file mode 100644 index 0000000..9fb3f87 --- /dev/null +++ b/stage_2/valid/nested_ops.c @@ -0,0 +1,3 @@ +int main() { + return !-3; +} \ No newline at end of file diff --git a/stage_2/valid/nested_ops_2.c b/stage_2/valid/nested_ops_2.c new file mode 100644 index 0000000..416d4d1 --- /dev/null +++ b/stage_2/valid/nested_ops_2.c @@ -0,0 +1,3 @@ +int main() { + return -~0; +} \ No newline at end of file diff --git a/stage_2/valid/not_five.c b/stage_2/valid/not_five.c new file mode 100644 index 0000000..df792bb --- /dev/null +++ b/stage_2/valid/not_five.c @@ -0,0 +1,3 @@ +int main() { + return !5; +} \ No newline at end of file diff --git a/stage_2/valid/not_zero.c b/stage_2/valid/not_zero.c new file mode 100644 index 0000000..b6b7cb5 --- /dev/null +++ b/stage_2/valid/not_zero.c @@ -0,0 +1,3 @@ +int main() { + return !0; +} \ No newline at end of file