From 53faa1193af62ccf72869b10727437af466d8fb2 Mon Sep 17 00:00:00 2001 From: Guy Perfect Date: Wed, 12 Aug 2020 19:59:02 -0500 Subject: [PATCH] Implementing breakpoint expression parser --- src/desktop/vue/Breakpoint.java | 636 ++++++++++++++++++++++++++++++++ 1 file changed, 636 insertions(+) create mode 100644 src/desktop/vue/Breakpoint.java diff --git a/src/desktop/vue/Breakpoint.java b/src/desktop/vue/Breakpoint.java new file mode 100644 index 0000000..784fedd --- /dev/null +++ b/src/desktop/vue/Breakpoint.java @@ -0,0 +1,636 @@ +package vue; + +// Java imports +import java.util.*; + +// Breakpoint definition +public class Breakpoint { + + // Instance fields + private int errCode; // Error type + private int errPosition; // Character of input error + private String errText; // Offending token text + private String expression; // Un-processed input + private boolean isEnabled; // Breakpoint is active + private boolean isValid; // Expression is valid and processed + private String name; // Display name + private Token[] tokens; // Evaluation tokens + + + + /////////////////////////////////////////////////////////////////////////// + // Classes // + /////////////////////////////////////////////////////////////////////////// + + // Operator definition + private static class OpDef { + int id; // Identifier + int precedence; // Operator precedence + int type; // Operator category + + // Constructor + OpDef(int precedence, int type, int id) { + this.id = id; + this.precedence = precedence; + this.type = type; + } + + } + + // Expression token + private static class Token { + int id; // Operator or symbol identifier + Token left; // Left operand + Token parent; // Containing operator + int precedence; // Operator precedence + Token right; // Right operand + int start; // Character position in expression + String text; // Display text + int type; // Token category + Object value; // Literal value + + // Constructor + Token(int type, int start, String text) { + this.start = start; + this.text = text; + this.type = type; + } + + } + + + + /////////////////////////////////////////////////////////////////////////// + // Constants // + /////////////////////////////////////////////////////////////////////////// + + // Error codes + public static final int NONE = 0; + public static final int BADTOKEN = 1; + public static final int EARLYEOF = 2; + public static final int EMPTY = 3; + public static final int INVALID = 4; + public static final int NESTING = 5; + public static final int UNEXPECTED = 6; + + // Token types + private static final int BINARY = 0; + private static final int CLOSE = 1; + private static final int LITERAL = 2; + private static final int OPEN = 3; + private static final int SYMBOL = 4; + private static final int UNARY = 5; + + // Expected token modes adjacent to any given token + private static final int MODES_AFTER = 0b010110; + private static final int MODES_BEFORE = 0b000011; + + // Token IDs + private static final int ADD = 0; + private static final int BITWISE_AND = 1; + private static final int BITWISE_NOT = 2; + private static final int BITWISE_OR = 3; + private static final int BITWISE_XOR = 4; + private static final int CEIL = 5; + private static final int DIVIDE = 6; + private static final int EQUAL = 7; + private static final int FLOAT = 8; + private static final int FLOOR = 9; + private static final int GREATER_EQUAL_SIGNED = 10; + private static final int GREATER_EQUAL_UNSIGNED = 11; + private static final int GREATER_SIGNED = 12; + private static final int GREATER_UNSIGNED = 13; + private static final int GROUP = 14; + private static final int LESS_EQUAL_SIGNED = 15; + private static final int LESS_EQUAL_UNSIGNED = 16; + private static final int LESS_SIGNED = 17; + private static final int LESS_UNSIGNED = 18; + private static final int LOGICAL_AND = 19; + private static final int LOGICAL_NOT = 20; + private static final int LOGICAL_OR = 21; + private static final int LOGICAL_XOR = 22; + private static final int MULTIPLY = 23; + private static final int NEGATE = 24; + private static final int NOT_EQUAL = 25; + private static final int READ = 26; + private static final int REMAINDER = 27; + private static final int ROUND = 28; + private static final int SHIFT_LEFT = 29; + private static final int SHIFT_RIGHT = 30; + private static final int SHIFT_RIGHT_ARITHMETIC = 31; + private static final int SUBTRACT = 32; + private static final int TRUNC = 33; + private static final int WORD = 34; + private static final int XFLOAT = 35; + private static final int XWORD = 36; + + // Token definitions + private static final HashMap OPDEFS; + + // Static initializer + static { + OPDEFS = new HashMap(); + OPDEFS.put("(" , new OpDef( 0, OPEN , GROUP )); + OPDEFS.put(")" , new OpDef( 0, CLOSE , GROUP )); + OPDEFS.put("[" , new OpDef( 0, OPEN , READ )); + OPDEFS.put("]" , new OpDef( 0, CLOSE , READ )); + OPDEFS.put("~" , new OpDef( 1, UNARY , BITWISE_NOT )); + OPDEFS.put("!" , new OpDef( 1, UNARY , LOGICAL_NOT )); + OPDEFS.put("-" , new OpDef( 1, UNARY , NEGATE )); + OPDEFS.put("ceil" , new OpDef( 1, UNARY , CEIL )); + OPDEFS.put("float" , new OpDef( 1, UNARY , FLOAT )); + OPDEFS.put("floor" , new OpDef( 1, UNARY , FLOOR )); + OPDEFS.put("round" , new OpDef( 1, UNARY , ROUND )); + OPDEFS.put("trunc" , new OpDef( 1, UNARY , TRUNC )); + OPDEFS.put("word" , new OpDef( 1, UNARY , WORD )); + OPDEFS.put("xfloat", new OpDef( 1, UNARY , XFLOAT )); + OPDEFS.put("xword" , new OpDef( 1, UNARY , XWORD )); + OPDEFS.put("/" , new OpDef( 2, BINARY, DIVIDE )); + OPDEFS.put("*" , new OpDef( 2, BINARY, MULTIPLY )); + OPDEFS.put("%" , new OpDef( 2, BINARY, REMAINDER )); + OPDEFS.put("+" , new OpDef( 3, BINARY, ADD )); + OPDEFS.put("<<" , new OpDef( 4, BINARY, SHIFT_LEFT )); + OPDEFS.put(">>" , new OpDef( 4, BINARY, SHIFT_RIGHT )); + OPDEFS.put(">>>" , new OpDef( 4, BINARY, SHIFT_RIGHT_ARITHMETIC)); + OPDEFS.put(">" , new OpDef( 5, BINARY, GREATER_SIGNED )); + OPDEFS.put(">_" , new OpDef( 5, BINARY, GREATER_UNSIGNED )); + OPDEFS.put(">=" , new OpDef( 5, BINARY, GREATER_EQUAL_SIGNED )); + OPDEFS.put(">=_" , new OpDef( 5, BINARY, GREATER_EQUAL_UNSIGNED)); + OPDEFS.put("<" , new OpDef( 5, BINARY, LESS_SIGNED )); + OPDEFS.put("<_" , new OpDef( 5, BINARY, LESS_UNSIGNED )); + OPDEFS.put("<=" , new OpDef( 5, BINARY, LESS_EQUAL_SIGNED )); + OPDEFS.put("<=_" , new OpDef( 5, BINARY, LESS_EQUAL_UNSIGNED )); + OPDEFS.put("==" , new OpDef( 6, BINARY, EQUAL )); + OPDEFS.put("!=" , new OpDef( 6, BINARY, NOT_EQUAL )); + OPDEFS.put("&" , new OpDef( 7, BINARY, BITWISE_AND )); + OPDEFS.put("^" , new OpDef( 8, BINARY, BITWISE_XOR )); + OPDEFS.put("|" , new OpDef( 9, BINARY, BITWISE_OR )); + OPDEFS.put("&&" , new OpDef(10, BINARY, LOGICAL_AND )); + OPDEFS.put("^^" , new OpDef(11, BINARY, LOGICAL_XOR )); + OPDEFS.put("||" , new OpDef(12, BINARY, LOGICAL_OR )); + }; + + + + /////////////////////////////////////////////////////////////////////////// + // Constructors // + /////////////////////////////////////////////////////////////////////////// + + // Default constructor + public Breakpoint() { + setExpression(null); + name = ""; + } + + + + /////////////////////////////////////////////////////////////////////////// + // Public Methods // + /////////////////////////////////////////////////////////////////////////// + + // Retrieve the most recent error code + public int getErrorCode() { + return errCode; + } + + // Retrieve the most recent error character position + public int getErrorPosition() { + return errPosition; + } + + // Retrieve the most recent error text + public String getErrorText() { + return errText; + } + + // Retrieve the most recent input expression + public String getExpression() { + return expression; + } + + // Retrieve the display name + public String getName() { + return name; + } + + // Determine whether the breakpoint is enabled + public boolean isEnabled() { + return isEnabled; + } + + // Determine whether the breakpoint is valid + public boolean isValid() { + return isValid; + } + + // Specify and parse an expression + public boolean setExpression(String expression) { + + // Configure instance fields + errCode = NONE; + errPosition = 0; + errText = ""; + this.expression = expression == null ? expression = "" : expression; + + // Process the expression + var tokens = parse(); + if (tokens == null || !validate(tokens)) + return isValid = false; + tree(tokens); + + // Produce an RPN-ordered list of tokens + var tok = tokens.remove(0); + while (tok != null) { + + // Traverse to left child node + if (tok.left != null) { + tok = tok.left; + tok.parent.left = null; + continue; + } + + // Traverse to right child node + if (tok.right != null) { + tok = tok.right; + tok.parent.right = null; + continue; + } + + // No children: add node to output + System.out.println(tok.text); + tokens.add(tok); + tok = tok.parent; + } + this.tokens = tokens.toArray(new Token[tokens.size()]); + + // The expression was successfully parsed + return isValid = true; + } + + // Specify the display name + public void setName(String name) { + this.name = name == null ? "" : name; + } + + + + /////////////////////////////////////////////////////////////////////////// + // Package Methods // + /////////////////////////////////////////////////////////////////////////// + + // Determine the required stack size to evaluate the expression + int depth() { + + // Error checking + if (!isValid) + return 0; + + // Count the maximum size of the stack + int max = 0; + int size = 0; + for (var tok : tokens) switch (tok.type) { + case BINARY : size--; break; + case LITERAL: // Fallthrough + case SYMBOL : max = Math.max(max, ++size); + } + return max; + } + + + + /////////////////////////////////////////////////////////////////////////// + // Private Methods // + /////////////////////////////////////////////////////////////////////////// + + // Adjust a float value as needed + private static float fixFloat(float value) { + int bits = Float.floatToRawIntBits(value); + int exp = bits & 0x7F800000; + int digits = bits & 0x007FFFFF; + return + (bits & 0x7FFFFFFF) == 0 || // Zero + exp == 0x7F800000 || // Indefinite + exp == 0 && digits != 0 // Denormal + ? 0 : value; + } + + // Parse an expression into tokens + private ArrayList parse() { + var tokens = new ArrayList(); + + // Parse the expression + var chars = (expression + " ").toCharArray(); + for (int x = 0; x < chars.length; x++) { + char c = chars[x]; + + // Ignore whitespace + if (c == ' ' || c == '\t') + continue; + + // Produce a token based on the first character + Token tok = + c >= '0' && c <= '9' || c == '.' ? + parseLiteral(chars, x) : + c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' ? + parseSymbol (chars, x) : + parseOperator (chars, x) + ; + + // There was an error processing the token + if (tok == null) + return null; + + // Advance to the next token + tokens.add(tok); + x += tok.text.length() - 1; + } // x + + // The expression contains no tokens + if (tokens.size() == 0) { + errCode = EMPTY; + errPosition = 1; + errText = ""; + return null; + } + + return tokens; + } + + // Parse a literal + private Token parseLiteral(char[] chars, int start) { + boolean isFloat = chars[start] == '.'; // The figure is a float + boolean isHex = false; // The figure is in hexadecimal + + // Process through the end of the expression + for (int x = start + 1; x < chars.length; x++) { + char c = chars[x]; + + // The literal begins with "0x" + if (c == 'x' || c == 'X') { + + // "x" cannot appear here + if (isFloat || x != start + 1 || chars[start] != '0') { + errCode = UNEXPECTED; + errPosition = x + 1; + errText = Character.toString(c); + return null; + } + + // Configure as a hexadecimal integer + isHex = true; + continue; + } + + // The literal contains "." + if (c == '.') { + + // "." cannot appear here + if (isHex || isFloat) { + errCode = UNEXPECTED; + errPosition = x + 1; + errText = Character.toString(c); + return null; + } + + // Configure as a float + isFloat = true; + continue; + } + + // The character is part of the token + if ( + c >= '0' && c <= '9' || + isHex && (c >= 'a' && c <= 'f' || c >= 'A' && c <= 'F') + ) continue; + + // Produce a new token + var ret = new Token(LITERAL, start, + new String(chars, start, x - start)); + + // Parse the literal value + try { + if (isHex) ret.value = (int) + Long.parseLong(ret.text.substring(2), 16); + else if (isFloat) ret.value = + fixFloat(Float.parseFloat(ret.text)); + else ret.value = + Integer.parseInt(ret.text); + return ret; + } + + // Could not parse the value + catch (Exception e) { + errCode = UNEXPECTED; + errPosition = x + 1; + errText = ret.text; + return null; + } + + } // x + + return null; // Unreachable + } + + // Parse an operator + private Token parseOperator(char[] chars, int start) { + + // Process through the end of the expression + for (int x = start + 1; x < chars.length; x++) { + char c = chars[x]; + + // The character could be part of the token + if (!( + c >= 'a' && c <= 'z' || + c >= 'A' && c <= 'Z' || + c >= '0' && c <= '9' || + c == ' ' || c == '\t' + )) continue; + + // Produce a new token + var ret = new Token(0, start, null); + + // Find the longest operator match + for (int length = x - start; length >= 1; length--) { + String text = new String(chars, start, length); + var def = OPDEFS.get(text); + + // There is no matching operator + if (def == null) + continue; + + // A matching operator was found + ret.id = def.id; + ret.precedence = def.precedence; + ret.text = text; + ret.type = def.type; + return ret; + } + + // The operator was not identified + errCode = BADTOKEN; + errPosition = start + 1; + errText = Character.toString(chars[start]); + return null; + } // x + + return null; // Unreachable + } + + // Parse a symbol (which may be an operator) + private Token parseSymbol(char[] chars, int start) { + + // Process through the end of the expression + for (int x = start + 1; x < chars.length; x++) { + char c = chars[x]; + + // The character is part of the token + if ( + c >= 'a' && c <= 'z' || + c >= 'A' && c <= 'Z' || + c >= '0' && c <= '9' || + c == '_' || c == '.' + ) continue; + + // Produce a new token + var ret = new Token(SYMBOL, start, + new String(chars, start, x - start)); + + // The token is an operator + var def = OPDEFS.get(ret.text.toLowerCase()); + if (def != null) { + ret.id = def.id; + ret.precedence = def.precedence; + ret.type = def.type; + return ret; + } + + // The token is a symbol + return ret; + } // x + + return null; // Unreachable + } + + // Build an expression tree from a list of tokens + private void tree(ArrayList tokens) { + + // Process all operators + while (tokens.size() > 1) { + int end = tokens.size() - 1; + int start = 0; + + // Locate the bounds of the innermost nested group + for (int x = 0; x < end; x++) { + var tok = tokens.get(x); + if (tok.type == OPEN) + start = x + 1; + if (tok.type != CLOSE) + continue; + end = x - 1; + break; + } + + // Apply unary operators + for (int x = end; x >= start; x--) { + var tok = tokens.get(x); + if (tok.right != null || tok.type != UNARY) + continue; + tok.right = tokens.remove(x + 1); + tok.right.parent = tok; + end--; + } + + // Apply binary operators + while (start != end) { + int index = -1; + Token tok = null; + + // Locate the left-most operator with the highest precedence + for (int x = start; x < end; x++) { + var tik = tokens.get(x); + if (tik.right != null || tik.type != BINARY || + tok != null && tik.precedence >= tok.precedence) + continue; + index = x; + tok = tik; + } + + // Apply the operator + tok.right = tokens.remove(index + 1); + tok.left = tokens.remove(index - 1); + tok.left.parent = tok.right.parent = tok; + end -= 2; + } + + // There are no group operators + if (tokens.size() == 1) + break; + + // Apply the group operators + tokens.remove(end + 1); + if (tokens.remove(start - 1).id == READ) { + var tok = new Token(UNARY, 0, "{Read Word}"); + tok.right = tokens.remove(start - 1); + tok.right.parent = tok; + tokens.add(start - 1, tok); + } + + } // size + + } + + // Ensure a sequence of tokens is valid + private boolean validate(ArrayList tokens) { + int mode = 0; + var stack = new Stack(); + + // Validate all tokens + for (var tok : tokens) { + + // Expected token mode mismatch + if ((MODES_BEFORE >> tok.type & 1) != mode) { + + // The token is invalid + if (tok.id != NEGATE) { + errCode = INVALID; + errPosition = tok.start; + errText = tok.text; + return false; + } + + // Convert negate to subtract + tok.id = SUBTRACT; + tok.precedence = 3; + tok.type = BINARY; + } + + // Nesting error + if (tok.type == CLOSE && + (stack.empty() || stack.pop().id != tok.id)) { + errCode = NESTING; + errPosition = tok.start; + errText = tok.text; + return false; + } + + // The token opens a group + if (tok.type == OPEN) + stack.push(tok); + + // The token is valid + mode = MODES_AFTER >> tok.type & 1; + continue; + } + + // A group was not closed + if (!stack.empty()) { + errCode = EARLYEOF; + errPosition = expression.length(); + errText = stack.pop().text; + } + + // Successfully parsed the expression + return true; + } + +}