Implementing breakpoint expression parser

This commit is contained in:
Guy Perfect 2020-08-12 19:59:02 -05:00
parent fa1728b424
commit 53faa1193a
1 changed files with 636 additions and 0 deletions

View File

@ -0,0 +1,636 @@
package vue;
// Java imports
import java.util.*;
// Breakpoint definition
public class Breakpoint {
// Instance fields
// The three err* fields are reset at the start of every setExpression() call
// and describe the most recent failure, if any.
private int errCode; // Error type: NONE or one of the error code constants
private int errPosition; // Character of input error
private String errText; // Offending token text ("" when there is no error)
private String expression; // Un-processed input; never null (null input is stored as "")
private boolean isEnabled; // Breakpoint is active (no assignment is visible in this file)
private boolean isValid; // Expression is valid and processed
private String name; // Display name; never null
private Token[] tokens; // Evaluation tokens in RPN order, assigned after a successful parse
///////////////////////////////////////////////////////////////////////////
// Classes //
///////////////////////////////////////////////////////////////////////////
// Operator definition
// Immutable description of one entry in the operator table: which operator
// or delimiter it is, how tightly it binds and what category of token it
// produces. Instances are created once and only ever read afterwards, so
// all fields are final.
private static class OpDef {
    final int id;         // Identifier (one of the token ID constants)
    final int precedence; // Operator precedence (lower binds tighter)
    final int type;       // Operator category (one of the token types)

    // Constructor
    // precedence - binding strength, lower values are applied first
    // type       - token category produced by this operator
    // id         - token identifier
    OpDef(int precedence, int type, int id) {
        this.id         = id;
        this.precedence = precedence;
        this.type       = type;
    }
}
// Expression token
// A single lexical element of an expression. After tree construction the
// left/right/parent links connect tokens into an expression tree.
private static class Token {

    // Classification
    int    type;       // Token category
    int    id;         // Operator or symbol identifier
    int    precedence; // Operator precedence

    // Source information
    int    start;      // Character position in expression
    String text;       // Display text
    Object value;      // Literal value

    // Tree links
    Token  parent;     // Containing operator
    Token  left;       // Left operand
    Token  right;      // Right operand

    // Constructor
    // All fields not supplied here keep their default (zero/null) values
    Token(int type, int start, String text) {
        this.type  = type;
        this.start = start;
        this.text  = text;
    }

}
///////////////////////////////////////////////////////////////////////////
// Constants //
///////////////////////////////////////////////////////////////////////////
// Error codes
// Values reported by getErrorCode() after a call to setExpression()
public static final int NONE = 0; // No error
public static final int BADTOKEN = 1; // An operator could not be identified
public static final int EARLYEOF = 2; // The expression ended prematurely, e.g. a group was left open
public static final int EMPTY = 3; // The expression contains no tokens
public static final int INVALID = 4; // A token appeared where its kind is not permitted
public static final int NESTING = 5; // A closing delimiter does not match the innermost open group
public static final int UNEXPECTED = 6; // A numeric literal is malformed or could not be parsed
// Token types
// These values double as bit indexes into the MODES_* masks below, so they
// must stay small, distinct and in step with those masks.
private static final int BINARY = 0; // Operator taking a left and a right operand
private static final int CLOSE = 1; // Closing delimiter of a group
private static final int LITERAL = 2; // Numeric literal
private static final int OPEN = 3; // Opening delimiter of a group
private static final int SYMBOL = 4; // Named symbol that is not an operator
private static final int UNARY = 5; // Operator taking a single (right) operand
// Expected token modes adjacent to any given token
// Bit N of each mask applies to token type N. The "mode" tracked during
// validation is 1 when an operand has just been completed and 0 when an
// operand is still required.
// MODES_AFTER : mode in effect after the token (1 for CLOSE, LITERAL, SYMBOL)
// MODES_BEFORE: mode the token requires before it (1 for BINARY, CLOSE)
private static final int MODES_AFTER = 0b010110;
private static final int MODES_BEFORE = 0b000011;
// Token IDs
// Identify the specific operator or delimiter a token represents. GROUP and
// READ are shared by the opening and closing delimiter of their group kind,
// which is how nesting is matched during validation. SUBTRACT has no entry
// in the operator table: validation converts NEGATE into SUBTRACT when "-"
// follows an operand.
private static final int ADD = 0;
private static final int BITWISE_AND = 1;
private static final int BITWISE_NOT = 2;
private static final int BITWISE_OR = 3;
private static final int BITWISE_XOR = 4;
private static final int CEIL = 5;
private static final int DIVIDE = 6;
private static final int EQUAL = 7;
private static final int FLOAT = 8;
private static final int FLOOR = 9;
private static final int GREATER_EQUAL_SIGNED = 10;
private static final int GREATER_EQUAL_UNSIGNED = 11;
private static final int GREATER_SIGNED = 12;
private static final int GREATER_UNSIGNED = 13;
private static final int GROUP = 14;
private static final int LESS_EQUAL_SIGNED = 15;
private static final int LESS_EQUAL_UNSIGNED = 16;
private static final int LESS_SIGNED = 17;
private static final int LESS_UNSIGNED = 18;
private static final int LOGICAL_AND = 19;
private static final int LOGICAL_NOT = 20;
private static final int LOGICAL_OR = 21;
private static final int LOGICAL_XOR = 22;
private static final int MULTIPLY = 23;
private static final int NEGATE = 24;
private static final int NOT_EQUAL = 25;
private static final int READ = 26;
private static final int REMAINDER = 27;
private static final int ROUND = 28;
private static final int SHIFT_LEFT = 29;
private static final int SHIFT_RIGHT = 30;
private static final int SHIFT_RIGHT_ARITHMETIC = 31;
private static final int SUBTRACT = 32;
private static final int TRUNC = 33;
private static final int WORD = 34;
private static final int XFLOAT = 35;
private static final int XWORD = 36;
// Token definitions
// Maps operator text (lower-case for word operators) to its definition;
// populated once by the static initializer
private static final HashMap<String, OpDef> OPDEFS;
// Static initializer
// Builds the operator table, grouped by precedence level. Lower precedence
// values are applied first when the expression tree is built.
static {
    var defs = new HashMap<String, OpDef>();

    // Level 0: group delimiters
    defs.put("(", new OpDef(0, OPEN , GROUP));
    defs.put(")", new OpDef(0, CLOSE, GROUP));
    defs.put("[", new OpDef(0, OPEN , READ ));
    defs.put("]", new OpDef(0, CLOSE, READ ));

    // Level 1: unary operators, symbolic and named
    defs.put("~"     , new OpDef(1, UNARY, BITWISE_NOT));
    defs.put("!"     , new OpDef(1, UNARY, LOGICAL_NOT));
    defs.put("-"     , new OpDef(1, UNARY, NEGATE     ));
    defs.put("ceil"  , new OpDef(1, UNARY, CEIL       ));
    defs.put("float" , new OpDef(1, UNARY, FLOAT      ));
    defs.put("floor" , new OpDef(1, UNARY, FLOOR      ));
    defs.put("round" , new OpDef(1, UNARY, ROUND      ));
    defs.put("trunc" , new OpDef(1, UNARY, TRUNC      ));
    defs.put("word"  , new OpDef(1, UNARY, WORD       ));
    defs.put("xfloat", new OpDef(1, UNARY, XFLOAT     ));
    defs.put("xword" , new OpDef(1, UNARY, XWORD      ));

    // Level 2: multiplicative
    defs.put("/", new OpDef(2, BINARY, DIVIDE   ));
    defs.put("*", new OpDef(2, BINARY, MULTIPLY ));
    defs.put("%", new OpDef(2, BINARY, REMAINDER));

    // Level 3: additive ("-" becomes SUBTRACT during validation)
    defs.put("+", new OpDef(3, BINARY, ADD));

    // Level 4: shifts
    // NOTE(review): ">>>" maps to the arithmetic shift and ">>" to the plain
    // shift, the reverse of Java's own operators -- confirm this is intended
    defs.put("<<" , new OpDef(4, BINARY, SHIFT_LEFT            ));
    defs.put(">>" , new OpDef(4, BINARY, SHIFT_RIGHT           ));
    defs.put(">>>", new OpDef(4, BINARY, SHIFT_RIGHT_ARITHMETIC));

    // Level 5: relational; a trailing "_" selects the unsigned variant
    defs.put(">"  , new OpDef(5, BINARY, GREATER_SIGNED        ));
    defs.put(">_" , new OpDef(5, BINARY, GREATER_UNSIGNED      ));
    defs.put(">=" , new OpDef(5, BINARY, GREATER_EQUAL_SIGNED  ));
    defs.put(">=_", new OpDef(5, BINARY, GREATER_EQUAL_UNSIGNED));
    defs.put("<"  , new OpDef(5, BINARY, LESS_SIGNED           ));
    defs.put("<_" , new OpDef(5, BINARY, LESS_UNSIGNED         ));
    defs.put("<=" , new OpDef(5, BINARY, LESS_EQUAL_SIGNED     ));
    defs.put("<=_", new OpDef(5, BINARY, LESS_EQUAL_UNSIGNED   ));

    // Level 6: equality
    defs.put("==", new OpDef(6, BINARY, EQUAL    ));
    defs.put("!=", new OpDef(6, BINARY, NOT_EQUAL));

    // Levels 7-9: bitwise
    defs.put("&", new OpDef(7, BINARY, BITWISE_AND));
    defs.put("^", new OpDef(8, BINARY, BITWISE_XOR));
    defs.put("|", new OpDef(9, BINARY, BITWISE_OR ));

    // Levels 10-12: logical
    defs.put("&&", new OpDef(10, BINARY, LOGICAL_AND));
    defs.put("^^", new OpDef(11, BINARY, LOGICAL_XOR));
    defs.put("||", new OpDef(12, BINARY, LOGICAL_OR ));

    OPDEFS = defs;
}
///////////////////////////////////////////////////////////////////////////
// Constructors //
///////////////////////////////////////////////////////////////////////////
// Default constructor
// Starts with an empty expression (which is processed like any other input
// and therefore leaves the breakpoint invalid) and an empty display name.
public Breakpoint() {
    this.setExpression(null);
    this.name = "";
}
///////////////////////////////////////////////////////////////////////////
// Public Methods //
///////////////////////////////////////////////////////////////////////////
// Retrieve the most recent error code
// Returns NONE or one of the other error code constants, as recorded by the
// last call to setExpression()
public int getErrorCode() {
    return this.errCode;
}
// Retrieve the most recent error character position
// Returns the character position recorded with the last error, or 0 when
// the last call to setExpression() recorded none
public int getErrorPosition() {
    return this.errPosition;
}
// Retrieve the most recent error text
// Returns the offending token text recorded with the last error, or an
// empty string when there is none
public String getErrorText() {
    return this.errText;
}
// Retrieve the most recent input expression
// Returns the text last given to setExpression(), whether or not it was
// valid; a null input is reported as an empty string
public String getExpression() {
    return this.expression;
}
// Retrieve the display name
// Returns the name last given to setName(), or an empty string
public String getName() {
    return this.name;
}
// Determine whether the breakpoint is enabled
// Returns the current value of the enabled flag
public boolean isEnabled() {
    return this.isEnabled;
}
// Determine whether the breakpoint is valid
// Returns true when the last call to setExpression() parsed successfully
public boolean isValid() {
    return this.isValid;
}
// Specify and parse an expression
// The input is tokenized, validated, arranged into an expression tree and
// finally flattened into RPN (postfix) order for evaluation.
// expression - the text to parse; null is treated as an empty string
// Returns true if the expression was valid. On failure, the error code,
// position and text describe the problem and any previously parsed tokens
// are discarded.
public boolean setExpression(String expression) {

    // Configure instance fields
    errCode         = NONE;
    errPosition     = 0;
    errText         = "";
    this.expression = expression == null ? "" : expression;

    // Invalidate up front so that no failure path, including an unexpected
    // exception, can leave tokens from a previous expression marked valid
    isValid = false;
    tokens  = null;

    // Process the expression
    var list = parse();
    if (list == null || !validate(list))
        return false;
    tree(list);

    // Produce an RPN-ordered list of tokens: a post-order walk of the tree
    // that unlinks each child as it descends, so a node is emitted once both
    // of its child links have been consumed. The single root token is taken
    // out of the list, which is then reused for the output.
    var node = list.remove(0);
    while (node != null) {

        // Traverse to left child node
        if (node.left != null) {
            var child = node.left;
            node.left = null;
            node      = child;
            continue;
        }

        // Traverse to right child node
        if (node.right != null) {
            var child  = node.right;
            node.right = null;
            node       = child;
            continue;
        }

        // No children: add node to output
        list.add(node);
        node = node.parent;
    }
    tokens = list.toArray(new Token[0]);

    // The expression was successfully parsed
    return isValid = true;
}
// Specify the display name
// name - the new name; null is stored as an empty string
public void setName(String name) {
    if (name == null)
        this.name = "";
    else
        this.name = name;
}
///////////////////////////////////////////////////////////////////////////
// Package Methods //
///////////////////////////////////////////////////////////////////////////
// Determine the required stack size to evaluate the expression
// Simulates evaluation of the RPN token list: every literal or symbol
// pushes one value, every binary operator nets one value fewer, and unary
// operators leave the count unchanged.
// Returns the largest number of values held at once, or 0 if the
// expression is not valid
int depth() {

    // Error checking
    if (!isValid)
        return 0;

    // Track the running and peak number of values on the stack
    int peak    = 0;
    int current = 0;
    for (var tok : tokens) {
        if (tok.type == BINARY) {
            current--;
        } else if (tok.type == LITERAL || tok.type == SYMBOL) {
            current++;
            if (current > peak)
                peak = current;
        }
    }
    return peak;
}
///////////////////////////////////////////////////////////////////////////
// Private Methods //
///////////////////////////////////////////////////////////////////////////
// Adjust a float value as needed
// Values whose exponent field is all zeroes (zero of either sign, or a
// denormal) or all ones (infinity or NaN) are replaced with positive zero;
// every other value is returned unchanged.
private static float fixFloat(float value) {
    int raw      = Float.floatToRawIntBits(value);
    int exponent = raw >>> 23 & 0xFF;

    // Exponent 0x00: zero or denormal. Exponent 0xFF: indefinite.
    if (exponent == 0x00 || exponent == 0xFF)
        return 0;
    return value;
}
// Parse an expression into tokens
// Splits the stored expression into literal, symbol and operator tokens,
// skipping spaces and tabs.
// Returns the tokens in input order, or null if a token could not be
// processed or the expression holds no tokens at all. The error fields are
// set by the failing token parser, or here for the empty case.
private ArrayList<Token> parse() {
    var result = new ArrayList<Token>();

    // A trailing space guarantees every token is followed by a terminator
    char[] input = (expression + " ").toCharArray();

    // Parse the expression
    int pos = 0;
    while (pos < input.length) {
        char ch = input[pos];

        // Ignore whitespace
        if (ch == ' ' || ch == '\t') {
            pos++;
            continue;
        }

        // Produce a token based on the first character
        Token tok;
        if (ch >= '0' && ch <= '9' || ch == '.')
            tok = parseLiteral(input, pos);
        else if (ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z')
            tok = parseSymbol(input, pos);
        else
            tok = parseOperator(input, pos);

        // There was an error processing the token
        if (tok == null)
            return null;

        // Advance to the next token
        result.add(tok);
        pos += tok.text.length();
    }

    // The expression contains no tokens
    if (result.size() == 0) {
        errCode     = EMPTY;
        errPosition = 1;
        errText     = "";
        return null;
    }

    return result;
}
// Parse a literal
// Recognizes decimal integers, hexadecimal integers prefixed with "0x" and
// decimal floats containing a single ".".
// chars - the expression text
// start - index of the first character of the literal
// Returns a LITERAL token whose value is an Integer or a Float, or null
// with the error fields set if the literal is malformed or out of range.
private Token parseLiteral(char[] chars, int start) {
    boolean isFloat = chars[start] == '.'; // The figure is a float
    boolean isHex   = false;               // The figure is in hexadecimal

    // Locate the end of the literal
    int end = start + 1;
    for (; end < chars.length; end++) {
        char c = chars[end];

        // The literal begins with "0x"
        if (c == 'x' || c == 'X') {

            // "x" cannot appear here
            if (isFloat || end != start + 1 || chars[start] != '0') {
                errCode     = UNEXPECTED;
                errPosition = end + 1;
                errText     = Character.toString(c);
                return null;
            }

            // Configure as a hexadecimal integer
            isHex = true;
            continue;
        }

        // The literal contains "."
        if (c == '.') {

            // "." cannot appear here
            if (isHex || isFloat) {
                errCode     = UNEXPECTED;
                errPosition = end + 1;
                errText     = Character.toString(c);
                return null;
            }

            // Configure as a float
            isFloat = true;
            continue;
        }

        // The character is not part of the token
        if (!(
            c >= '0' && c <= '9' ||
            isHex && (c >= 'a' && c <= 'f' || c >= 'A' && c <= 'F')
        )) break;
    }

    // Produce a new token
    var ret = new Token(LITERAL, start,
        new String(chars, start, end - start));

    // Parse the literal value
    try {
        if (isHex)
            // Accepts the full unsigned 32-bit range and rejects anything
            // wider, rather than silently discarding the upper bits
            ret.value = Integer.parseUnsignedInt(ret.text.substring(2), 16);
        else if (isFloat)
            ret.value = fixFloat(Float.parseFloat(ret.text));
        else
            ret.value = Integer.parseInt(ret.text);
        return ret;
    }

    // Could not parse the value: report the literal itself
    catch (NumberFormatException e) {
        errCode     = UNEXPECTED;
        errPosition = start + 1;
        errText     = ret.text;
        return null;
    }
}
// Parse an operator
// Gathers the run of characters up to the next letter, digit, space or tab,
// then selects the longest prefix of that run that names a known operator.
// chars - the expression text
// start - index of the first character of the operator
// Returns a token configured from the operator definition, or null with the
// error fields set to BADTOKEN if no prefix matches.
private Token parseOperator(char[] chars, int start) {

    // Find the first character that cannot be part of an operator
    int stop = start + 1;
    while (stop < chars.length) {
        char c = chars[stop];
        boolean terminator =
            c >= 'a' && c <= 'z' ||
            c >= 'A' && c <= 'Z' ||
            c >= '0' && c <= '9' ||
            c == ' ' || c == '\t';
        if (terminator)
            break;
        stop++;
    }

    // No terminator was found (not expected: callers append a space)
    if (stop >= chars.length)
        return null;

    // Find the longest operator match
    for (int length = stop - start; length > 0; length--) {
        String candidate = new String(chars, start, length);
        OpDef  def       = OPDEFS.get(candidate);
        if (def != null) {
            Token ret      = new Token(def.type, start, candidate);
            ret.id         = def.id;
            ret.precedence = def.precedence;
            return ret;
        }
    }

    // The operator was not identified
    errCode     = BADTOKEN;
    errPosition = start + 1;
    errText     = Character.toString(chars[start]);
    return null;
}
// Parse a symbol (which may be an operator)
// A symbol is a run of letters, digits, "_" and "." beginning at start. If
// its text, compared without regard to case, names an entry in the operator
// table (such as "floor" or "xword"), the token takes on that operator's
// type, id and precedence; otherwise it remains a SYMBOL.
// chars - the expression text
// start - index of the first character of the symbol
// Returns the new token; its text keeps the casing used in the input.
private Token parseSymbol(char[] chars, int start) {

    // Locate the end of the symbol
    int end = start + 1;
    while (end < chars.length) {
        char c = chars[end];
        if (!(
            c >= 'a' && c <= 'z' ||
            c >= 'A' && c <= 'Z' ||
            c >= '0' && c <= '9' ||
            c == '_' || c == '.'
        )) break;
        end++;
    }

    // Produce a new token
    var ret = new Token(SYMBOL, start,
        new String(chars, start, end - start));

    // The token is an operator
    // Locale.ROOT keeps the lookup independent of the default locale: under
    // a Turkish locale, for example, "CEIL" would otherwise lower-case to
    // "ce\u0131l" and fail to match
    var def = OPDEFS.get(ret.text.toLowerCase(Locale.ROOT));
    if (def != null) {
        ret.id         = def.id;
        ret.precedence = def.precedence;
        ret.type       = def.type;
    }

    return ret;
}
// Build an expression tree from a list of tokens
// Repeatedly reduces the innermost group (or the whole list, once no groups
// remain): unary operators absorb the token to their right, binary
// operators absorb both neighbors in precedence order, and finally the
// group delimiters are removed. On return the list holds the single root
// token of the tree.
// tokens - a validated token list; it is modified in place
private void tree(ArrayList<Token> tokens) {

    // Process all operators
    while (tokens.size() > 1) {
        int end   = tokens.size() - 1;
        int start = 0;

        // Locate the bounds of the innermost nested group: the first closing
        // delimiter and the last opening delimiter before it. Every token is
        // examined, including the final one, because a closing delimiter is
        // frequently the last token of the list, as in "(1 + 2)".
        for (int x = 0; x <= end; x++) {
            var tok = tokens.get(x);
            if (tok.type == OPEN)
                start = x + 1;
            if (tok.type != CLOSE)
                continue;
            end = x - 1;
            break;
        }

        // Apply unary operators, right to left so that chained operators
        // such as "- - x" nest correctly
        for (int x = end; x >= start; x--) {
            var tok = tokens.get(x);
            if (tok.right != null || tok.type != UNARY)
                continue;
            tok.right        = tokens.remove(x + 1);
            tok.right.parent = tok;
            end--;
        }

        // Apply binary operators
        while (start != end) {
            int   index = -1;
            Token tok   = null;

            // Locate the left-most operator with the highest precedence
            // (the lowest precedence value) that has not yet been applied
            for (int x = start; x < end; x++) {
                var tik = tokens.get(x);
                if (tik.right != null || tik.type != BINARY ||
                    tok != null && tik.precedence >= tok.precedence)
                    continue;
                index = x;
                tok   = tik;
            }

            // Cannot happen for a validated token list
            if (tok == null)
                throw new IllegalStateException(
                    "No binary operator joins the remaining operands");

            // Apply the operator
            tok.right       = tokens.remove(index + 1);
            tok.left        = tokens.remove(index - 1);
            tok.left.parent = tok.right.parent = tok;
            end -= 2;
        }

        // There are no group operators
        if (tokens.size() == 1)
            break;

        // Apply the group operators
        tokens.remove(end + 1);
        var open = tokens.remove(start - 1);

        // A read group wraps its contents in a unary read operator, while a
        // plain group simply leaves its contents in place
        if (open.id == READ) {
            var tok          = new Token(UNARY, open.start, "{Read Word}");
            tok.id           = READ;
            tok.right        = tokens.remove(start - 1);
            tok.right.parent = tok;
            tokens.add(start - 1, tok);
        }

    } // size

}
// Ensure a sequence of tokens is valid
// Checks that operands and operators alternate correctly, that group
// delimiters are properly nested and matched, and that the expression does
// not end while an operand or a closing delimiter is still required. A "-"
// found where a binary operator is expected is converted in place from
// NEGATE to SUBTRACT.
// tokens - the tokens to check, in input order
// Returns true if the sequence is valid; otherwise false with the error
// fields set. Token positions are reported 1-based, matching the errors
// produced while tokenizing.
private boolean validate(ArrayList<Token> tokens) {
    int   mode  = 0;    // 1 after a complete operand, 0 when one is needed
    Token last  = null; // Most recently validated token
    var   stack = new ArrayDeque<Token>();

    // Validate all tokens
    for (var tok : tokens) {

        // Expected token mode mismatch
        if ((MODES_BEFORE >> tok.type & 1) != mode) {

            // The token is invalid
            if (tok.type != UNARY || tok.id != NEGATE) {
                errCode     = INVALID;
                errPosition = tok.start + 1;
                errText     = tok.text;
                return false;
            }

            // Convert negate to subtract
            tok.id         = SUBTRACT;
            tok.precedence = 3;
            tok.type       = BINARY;
        }

        // Nesting error
        if (tok.type == CLOSE &&
            (stack.isEmpty() || stack.pop().id != tok.id)) {
            errCode     = NESTING;
            errPosition = tok.start + 1;
            errText     = tok.text;
            return false;
        }

        // The token opens a group
        if (tok.type == OPEN)
            stack.push(tok);

        // The token is valid
        mode = MODES_AFTER >> tok.type & 1;
        last = tok;
    }

    // The expression ended where an operand was required
    if (mode == 0) {
        errCode     = EARLYEOF;
        errPosition = expression.length();
        errText     = last == null ? "" : last.text;
        return false;
    }

    // A group was not closed
    if (!stack.isEmpty()) {
        errCode     = EARLYEOF;
        errPosition = expression.length();
        errText     = stack.pop().text;
        return false;
    }

    // Successfully parsed the expression
    return true;
}
}