Tokenizer uses single process step and length-based exhaustion check
Issue: SPR-16032
This commit is contained in:
parent b2017bb1ec
commit 03b68286a2
|
|
@@ -124,8 +124,7 @@ class InternalSpelExpressionParser extends TemplateAwareExpressionParser {
|
||||||
try {
|
try {
|
||||||
this.expressionString = expressionString;
|
this.expressionString = expressionString;
|
||||||
Tokenizer tokenizer = new Tokenizer(expressionString);
|
Tokenizer tokenizer = new Tokenizer(expressionString);
|
||||||
tokenizer.process();
|
this.tokenStream = tokenizer.process();
|
||||||
this.tokenStream = tokenizer.getTokens();
|
|
||||||
this.tokenStreamLength = this.tokenStream.size();
|
this.tokenStreamLength = this.tokenStream.size();
|
||||||
this.tokenStreamPointer = 0;
|
this.tokenStreamPointer = 0;
|
||||||
this.constructedNodes.clear();
|
this.constructedNodes.clear();
|
||||||
|
|
|
||||||
|
|
@@ -28,14 +28,15 @@ import org.springframework.expression.spel.SpelParseException;
|
||||||
* Lex some input data into a stream of tokens that can then be parsed.
|
* Lex some input data into a stream of tokens that can then be parsed.
|
||||||
*
|
*
|
||||||
* @author Andy Clement
|
* @author Andy Clement
|
||||||
|
* @author Juergen Hoeller
|
||||||
* @author Phillip Webb
|
* @author Phillip Webb
|
||||||
* @since 3.0
|
* @since 3.0
|
||||||
*/
|
*/
|
||||||
class Tokenizer {
|
class Tokenizer {
|
||||||
|
|
||||||
// if this is changed, it must remain sorted
|
// If this gets changed, it must remain sorted...
|
||||||
private static final String[] ALTERNATIVE_OPERATOR_NAMES = { "DIV", "EQ", "GE", "GT",
|
private static final String[] ALTERNATIVE_OPERATOR_NAMES =
|
||||||
"LE", "LT", "MOD", "NE", "NOT" };
|
{"DIV", "EQ", "GE", "GT", "LE", "LT", "MOD", "NE", "NOT"};
|
||||||
|
|
||||||
private static final byte FLAGS[] = new byte[256];
|
private static final byte FLAGS[] = new byte[256];
|
||||||
|
|
||||||
|
|
@@ -64,29 +65,28 @@ class Tokenizer {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
String expressionString;
|
private String expressionString;
|
||||||
|
|
||||||
char[] toProcess;
|
private char[] charsToProcess;
|
||||||
|
|
||||||
int pos;
|
private int pos;
|
||||||
|
|
||||||
int max;
|
private int max;
|
||||||
|
|
||||||
List<Token> tokens = new ArrayList<>();
|
private List<Token> tokens = new ArrayList<>();
|
||||||
|
|
||||||
|
|
||||||
public Tokenizer(String inputData) {
|
public Tokenizer(String inputData) {
|
||||||
this.expressionString = inputData;
|
this.expressionString = inputData;
|
||||||
this.toProcess = (inputData + "\0").toCharArray();
|
this.charsToProcess = (inputData + "\0").toCharArray();
|
||||||
this.max = this.toProcess.length;
|
this.max = this.charsToProcess.length;
|
||||||
this.pos = 0;
|
this.pos = 0;
|
||||||
process();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void process() {
|
public List<Token> process() {
|
||||||
while (this.pos < this.max) {
|
while (this.pos < this.max) {
|
||||||
char ch = this.toProcess[this.pos];
|
char ch = this.charsToProcess[this.pos];
|
||||||
if (isAlphabetic(ch)) {
|
if (isAlphabetic(ch)) {
|
||||||
lexIdentifier();
|
lexIdentifier();
|
||||||
}
|
}
|
||||||
|
|
@@ -190,9 +190,7 @@ class Tokenizer {
|
||||||
break;
|
break;
|
||||||
case '|':
|
case '|':
|
||||||
if (!isTwoCharToken(TokenKind.SYMBOLIC_OR)) {
|
if (!isTwoCharToken(TokenKind.SYMBOLIC_OR)) {
|
||||||
throw new InternalParseException(new SpelParseException(
|
raiseParseException(this.pos, SpelMessage.MISSING_CHARACTER, "|");
|
||||||
this.expressionString, this.pos, SpelMessage.MISSING_CHARACTER,
|
|
||||||
"|"));
|
|
||||||
}
|
}
|
||||||
pushPairToken(TokenKind.SYMBOLIC_OR);
|
pushPairToken(TokenKind.SYMBOLIC_OR);
|
||||||
break;
|
break;
|
||||||
|
|
@@ -264,38 +262,35 @@ class Tokenizer {
|
||||||
this.pos++; // will take us to the end
|
this.pos++; // will take us to the end
|
||||||
break;
|
break;
|
||||||
case '\\':
|
case '\\':
|
||||||
throw new InternalParseException(
|
raiseParseException(this.pos, SpelMessage.UNEXPECTED_ESCAPE_CHAR);
|
||||||
new SpelParseException(this.expressionString, this.pos, SpelMessage.UNEXPECTED_ESCAPE_CHAR));
|
break;
|
||||||
default:
|
default:
|
||||||
throw new IllegalStateException("Cannot handle (" + Integer.valueOf(ch) + ") '" + ch + "'");
|
throw new IllegalStateException("Cannot handle (" + Integer.valueOf(ch) + ") '" + ch + "'");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
public List<Token> getTokens() {
|
|
||||||
return this.tokens;
|
return this.tokens;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// STRING_LITERAL: '\''! (APOS|~'\'')* '\''!;
|
// STRING_LITERAL: '\''! (APOS|~'\'')* '\''!;
|
||||||
private void lexQuotedStringLiteral() {
|
private void lexQuotedStringLiteral() {
|
||||||
int start = this.pos;
|
int start = this.pos;
|
||||||
boolean terminated = false;
|
boolean terminated = false;
|
||||||
while (!terminated) {
|
while (!terminated) {
|
||||||
this.pos++;
|
this.pos++;
|
||||||
char ch = this.toProcess[this.pos];
|
char ch = this.charsToProcess[this.pos];
|
||||||
if (ch == '\'') {
|
if (ch == '\'') {
|
||||||
// may not be the end if the char after is also a '
|
// may not be the end if the char after is also a '
|
||||||
if (this.toProcess[this.pos + 1] == '\'') {
|
if (this.charsToProcess[this.pos + 1] == '\'') {
|
||||||
this.pos++; // skip over that too, and continue
|
this.pos++; // skip over that too, and continue
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
terminated = true;
|
terminated = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (ch == 0) {
|
if (isExhausted()) {
|
||||||
throw new InternalParseException(new SpelParseException(this.expressionString, start,
|
raiseParseException(start, SpelMessage.NON_TERMINATING_QUOTED_STRING);
|
||||||
SpelMessage.NON_TERMINATING_QUOTED_STRING));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
this.pos++;
|
this.pos++;
|
||||||
|
|
@@ -308,19 +303,18 @@ class Tokenizer {
|
||||||
boolean terminated = false;
|
boolean terminated = false;
|
||||||
while (!terminated) {
|
while (!terminated) {
|
||||||
this.pos++;
|
this.pos++;
|
||||||
char ch = this.toProcess[this.pos];
|
char ch = this.charsToProcess[this.pos];
|
||||||
if (ch == '"') {
|
if (ch == '"') {
|
||||||
// may not be the end if the char after is also a "
|
// may not be the end if the char after is also a "
|
||||||
if (this.toProcess[this.pos + 1] == '"') {
|
if (this.charsToProcess[this.pos + 1] == '"') {
|
||||||
this.pos++; // skip over that too, and continue
|
this.pos++; // skip over that too, and continue
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
terminated = true;
|
terminated = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (ch == 0) {
|
if (isExhausted()) {
|
||||||
throw new InternalParseException(new SpelParseException(this.expressionString,
|
raiseParseException(start, SpelMessage.NON_TERMINATING_DOUBLE_QUOTED_STRING);
|
||||||
start, SpelMessage.NON_TERMINATING_DOUBLE_QUOTED_STRING));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
this.pos++;
|
this.pos++;
|
||||||
|
|
@@ -346,7 +340,7 @@ class Tokenizer {
|
||||||
private void lexNumericLiteral(boolean firstCharIsZero) {
|
private void lexNumericLiteral(boolean firstCharIsZero) {
|
||||||
boolean isReal = false;
|
boolean isReal = false;
|
||||||
int start = this.pos;
|
int start = this.pos;
|
||||||
char ch = this.toProcess[this.pos + 1];
|
char ch = this.charsToProcess[this.pos + 1];
|
||||||
boolean isHex = ch == 'x' || ch == 'X';
|
boolean isHex = ch == 'x' || ch == 'X';
|
||||||
|
|
||||||
// deal with hexadecimal
|
// deal with hexadecimal
|
||||||
|
|
@@ -355,7 +349,7 @@ class Tokenizer {
|
||||||
do {
|
do {
|
||||||
this.pos++;
|
this.pos++;
|
||||||
}
|
}
|
||||||
while (isHexadecimalDigit(this.toProcess[this.pos]));
|
while (isHexadecimalDigit(this.charsToProcess[this.pos]));
|
||||||
if (isChar('L', 'l')) {
|
if (isChar('L', 'l')) {
|
||||||
pushHexIntToken(subarray(start + 2, this.pos), true, start, this.pos);
|
pushHexIntToken(subarray(start + 2, this.pos), true, start, this.pos);
|
||||||
this.pos++;
|
this.pos++;
|
||||||
|
|
@@ -372,10 +366,10 @@ class Tokenizer {
|
||||||
do {
|
do {
|
||||||
this.pos++;
|
this.pos++;
|
||||||
}
|
}
|
||||||
while (isDigit(this.toProcess[this.pos]));
|
while (isDigit(this.charsToProcess[this.pos]));
|
||||||
|
|
||||||
// a '.' indicates this number is a real
|
// a '.' indicates this number is a real
|
||||||
ch = this.toProcess[this.pos];
|
ch = this.charsToProcess[this.pos];
|
||||||
if (ch == '.') {
|
if (ch == '.') {
|
||||||
isReal = true;
|
isReal = true;
|
||||||
int dotpos = this.pos;
|
int dotpos = this.pos;
|
||||||
|
|
@@ -383,7 +377,7 @@ class Tokenizer {
|
||||||
do {
|
do {
|
||||||
this.pos++;
|
this.pos++;
|
||||||
}
|
}
|
||||||
while (isDigit(this.toProcess[this.pos]));
|
while (isDigit(this.charsToProcess[this.pos]));
|
||||||
if (this.pos == dotpos + 1) {
|
if (this.pos == dotpos + 1) {
|
||||||
// the number is something like '3.'. It is really an int but may be
|
// the number is something like '3.'. It is really an int but may be
|
||||||
// part of something like '3.toString()'. In this case process it as
|
// part of something like '3.toString()'. In this case process it as
|
||||||
|
|
@@ -398,19 +392,18 @@ class Tokenizer {
|
||||||
|
|
||||||
// Now there may or may not be an exponent
|
// Now there may or may not be an exponent
|
||||||
|
|
||||||
// is it a long ?
|
// Is it a long ?
|
||||||
if (isChar('L', 'l')) {
|
if (isChar('L', 'l')) {
|
||||||
if (isReal) { // 3.4L - not allowed
|
if (isReal) { // 3.4L - not allowed
|
||||||
throw new InternalParseException(new SpelParseException(this.expressionString,
|
raiseParseException(start, SpelMessage.REAL_CANNOT_BE_LONG);
|
||||||
start, SpelMessage.REAL_CANNOT_BE_LONG));
|
|
||||||
}
|
}
|
||||||
pushIntToken(subarray(start, endOfNumber), true, start, endOfNumber);
|
pushIntToken(subarray(start, endOfNumber), true, start, endOfNumber);
|
||||||
this.pos++;
|
this.pos++;
|
||||||
}
|
}
|
||||||
else if (isExponentChar(this.toProcess[this.pos])) {
|
else if (isExponentChar(this.charsToProcess[this.pos])) {
|
||||||
isReal = true; // if it wasn't before, it is now
|
isReal = true; // if it wasn't before, it is now
|
||||||
this.pos++;
|
this.pos++;
|
||||||
char possibleSign = this.toProcess[this.pos];
|
char possibleSign = this.charsToProcess[this.pos];
|
||||||
if (isSign(possibleSign)) {
|
if (isSign(possibleSign)) {
|
||||||
this.pos++;
|
this.pos++;
|
||||||
}
|
}
|
||||||
|
|
@@ -419,19 +412,19 @@ class Tokenizer {
|
||||||
do {
|
do {
|
||||||
this.pos++;
|
this.pos++;
|
||||||
}
|
}
|
||||||
while (isDigit(this.toProcess[this.pos]));
|
while (isDigit(this.charsToProcess[this.pos]));
|
||||||
boolean isFloat = false;
|
boolean isFloat = false;
|
||||||
if (isFloatSuffix(this.toProcess[this.pos])) {
|
if (isFloatSuffix(this.charsToProcess[this.pos])) {
|
||||||
isFloat = true;
|
isFloat = true;
|
||||||
endOfNumber = ++this.pos;
|
endOfNumber = ++this.pos;
|
||||||
}
|
}
|
||||||
else if (isDoubleSuffix(this.toProcess[this.pos])) {
|
else if (isDoubleSuffix(this.charsToProcess[this.pos])) {
|
||||||
endOfNumber = ++this.pos;
|
endOfNumber = ++this.pos;
|
||||||
}
|
}
|
||||||
pushRealToken(subarray(start, this.pos), isFloat, start, this.pos);
|
pushRealToken(subarray(start, this.pos), isFloat, start, this.pos);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
ch = this.toProcess[this.pos];
|
ch = this.charsToProcess[this.pos];
|
||||||
boolean isFloat = false;
|
boolean isFloat = false;
|
||||||
if (isFloatSuffix(ch)) {
|
if (isFloatSuffix(ch)) {
|
||||||
isReal = true;
|
isReal = true;
|
||||||
|
|
@@ -456,7 +449,7 @@ class Tokenizer {
|
||||||
do {
|
do {
|
||||||
this.pos++;
|
this.pos++;
|
||||||
}
|
}
|
||||||
while (isIdentifier(this.toProcess[this.pos]));
|
while (isIdentifier(this.charsToProcess[this.pos]));
|
||||||
char[] subarray = subarray(start, this.pos);
|
char[] subarray = subarray(start, this.pos);
|
||||||
|
|
||||||
// Check if this is the alternative (textual) representation of an operator (see
|
// Check if this is the alternative (textual) representation of an operator (see
|
||||||
|
|
@@ -484,14 +477,10 @@ class Tokenizer {
|
||||||
private void pushHexIntToken(char[] data, boolean isLong, int start, int end) {
|
private void pushHexIntToken(char[] data, boolean isLong, int start, int end) {
|
||||||
if (data.length == 0) {
|
if (data.length == 0) {
|
||||||
if (isLong) {
|
if (isLong) {
|
||||||
throw new InternalParseException(new SpelParseException(this.expressionString,
|
raiseParseException(start, SpelMessage.NOT_A_LONG, this.expressionString.substring(start, end + 1));
|
||||||
start, SpelMessage.NOT_A_LONG, this.expressionString.substring(start,
|
|
||||||
end + 1)));
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
throw new InternalParseException(new SpelParseException(this.expressionString,
|
raiseParseException(start, SpelMessage.NOT_AN_INTEGER, this.expressionString.substring(start, end));
|
||||||
start, SpelMessage.NOT_AN_INTEGER, this.expressionString.substring(
|
|
||||||
start, end)));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (isLong) {
|
if (isLong) {
|
||||||
|
|
@@ -513,7 +502,7 @@ class Tokenizer {
|
||||||
|
|
||||||
private char[] subarray(int start, int end) {
|
private char[] subarray(int start, int end) {
|
||||||
char[] result = new char[end - start];
|
char[] result = new char[end - start];
|
||||||
System.arraycopy(this.toProcess, start, result, 0, end - start);
|
System.arraycopy(this.charsToProcess, start, result, 0, end - start);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@@ -522,8 +511,8 @@ class Tokenizer {
|
||||||
*/
|
*/
|
||||||
private boolean isTwoCharToken(TokenKind kind) {
|
private boolean isTwoCharToken(TokenKind kind) {
|
||||||
return (kind.tokenChars.length == 2 &&
|
return (kind.tokenChars.length == 2 &&
|
||||||
this.toProcess[this.pos] == kind.tokenChars[0] &&
|
this.charsToProcess[this.pos] == kind.tokenChars[0] &&
|
||||||
this.toProcess[this.pos + 1] == kind.tokenChars[1]);
|
this.charsToProcess[this.pos + 1] == kind.tokenChars[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@@ -552,7 +541,7 @@ class Tokenizer {
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean isChar(char a, char b) {
|
private boolean isChar(char a, char b) {
|
||||||
char ch = this.toProcess[this.pos];
|
char ch = this.charsToProcess[this.pos];
|
||||||
return ch == a || ch == b;
|
return ch == a || ch == b;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@@ -593,4 +582,12 @@ class Tokenizer {
|
||||||
return (FLAGS[ch] & IS_HEXDIGIT) != 0;
|
return (FLAGS[ch] & IS_HEXDIGIT) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean isExhausted() {
|
||||||
|
return (this.pos == this.max - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void raiseParseException(int start, SpelMessage msg, Object... inserts) {
|
||||||
|
throw new InternalParseException(new SpelParseException(this.expressionString, start, msg, inserts));
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@@ -59,6 +59,7 @@ import org.springframework.expression.spel.support.ReflectivePropertyAccessor;
|
||||||
import org.springframework.expression.spel.support.StandardEvaluationContext;
|
import org.springframework.expression.spel.support.StandardEvaluationContext;
|
||||||
import org.springframework.expression.spel.support.StandardTypeLocator;
|
import org.springframework.expression.spel.support.StandardTypeLocator;
|
||||||
import org.springframework.expression.spel.testresources.le.div.mod.reserved.Reserver;
|
import org.springframework.expression.spel.testresources.le.div.mod.reserved.Reserver;
|
||||||
|
import org.springframework.util.ObjectUtils;
|
||||||
|
|
||||||
import static org.hamcrest.Matchers.*;
|
import static org.hamcrest.Matchers.*;
|
||||||
import static org.junit.Assert.*;
|
import static org.junit.Assert.*;
|
||||||
|
|
@@ -2095,6 +2096,16 @@ public class SpelReproTests extends AbstractExpressionTests {
|
||||||
assertEquals(StandardCharsets.UTF_8, result);
|
assertEquals(StandardCharsets.UTF_8, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void SPR16032() {
|
||||||
|
EvaluationContext context = new StandardEvaluationContext();
|
||||||
|
context.setVariable("str", "a\0b");
|
||||||
|
|
||||||
|
Expression ex = parser.parseExpression("#str?.split('\0')");
|
||||||
|
Object result = ex.getValue(context);
|
||||||
|
assertTrue(ObjectUtils.nullSafeEquals(result, new String[] {"a", "b"}));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public static class ListOf {
|
public static class ListOf {
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue