Tokenizer uses single process step and length-based exhaustion check

Issue: SPR-16032
This commit is contained in:
Juergen Hoeller 2017-10-04 12:23:16 +02:00
parent b2017bb1ec
commit 03b68286a2
3 changed files with 70 additions and 63 deletions

View File

@ -124,8 +124,7 @@ class InternalSpelExpressionParser extends TemplateAwareExpressionParser {
try {
this.expressionString = expressionString;
Tokenizer tokenizer = new Tokenizer(expressionString);
tokenizer.process();
this.tokenStream = tokenizer.getTokens();
this.tokenStream = tokenizer.process();
this.tokenStreamLength = this.tokenStream.size();
this.tokenStreamPointer = 0;
this.constructedNodes.clear();

View File

@ -28,14 +28,15 @@ import org.springframework.expression.spel.SpelParseException;
* Lex some input data into a stream of tokens that can then be parsed.
*
* @author Andy Clement
* @author Juergen Hoeller
* @author Phillip Webb
* @since 3.0
*/
class Tokenizer {
// if this is changed, it must remain sorted
private static final String[] ALTERNATIVE_OPERATOR_NAMES = { "DIV", "EQ", "GE", "GT",
"LE", "LT", "MOD", "NE", "NOT" };
// If this gets changed, it must remain sorted...
private static final String[] ALTERNATIVE_OPERATOR_NAMES =
{"DIV", "EQ", "GE", "GT", "LE", "LT", "MOD", "NE", "NOT"};
private static final byte FLAGS[] = new byte[256];
@ -64,29 +65,28 @@ class Tokenizer {
}
String expressionString;
private String expressionString;
char[] toProcess;
private char[] charsToProcess;
int pos;
private int pos;
int max;
private int max;
List<Token> tokens = new ArrayList<>();
private List<Token> tokens = new ArrayList<>();
public Tokenizer(String inputData) {
this.expressionString = inputData;
this.toProcess = (inputData + "\0").toCharArray();
this.max = this.toProcess.length;
this.charsToProcess = (inputData + "\0").toCharArray();
this.max = this.charsToProcess.length;
this.pos = 0;
process();
}
public void process() {
public List<Token> process() {
while (this.pos < this.max) {
char ch = this.toProcess[this.pos];
char ch = this.charsToProcess[this.pos];
if (isAlphabetic(ch)) {
lexIdentifier();
}
@ -190,9 +190,7 @@ class Tokenizer {
break;
case '|':
if (!isTwoCharToken(TokenKind.SYMBOLIC_OR)) {
throw new InternalParseException(new SpelParseException(
this.expressionString, this.pos, SpelMessage.MISSING_CHARACTER,
"|"));
raiseParseException(this.pos, SpelMessage.MISSING_CHARACTER, "|");
}
pushPairToken(TokenKind.SYMBOLIC_OR);
break;
@ -261,41 +259,38 @@ class Tokenizer {
break;
case 0:
// hit sentinel at end of value
this.pos++; // will take us to the end
this.pos++; // will take us to the end
break;
case '\\':
throw new InternalParseException(
new SpelParseException(this.expressionString, this.pos, SpelMessage.UNEXPECTED_ESCAPE_CHAR));
raiseParseException(this.pos, SpelMessage.UNEXPECTED_ESCAPE_CHAR);
break;
default:
throw new IllegalStateException("Cannot handle (" + Integer.valueOf(ch) + ") '" + ch + "'");
}
}
}
}
public List<Token> getTokens() {
return this.tokens;
}
// STRING_LITERAL: '\''! (APOS|~'\'')* '\''!;
private void lexQuotedStringLiteral() {
int start = this.pos;
boolean terminated = false;
while (!terminated) {
this.pos++;
char ch = this.toProcess[this.pos];
char ch = this.charsToProcess[this.pos];
if (ch == '\'') {
// may not be the end if the char after is also a '
if (this.toProcess[this.pos + 1] == '\'') {
this.pos++; // skip over that too, and continue
if (this.charsToProcess[this.pos + 1] == '\'') {
this.pos++; // skip over that too, and continue
}
else {
terminated = true;
}
}
if (ch == 0) {
throw new InternalParseException(new SpelParseException(this.expressionString, start,
SpelMessage.NON_TERMINATING_QUOTED_STRING));
if (isExhausted()) {
raiseParseException(start, SpelMessage.NON_TERMINATING_QUOTED_STRING);
}
}
this.pos++;
@ -308,19 +303,18 @@ class Tokenizer {
boolean terminated = false;
while (!terminated) {
this.pos++;
char ch = this.toProcess[this.pos];
char ch = this.charsToProcess[this.pos];
if (ch == '"') {
// may not be the end if the char after is also a "
if (this.toProcess[this.pos + 1] == '"') {
this.pos++; // skip over that too, and continue
if (this.charsToProcess[this.pos + 1] == '"') {
this.pos++; // skip over that too, and continue
}
else {
terminated = true;
}
}
if (ch == 0) {
throw new InternalParseException(new SpelParseException(this.expressionString,
start, SpelMessage.NON_TERMINATING_DOUBLE_QUOTED_STRING));
if (isExhausted()) {
raiseParseException(start, SpelMessage.NON_TERMINATING_DOUBLE_QUOTED_STRING);
}
}
this.pos++;
@ -346,7 +340,7 @@ class Tokenizer {
private void lexNumericLiteral(boolean firstCharIsZero) {
boolean isReal = false;
int start = this.pos;
char ch = this.toProcess[this.pos + 1];
char ch = this.charsToProcess[this.pos + 1];
boolean isHex = ch == 'x' || ch == 'X';
// deal with hexadecimal
@ -355,7 +349,7 @@ class Tokenizer {
do {
this.pos++;
}
while (isHexadecimalDigit(this.toProcess[this.pos]));
while (isHexadecimalDigit(this.charsToProcess[this.pos]));
if (isChar('L', 'l')) {
pushHexIntToken(subarray(start + 2, this.pos), true, start, this.pos);
this.pos++;
@ -372,10 +366,10 @@ class Tokenizer {
do {
this.pos++;
}
while (isDigit(this.toProcess[this.pos]));
while (isDigit(this.charsToProcess[this.pos]));
// a '.' indicates this number is a real
ch = this.toProcess[this.pos];
ch = this.charsToProcess[this.pos];
if (ch == '.') {
isReal = true;
int dotpos = this.pos;
@ -383,7 +377,7 @@ class Tokenizer {
do {
this.pos++;
}
while (isDigit(this.toProcess[this.pos]));
while (isDigit(this.charsToProcess[this.pos]));
if (this.pos == dotpos + 1) {
// the number is something like '3.'. It is really an int but may be
// part of something like '3.toString()'. In this case process it as
@ -398,19 +392,18 @@ class Tokenizer {
// Now there may or may not be an exponent
// is it a long ?
// Is it a long ?
if (isChar('L', 'l')) {
if (isReal) { // 3.4L - not allowed
throw new InternalParseException(new SpelParseException(this.expressionString,
start, SpelMessage.REAL_CANNOT_BE_LONG));
if (isReal) { // 3.4L - not allowed
raiseParseException(start, SpelMessage.REAL_CANNOT_BE_LONG);
}
pushIntToken(subarray(start, endOfNumber), true, start, endOfNumber);
this.pos++;
}
else if (isExponentChar(this.toProcess[this.pos])) {
isReal = true; // if it wasn't before, it is now
else if (isExponentChar(this.charsToProcess[this.pos])) {
isReal = true; // if it wasn't before, it is now
this.pos++;
char possibleSign = this.toProcess[this.pos];
char possibleSign = this.charsToProcess[this.pos];
if (isSign(possibleSign)) {
this.pos++;
}
@ -419,19 +412,19 @@ class Tokenizer {
do {
this.pos++;
}
while (isDigit(this.toProcess[this.pos]));
while (isDigit(this.charsToProcess[this.pos]));
boolean isFloat = false;
if (isFloatSuffix(this.toProcess[this.pos])) {
if (isFloatSuffix(this.charsToProcess[this.pos])) {
isFloat = true;
endOfNumber = ++this.pos;
}
else if (isDoubleSuffix(this.toProcess[this.pos])) {
else if (isDoubleSuffix(this.charsToProcess[this.pos])) {
endOfNumber = ++this.pos;
}
pushRealToken(subarray(start, this.pos), isFloat, start, this.pos);
}
else {
ch = this.toProcess[this.pos];
ch = this.charsToProcess[this.pos];
boolean isFloat = false;
if (isFloatSuffix(ch)) {
isReal = true;
@ -456,7 +449,7 @@ class Tokenizer {
do {
this.pos++;
}
while (isIdentifier(this.toProcess[this.pos]));
while (isIdentifier(this.charsToProcess[this.pos]));
char[] subarray = subarray(start, this.pos);
// Check if this is the alternative (textual) representation of an operator (see
@ -484,14 +477,10 @@ class Tokenizer {
private void pushHexIntToken(char[] data, boolean isLong, int start, int end) {
if (data.length == 0) {
if (isLong) {
throw new InternalParseException(new SpelParseException(this.expressionString,
start, SpelMessage.NOT_A_LONG, this.expressionString.substring(start,
end + 1)));
raiseParseException(start, SpelMessage.NOT_A_LONG, this.expressionString.substring(start, end + 1));
}
else {
throw new InternalParseException(new SpelParseException(this.expressionString,
start, SpelMessage.NOT_AN_INTEGER, this.expressionString.substring(
start, end)));
raiseParseException(start, SpelMessage.NOT_AN_INTEGER, this.expressionString.substring(start, end));
}
}
if (isLong) {
@ -513,7 +502,7 @@ class Tokenizer {
private char[] subarray(int start, int end) {
char[] result = new char[end - start];
System.arraycopy(this.toProcess, start, result, 0, end - start);
System.arraycopy(this.charsToProcess, start, result, 0, end - start);
return result;
}
@ -522,8 +511,8 @@ class Tokenizer {
*/
private boolean isTwoCharToken(TokenKind kind) {
return (kind.tokenChars.length == 2 &&
this.toProcess[this.pos] == kind.tokenChars[0] &&
this.toProcess[this.pos + 1] == kind.tokenChars[1]);
this.charsToProcess[this.pos] == kind.tokenChars[0] &&
this.charsToProcess[this.pos + 1] == kind.tokenChars[1]);
}
/**
@ -552,7 +541,7 @@ class Tokenizer {
}
private boolean isChar(char a, char b) {
char ch = this.toProcess[this.pos];
char ch = this.charsToProcess[this.pos];
return ch == a || ch == b;
}
@ -593,4 +582,12 @@ class Tokenizer {
return (FLAGS[ch] & IS_HEXDIGIT) != 0;
}
private boolean isExhausted() {
return (this.pos == this.max - 1);
}
private void raiseParseException(int start, SpelMessage msg, Object... inserts) {
throw new InternalParseException(new SpelParseException(this.expressionString, start, msg, inserts));
}
}

View File

@ -59,6 +59,7 @@ import org.springframework.expression.spel.support.ReflectivePropertyAccessor;
import org.springframework.expression.spel.support.StandardEvaluationContext;
import org.springframework.expression.spel.support.StandardTypeLocator;
import org.springframework.expression.spel.testresources.le.div.mod.reserved.Reserver;
import org.springframework.util.ObjectUtils;
import static org.hamcrest.Matchers.*;
import static org.junit.Assert.*;
@ -2095,6 +2096,16 @@ public class SpelReproTests extends AbstractExpressionTests {
assertEquals(StandardCharsets.UTF_8, result);
}
@Test
public void SPR16032() {
EvaluationContext context = new StandardEvaluationContext();
context.setVariable("str", "a\0b");
Expression ex = parser.parseExpression("#str?.split('\0')");
Object result = ex.getValue(context);
assertTrue(ObjectUtils.nullSafeEquals(result, new String[] {"a", "b"}));
}
public static class ListOf {