Tokenizer uses single process step and length-based exhaustion check

Issue: SPR-16032
2017-10-04 12:23:16 +02:00 · 2017-10-04 12:23:16 +02:00 · 03b68286a2
parent b2017bb1ec
commit 03b68286a2
3 changed files with 70 additions and 63 deletions
--- a/spring-expression/src/main/java/org/springframework/expression/spel/standard/InternalSpelExpressionParser.java
+++ b/spring-expression/src/main/java/org/springframework/expression/spel/standard/InternalSpelExpressionParser.java
@ -124,8 +124,7 @@ class InternalSpelExpressionParser extends TemplateAwareExpressionParser {
 		try {
 			this.expressionString = expressionString;
 			Tokenizer tokenizer = new Tokenizer(expressionString);
-			tokenizer.process();
-			this.tokenStream = tokenizer.getTokens();
+			this.tokenStream = tokenizer.process();
 			this.tokenStreamLength = this.tokenStream.size();
 			this.tokenStreamPointer = 0;
 			this.constructedNodes.clear();
--- a/spring-expression/src/main/java/org/springframework/expression/spel/standard/Tokenizer.java
+++ b/spring-expression/src/main/java/org/springframework/expression/spel/standard/Tokenizer.java
@ -28,14 +28,15 @@ import org.springframework.expression.spel.SpelParseException;
 * Lex some input data into a stream of tokens that can then be parsed.
 *
 * @author Andy Clement
+ * @author Juergen Hoeller
 * @author Phillip Webb
 * @since 3.0
 */
 class Tokenizer {

-	// if this is changed, it must remain sorted
-	private static final String[] ALTERNATIVE_OPERATOR_NAMES = { "DIV", "EQ", "GE", "GT",
-		"LE", "LT", "MOD", "NE", "NOT" };
+	// If this gets changed, it must remain sorted...
+	private static final String[] ALTERNATIVE_OPERATOR_NAMES =
+			{"DIV", "EQ", "GE", "GT", "LE", "LT", "MOD", "NE", "NOT"};

 	private static final byte FLAGS[] = new byte[256];

@ -64,29 +65,28 @@ class Tokenizer {
 	}


-	String expressionString;
+	private String expressionString;

-	char[] toProcess;
+	private char[] charsToProcess;

-	int pos;
+	private int pos;

-	int max;
+	private int max;

-	List<Token> tokens = new ArrayList<>();
+	private List<Token> tokens = new ArrayList<>();


 	public Tokenizer(String inputData) {
 		this.expressionString = inputData;
-		this.toProcess = (inputData + "\0").toCharArray();
-		this.max = this.toProcess.length;
+		this.charsToProcess = (inputData + "\0").toCharArray();
+		this.max = this.charsToProcess.length;
 		this.pos = 0;
-		process();
 	}


-	public void process() {
+	public List<Token> process() {
 		while (this.pos < this.max) {
-			char ch = this.toProcess[this.pos];
+			char ch = this.charsToProcess[this.pos];
 			if (isAlphabetic(ch)) {
 				lexIdentifier();
 			}
@ -190,9 +190,7 @@ class Tokenizer {
 						break;
 					case '|':
 						if (!isTwoCharToken(TokenKind.SYMBOLIC_OR)) {
-							throw new InternalParseException(new SpelParseException(
-									this.expressionString, this.pos, SpelMessage.MISSING_CHARACTER,
-									"|"));
+							raiseParseException(this.pos, SpelMessage.MISSING_CHARACTER, "|");
 						}
 						pushPairToken(TokenKind.SYMBOLIC_OR);
 						break;
@ -261,41 +259,38 @@ class Tokenizer {
 						break;
 					case 0:
 						// hit sentinel at end of value
-						this.pos++; // will take us to the end
+						this.pos++;  // will take us to the end
 						break;
 					case '\\':
-						throw new InternalParseException(
-								new SpelParseException(this.expressionString, this.pos, SpelMessage.UNEXPECTED_ESCAPE_CHAR));
+						raiseParseException(this.pos, SpelMessage.UNEXPECTED_ESCAPE_CHAR);
+						break;
 					default:
 						throw new IllegalStateException("Cannot handle (" + Integer.valueOf(ch) + ") '" + ch + "'");
 				}
 			}
 		}
-	}
-
-	public List<Token> getTokens() {
 		return this.tokens;
 	}

+
 	// STRING_LITERAL: '\''! (APOS|~'\'')* '\''!;
 	private void lexQuotedStringLiteral() {
 		int start = this.pos;
 		boolean terminated = false;
 		while (!terminated) {
 			this.pos++;
-			char ch = this.toProcess[this.pos];
+			char ch = this.charsToProcess[this.pos];
 			if (ch == '\'') {
 				// may not be the end if the char after is also a '
-				if (this.toProcess[this.pos + 1] == '\'') {
-					this.pos++; // skip over that too, and continue
+				if (this.charsToProcess[this.pos + 1] == '\'') {
+					this.pos++;  // skip over that too, and continue
 				}
 				else {
 					terminated = true;
 				}
 			}
-			if (ch == 0) {
-				throw new InternalParseException(new SpelParseException(this.expressionString, start,
-						SpelMessage.NON_TERMINATING_QUOTED_STRING));
+			if (isExhausted()) {
+				raiseParseException(start, SpelMessage.NON_TERMINATING_QUOTED_STRING);
 			}
 		}
 		this.pos++;
@ -308,19 +303,18 @@ class Tokenizer {
 		boolean terminated = false;
 		while (!terminated) {
 			this.pos++;
-			char ch = this.toProcess[this.pos];
+			char ch = this.charsToProcess[this.pos];
 			if (ch == '"') {
 				// may not be the end if the char after is also a "
-				if (this.toProcess[this.pos + 1] == '"') {
-					this.pos++; // skip over that too, and continue
+				if (this.charsToProcess[this.pos + 1] == '"') {
+					this.pos++;  // skip over that too, and continue
 				}
 				else {
 					terminated = true;
 				}
 			}
-			if (ch == 0) {
-				throw new InternalParseException(new SpelParseException(this.expressionString,
-						start, SpelMessage.NON_TERMINATING_DOUBLE_QUOTED_STRING));
+			if (isExhausted()) {
+				raiseParseException(start, SpelMessage.NON_TERMINATING_DOUBLE_QUOTED_STRING);
 			}
 		}
 		this.pos++;
@ -346,7 +340,7 @@ class Tokenizer {
 	private void lexNumericLiteral(boolean firstCharIsZero) {
 		boolean isReal = false;
 		int start = this.pos;
-		char ch = this.toProcess[this.pos + 1];
+		char ch = this.charsToProcess[this.pos + 1];
 		boolean isHex = ch == 'x' || ch == 'X';

 		// deal with hexadecimal
@ -355,7 +349,7 @@ class Tokenizer {
 			do {
 				this.pos++;
 			}
-			while (isHexadecimalDigit(this.toProcess[this.pos]));
+			while (isHexadecimalDigit(this.charsToProcess[this.pos]));
 			if (isChar('L', 'l')) {
 				pushHexIntToken(subarray(start + 2, this.pos), true, start, this.pos);
 				this.pos++;
@ -372,10 +366,10 @@ class Tokenizer {
 		do {
 			this.pos++;
 		}
-		while (isDigit(this.toProcess[this.pos]));
+		while (isDigit(this.charsToProcess[this.pos]));

 		// a '.' indicates this number is a real
-		ch = this.toProcess[this.pos];
+		ch = this.charsToProcess[this.pos];
 		if (ch == '.') {
 			isReal = true;
 			int dotpos = this.pos;
@ -383,7 +377,7 @@ class Tokenizer {
 			do {
 				this.pos++;
 			}
-			while (isDigit(this.toProcess[this.pos]));
+			while (isDigit(this.charsToProcess[this.pos]));
 			if (this.pos == dotpos + 1) {
 				// the number is something like '3.'. It is really an int but may be
 				// part of something like '3.toString()'. In this case process it as
@ -398,19 +392,18 @@ class Tokenizer {

 		// Now there may or may not be an exponent

-		// is it a long ?
+		// Is it a long?
 		if (isChar('L', 'l')) {
-			if (isReal) { // 3.4L - not allowed
-				throw new InternalParseException(new SpelParseException(this.expressionString,
-						start, SpelMessage.REAL_CANNOT_BE_LONG));
+			if (isReal) {  // 3.4L - not allowed
+				raiseParseException(start, SpelMessage.REAL_CANNOT_BE_LONG);
 			}
 			pushIntToken(subarray(start, endOfNumber), true, start, endOfNumber);
 			this.pos++;
 		}
-		else if (isExponentChar(this.toProcess[this.pos])) {
-			isReal = true; // if it wasn't before, it is now
+		else if (isExponentChar(this.charsToProcess[this.pos])) {
+			isReal = true;  // if it wasn't before, it is now
 			this.pos++;
-			char possibleSign = this.toProcess[this.pos];
+			char possibleSign = this.charsToProcess[this.pos];
 			if (isSign(possibleSign)) {
 				this.pos++;
 			}
@ -419,19 +412,19 @@ class Tokenizer {
 			do {
 				this.pos++;
 			}
-			while (isDigit(this.toProcess[this.pos]));
+			while (isDigit(this.charsToProcess[this.pos]));
 			boolean isFloat = false;
-			if (isFloatSuffix(this.toProcess[this.pos])) {
+			if (isFloatSuffix(this.charsToProcess[this.pos])) {
 				isFloat = true;
 				endOfNumber = ++this.pos;
 			}
-			else if (isDoubleSuffix(this.toProcess[this.pos])) {
+			else if (isDoubleSuffix(this.charsToProcess[this.pos])) {
 				endOfNumber = ++this.pos;
 			}
 			pushRealToken(subarray(start, this.pos), isFloat, start, this.pos);
 		}
 		else {
-			ch = this.toProcess[this.pos];
+			ch = this.charsToProcess[this.pos];
 			boolean isFloat = false;
 			if (isFloatSuffix(ch)) {
 				isReal = true;
@ -456,7 +449,7 @@ class Tokenizer {
 		do {
 			this.pos++;
 		}
-		while (isIdentifier(this.toProcess[this.pos]));
+		while (isIdentifier(this.charsToProcess[this.pos]));
 		char[] subarray = subarray(start, this.pos);

 		// Check if this is the alternative (textual) representation of an operator (see
@ -484,14 +477,10 @@ class Tokenizer {
 	private void pushHexIntToken(char[] data, boolean isLong, int start, int end) {
 		if (data.length == 0) {
 			if (isLong) {
-				throw new InternalParseException(new SpelParseException(this.expressionString,
-						start, SpelMessage.NOT_A_LONG, this.expressionString.substring(start,
-								end + 1)));
+				raiseParseException(start, SpelMessage.NOT_A_LONG, this.expressionString.substring(start, end + 1));
 			}
 			else {
-				throw new InternalParseException(new SpelParseException(this.expressionString,
-						start, SpelMessage.NOT_AN_INTEGER, this.expressionString.substring(
-								start, end)));
+				raiseParseException(start, SpelMessage.NOT_AN_INTEGER, this.expressionString.substring(start, end));
 			}
 		}
 		if (isLong) {
@ -513,7 +502,7 @@ class Tokenizer {

 	private char[] subarray(int start, int end) {
 		char[] result = new char[end - start];
-		System.arraycopy(this.toProcess, start, result, 0, end - start);
+		System.arraycopy(this.charsToProcess, start, result, 0, end - start);
 		return result;
 	}

@ -522,8 +511,8 @@ class Tokenizer {
 	 */
 	private boolean isTwoCharToken(TokenKind kind) {
 		return (kind.tokenChars.length == 2 &&
-				this.toProcess[this.pos] == kind.tokenChars[0] &&
-				this.toProcess[this.pos + 1] == kind.tokenChars[1]);
+				this.charsToProcess[this.pos] == kind.tokenChars[0] &&
+				this.charsToProcess[this.pos + 1] == kind.tokenChars[1]);
 	}

 	/**
@ -552,7 +541,7 @@ class Tokenizer {
 	}

 	private boolean isChar(char a, char b) {
-		char ch = this.toProcess[this.pos];
+		char ch = this.charsToProcess[this.pos];
 		return ch == a || ch == b;
 	}

@ -593,4 +582,12 @@ class Tokenizer {
 		return (FLAGS[ch] & IS_HEXDIGIT) != 0;
 	}

+	private boolean isExhausted() {
+		return (this.pos == this.max - 1);
+	}
+
+	private void raiseParseException(int start, SpelMessage msg, Object... inserts) {
+		throw new InternalParseException(new SpelParseException(this.expressionString, start, msg, inserts));
+	}
+
 }
--- a/spring-expression/src/test/java/org/springframework/expression/spel/SpelReproTests.java
+++ b/spring-expression/src/test/java/org/springframework/expression/spel/SpelReproTests.java
@ -59,6 +59,7 @@ import org.springframework.expression.spel.support.ReflectivePropertyAccessor;
 import org.springframework.expression.spel.support.StandardEvaluationContext;
 import org.springframework.expression.spel.support.StandardTypeLocator;
 import org.springframework.expression.spel.testresources.le.div.mod.reserved.Reserver;
+import org.springframework.util.ObjectUtils;

 import static org.hamcrest.Matchers.*;
 import static org.junit.Assert.*;
@ -2095,6 +2096,16 @@ public class SpelReproTests extends AbstractExpressionTests {
 		assertEquals(StandardCharsets.UTF_8, result);
 	}

+	@Test
+	public void SPR16032() {
+		EvaluationContext context = new StandardEvaluationContext();
+		context.setVariable("str", "a\0b");
+
+		Expression ex = parser.parseExpression("#str?.split('\0')");
+		Object result = ex.getValue(context);
+		assertTrue(ObjectUtils.nullSafeEquals(result, new String[] {"a", "b"}));
+	}
+

 	public static class ListOf {