Merge branch '5.2.x' into master

This commit is contained in:
Rossen Stoyanchev 2020-10-23 15:14:05 +01:00
commit cf988f81b5
2 changed files with 276 additions and 152 deletions

View File

@ -30,7 +30,6 @@ import java.nio.channels.WritableByteChannel;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.Callable;
@ -568,65 +567,290 @@ public abstract class DataBufferUtils {
/**
 * Return a {@link Matcher} for the given delimiter.
 * The matcher can be used to find the delimiters in a stream of data buffers.
 * @param delimiter the delimiter bytes to find
 * @return the matcher
 * @since 5.2
 */
public static Matcher matcher(byte[] delimiter) {
	// createMatcher rejects an empty delimiter and selects the
	// implementation based on the delimiter length.
	return createMatcher(delimiter);
}
/** Return a {@link Matcher} for the given delimiters.
* The matcher can be used to find the delimiters in data buffers.
/**
* Return a {@link Matcher} for the given delimiters.
* The matcher can be used to find the delimiters in a stream of data buffers.
* @param delimiters the delimiters bytes to find
* @return the matcher
* @since 5.2
*/
public static Matcher matcher(byte[]... delimiters) {
Assert.isTrue(delimiters.length > 0, "Delimiters must not be empty");
if (delimiters.length == 1) {
return matcher(delimiters[0]);
}
else {
Matcher[] matchers = new Matcher[delimiters.length];
for (int i = 0; i < delimiters.length; i++) {
matchers[i] = matcher(delimiters[i]);
}
return new CompositeMatcher(matchers);
return (delimiters.length == 1 ? createMatcher(delimiters[0]) : new CompositeMatcher(delimiters));
}
/**
 * Create the {@link NestedMatcher} implementation that fits the length of
 * the given delimiter: a single-byte matcher (shared instance for the byte
 * value 10), a two-byte matcher, or a Knuth-Morris-Pratt matcher otherwise.
 * @param delimiter the delimiter bytes to find, must not be empty
 * @return the matcher for the delimiter
 */
private static NestedMatcher createMatcher(byte[] delimiter) {
	Assert.isTrue(delimiter.length > 0, "Delimiter must not be empty");
	int length = delimiter.length;
	if (length == 1) {
		boolean newline = (delimiter[0] == 10);
		if (newline) {
			return SingleByteMatcher.NEWLINE_MATCHER;
		}
		return new SingleByteMatcher(delimiter);
	}
	if (length == 2) {
		return new TwoByteMatcher(delimiter);
	}
	return new KnuthMorrisPrattMatcher(delimiter);
}
/**
 * Contract to find delimiter(s) against one or more data buffers that can
 * be passed one at a time to the {@link #match(DataBuffer)} method.
 *
 * @since 5.2
 * @see #match(DataBuffer)
 */
public interface Matcher {
	/**
	 * Find the first matching delimiter and return the index of the last
	 * byte of the delimiter, or {@code -1} if not found.
	 * @param dataBuffer the buffer in which to search for the delimiter
	 * @return the position of the final matching delimiter byte,
	 * or {@code -1} if not found
	 */
	int match(DataBuffer dataBuffer);
	/**
	 * Return the delimiter from the last invocation of {@link #match(DataBuffer)}.
	 * @return the delimiter
	 */
	byte[] delimiter();
	/**
	 * Reset the state of this matcher.
	 */
	void reset();
}
/**
 * Matcher that searches for several delimiters at once by feeding every
 * byte of the buffer to one nested matcher per delimiter.
 */
private static class CompositeMatcher implements Matcher {
	private static final byte[] NO_DELIMITER = new byte[0];
	private final NestedMatcher[] matchers;
	byte[] longestDelimiter = NO_DELIMITER;

	/**
	 * Create one nested matcher for each of the given delimiters.
	 * @param delimiters the delimiters to search for
	 */
	CompositeMatcher(byte[][] delimiters) {
		NestedMatcher[] nested = new NestedMatcher[delimiters.length];
		int index = 0;
		for (byte[] delimiter : delimiters) {
			nested[index++] = createMatcher(delimiter);
		}
		this.matchers = nested;
	}

	/**
	 * Scan the readable bytes of the buffer and stop at the first position
	 * where at least one nested matcher completes. All nested matchers are
	 * reset before returning that position.
	 * @param dataBuffer the buffer to scan
	 * @return the index of the last delimiter byte, or {@code -1} if no
	 * delimiter was completed within this buffer
	 */
	@Override
	public int match(DataBuffer dataBuffer) {
		this.longestDelimiter = NO_DELIMITER;
		for (int index = dataBuffer.readPosition(); index < dataBuffer.writePosition(); index++) {
			if (advanceAll(dataBuffer.getByte(index))) {
				reset();
				return index;
			}
		}
		return -1;
	}

	/**
	 * Feed the byte to every nested matcher (each one must see it so they
	 * stay in step) and remember the longest delimiter completed by it.
	 * @return whether any nested matcher completed its delimiter
	 */
	private boolean advanceAll(byte b) {
		for (NestedMatcher candidate : this.matchers) {
			boolean completed = candidate.match(b);
			if (completed) {
				byte[] found = candidate.delimiter();
				if (found.length > this.longestDelimiter.length) {
					this.longestDelimiter = found;
				}
			}
		}
		return (this.longestDelimiter != NO_DELIMITER);
	}

	/**
	 * Return the delimiter recorded by the last {@link #match(DataBuffer)}
	 * call; fails the state assertion if that call found none.
	 */
	@Override
	public byte[] delimiter() {
		Assert.state(this.longestDelimiter != NO_DELIMITER, "Illegal state!");
		return this.longestDelimiter;
	}

	/**
	 * Reset every nested matcher.
	 */
	@Override
	public void reset() {
		for (NestedMatcher nested : this.matchers) {
			nested.reset();
		}
	}
}
/**
 * Matcher that can be nested within {@link CompositeMatcher} where multiple
 * matchers advance together using the same index, one byte at a time.
 */
private interface NestedMatcher extends Matcher {
	/**
	 * Perform a match against the next byte of the stream and return true
	 * if the delimiter is fully matched.
	 * @param b the next byte of the stream
	 * @return {@code true} if this byte completes the delimiter
	 */
	boolean match(byte b);
}
/**
 * Matcher for a single byte delimiter. It keeps no state between bytes,
 * which is why {@link #reset()} has nothing to do.
 */
private static class SingleByteMatcher implements NestedMatcher {
	/** Shared matcher for the byte value 10; final so the shared instance cannot be reassigned. */
	static final SingleByteMatcher NEWLINE_MATCHER = new SingleByteMatcher(new byte[] {10});
	private final byte[] delimiter;

	/**
	 * Create a matcher for the given delimiter.
	 * @param delimiter the delimiter, which must consist of exactly one byte
	 */
	SingleByteMatcher(byte[] delimiter) {
		Assert.isTrue(delimiter.length == 1, "Expected a 1 byte delimiter");
		this.delimiter = delimiter;
	}

	/**
	 * Scan the readable bytes of the buffer for the delimiter byte.
	 * @param dataBuffer the buffer to scan
	 * @return the index of the first occurrence, or {@code -1} if absent
	 */
	@Override
	public int match(DataBuffer dataBuffer) {
		for (int pos = dataBuffer.readPosition(); pos < dataBuffer.writePosition(); pos++) {
			byte b = dataBuffer.getByte(pos);
			if (match(b)) {
				return pos;
			}
		}
		return -1;
	}

	/**
	 * Return whether the given byte equals the delimiter byte.
	 */
	@Override
	public boolean match(byte b) {
		return this.delimiter[0] == b;
	}

	/**
	 * Return the delimiter array passed to the constructor (not a copy).
	 */
	@Override
	public byte[] delimiter() {
		return this.delimiter;
	}

	/**
	 * No-op: this matcher holds no per-match state.
	 */
	@Override
	public void reset() {
	}
}
/**
 * Base class for a {@link NestedMatcher} that tracks how many leading bytes
 * of the delimiter have been matched so far.
 */
private abstract static class AbstractNestedMatcher implements NestedMatcher {
	private final byte[] delimiter;
	private int matches = 0;

	/**
	 * Create a matcher for the given delimiter; the array is stored as is.
	 */
	protected AbstractNestedMatcher(byte[] delimiter) {
		this.delimiter = delimiter;
	}

	/**
	 * Set the number of delimiter bytes considered matched so far.
	 */
	protected void setMatches(int index) {
		this.matches = index;
	}

	/**
	 * Return the number of delimiter bytes matched so far.
	 */
	protected int getMatches() {
		return this.matches;
	}

	/**
	 * Feed the readable bytes of the buffer to {@link #match(byte)} one at
	 * a time; when the delimiter completes, reset and return that index.
	 * @param dataBuffer the buffer to scan
	 * @return the index of the last delimiter byte, or {@code -1} if the
	 * delimiter was not completed within this buffer
	 */
	@Override
	public int match(DataBuffer dataBuffer) {
		int pos = dataBuffer.readPosition();
		while (pos < dataBuffer.writePosition()) {
			if (match(dataBuffer.getByte(pos))) {
				reset();
				return pos;
			}
			pos++;
		}
		return -1;
	}

	/**
	 * Compare the byte with the next expected delimiter byte. On a
	 * mismatch the match count is left untouched; adjusting it is up to
	 * subclasses via {@link #setMatches(int)}.
	 * @return {@code true} if this byte completes the delimiter
	 */
	@Override
	public boolean match(byte b) {
		if (b != this.delimiter[this.matches]) {
			return false;
		}
		this.matches++;
		return (this.matches == delimiter().length);
	}

	/**
	 * Return the delimiter array passed to the constructor (not a copy).
	 */
	@Override
	public byte[] delimiter() {
		return this.delimiter;
	}

	/**
	 * Forget any partial match.
	 */
	@Override
	public void reset() {
		this.matches = 0;
	}
}
/**
* Matcher with a 2 byte delimiter that does not benefit from a
* Knuth-Morris-Pratt suffix-prefix table.
*/
private static class TwoByteMatcher extends AbstractNestedMatcher {
protected TwoByteMatcher(byte[] delimiter) {
super(delimiter);
Assert.isTrue(delimiter.length == 2, "Expected a 2-byte delimiter");
}
}
/**
 * Implementation of {@link Matcher} that uses the Knuth-Morris-Pratt algorithm.
 * @see <a href="https://www.nayuki.io/page/knuth-morris-pratt-string-matching">Knuth-Morris-Pratt string matching</a>
 */
private static class KnuthMorrisPrattMatcher extends AbstractNestedMatcher {
	private final int[] table;

	/**
	 * Create a matcher and precompute the fallback table for the delimiter.
	 */
	public KnuthMorrisPrattMatcher(byte[] delimiter) {
		super(delimiter);
		this.table = longestSuffixPrefixTable(delimiter);
	}

	/**
	 * Build the table whose entry {@code i} is the length of the longest
	 * proper prefix of {@code delimiter[0..i]} that is also its suffix.
	 */
	private static int[] longestSuffixPrefixTable(byte[] delimiter) {
		int[] table = new int[delimiter.length];
		table[0] = 0;
		int index = 1;
		while (index < delimiter.length) {
			int candidate = table[index - 1];
			while (candidate > 0 && delimiter[index] != delimiter[candidate]) {
				candidate = table[candidate - 1];
			}
			table[index] = (delimiter[index] == delimiter[candidate] ? candidate + 1 : candidate);
			index++;
		}
		return table;
	}

	/**
	 * Fall back through the table until the byte fits the next expected
	 * delimiter byte (or nothing is matched), then let the base class
	 * consume the byte.
	 * @return {@code true} if this byte completes the delimiter
	 */
	@Override
	public boolean match(byte b) {
		int matched = getMatches();
		while (matched > 0 && b != delimiter()[matched]) {
			matched = this.table[matched - 1];
		}
		setMatches(matched);
		return super.match(b);
	}
}
private static class ReadableByteChannelGenerator implements Consumer<SynchronousSink<DataBuffer>> {
private final ReadableByteChannel channel;
@ -908,124 +1132,4 @@ public abstract class DataBufferUtils {
}
}
/**
 * Implementation of {@link Matcher} that uses the Knuth-Morris-Pratt algorithm.
 * @see <a href="https://www.nayuki.io/page/knuth-morris-pratt-string-matching">Knuth-Morris-Pratt string matching</a>
 */
private static class KnuthMorrisPrattMatcher implements Matcher {
	private final byte[] delimiter;
	private final int[] table;
	private int matches = 0;

	/**
	 * Create a matcher for the given delimiter. The delimiter is copied, so
	 * later changes to the caller's array do not affect this matcher.
	 */
	public KnuthMorrisPrattMatcher(byte[] delimiter) {
		this.delimiter = delimiter.clone();
		this.table = longestSuffixPrefixTable(delimiter);
	}

	/**
	 * Build the table whose entry {@code i} is the length of the longest
	 * proper prefix of {@code delimiter[0..i]} that is also its suffix.
	 */
	private static int[] longestSuffixPrefixTable(byte[] delimiter) {
		int[] table = new int[delimiter.length];
		table[0] = 0;
		int index = 1;
		while (index < delimiter.length) {
			int candidate = table[index - 1];
			while (candidate > 0 && delimiter[index] != delimiter[candidate]) {
				candidate = table[candidate - 1];
			}
			table[index] = (delimiter[index] == delimiter[candidate] ? candidate + 1 : candidate);
			index++;
		}
		return table;
	}

	/**
	 * Scan the readable bytes of the buffer. A partial match is kept in
	 * {@code matches} when the buffer ends, so the delimiter may span
	 * successive buffers.
	 * @param dataBuffer the buffer to scan
	 * @return the index of the last delimiter byte, or {@code -1} if the
	 * delimiter was not completed within this buffer
	 */
	@Override
	public int match(DataBuffer dataBuffer) {
		int index = dataBuffer.readPosition();
		while (index < dataBuffer.writePosition()) {
			byte current = dataBuffer.getByte(index);
			// Fall back to shorter prefixes until the byte fits or nothing is matched.
			while (this.matches > 0 && current != this.delimiter[this.matches]) {
				this.matches = this.table[this.matches - 1];
			}
			if (current == this.delimiter[this.matches]) {
				this.matches++;
				boolean complete = (this.matches == this.delimiter.length);
				if (complete) {
					reset();
					return index;
				}
			}
			index++;
		}
		return -1;
	}

	/**
	 * Return a copy of the delimiter.
	 */
	@Override
	public byte[] delimiter() {
		return this.delimiter.clone();
	}

	/**
	 * Forget any partial match.
	 */
	@Override
	public void reset() {
		this.matches = 0;
	}
}
/**
 * Implementation of {@link Matcher} that wraps several other matchers.
 */
private static class CompositeMatcher implements Matcher {
	private static final byte[] NO_DELIMITER = new byte[0];
	private final Matcher[] matchers;
	byte[] longestDelimiter = NO_DELIMITER;

	/**
	 * Create a composite over the given matchers; the array is stored as is.
	 */
	public CompositeMatcher(Matcher[] matchers) {
		this.matchers = matchers;
	}

	/**
	 * Run every wrapped matcher over the buffer and pick the match that
	 * ends first; when several end at the same index, the one with the
	 * longest delimiter wins. All wrapped matchers are reset once a match
	 * has been selected.
	 * @param dataBuffer the buffer to search
	 * @return the end index of the selected match, or {@code -1} if no
	 * wrapped matcher found its delimiter
	 */
	@Override
	public int match(DataBuffer dataBuffer) {
		this.longestDelimiter = NO_DELIMITER;
		int bestEndIdx = Integer.MAX_VALUE;
		for (Matcher matcher : this.matchers) {
			int endIdx = matcher.match(dataBuffer);
			if (endIdx == -1) {
				continue;
			}
			byte[] candidate = matcher.delimiter();
			// An earlier end index always wins. Delimiter length only breaks
			// ties; requiring a longer delimiter unconditionally would drop an
			// earlier match by a shorter delimiter seen after a longer one.
			boolean earlier = (endIdx < bestEndIdx);
			boolean longerAtSameIndex = (endIdx == bestEndIdx && candidate.length > this.longestDelimiter.length);
			if (earlier || longerAtSameIndex) {
				bestEndIdx = endIdx;
				this.longestDelimiter = candidate;
			}
		}
		if (bestEndIdx == Integer.MAX_VALUE) {
			this.longestDelimiter = NO_DELIMITER;
			return -1;
		}
		else {
			reset();
			return bestEndIdx;
		}
	}

	/**
	 * Return the delimiter selected by the last {@link #match(DataBuffer)}
	 * call; fails the state assertion if that call found none.
	 */
	@Override
	public byte[] delimiter() {
		Assert.state(this.longestDelimiter != NO_DELIMITER, "Illegal state!");
		return this.longestDelimiter;
	}

	/**
	 * Reset every wrapped matcher.
	 */
	@Override
	public void reset() {
		for (Matcher matcher : this.matchers) {
			matcher.reset();
		}
	}
}
}

View File

@ -886,18 +886,38 @@ class DataBufferUtilsTests extends AbstractDataBufferAllocatingTests {
void matcher2(String displayName, DataBufferFactory bufferFactory) {
	super.bufferFactory = bufferFactory;

	// "foooobar" contains two non-overlapping "oo" delimiters, ending at
	// index 2 and index 4.
	DataBuffer foo = stringBuffer("foooobar");

	byte[] delims = "oo".getBytes(StandardCharsets.UTF_8);
	DataBufferUtils.Matcher matcher = DataBufferUtils.matcher(delims);

	int endIndex = matcher.match(foo);
	assertThat(endIndex).isEqualTo(2);

	// Continue right after the previous delimiter.
	foo.readPosition(endIndex + 1);
	endIndex = matcher.match(foo);
	assertThat(endIndex).isEqualTo(4);

	// Only "bar" remains, so no further delimiter is found.
	foo.readPosition(endIndex + 1);
	endIndex = matcher.match(foo);
	assertThat(endIndex).isEqualTo(-1);

	release(foo);
}
@ParameterizedDataBufferAllocatingTest
void matcher3(String displayName, DataBufferFactory bufferFactory) {
	super.bufferFactory = bufferFactory;

	DataBuffer buffer = stringBuffer("foooobar");
	DataBufferUtils.Matcher matcher = DataBufferUtils.matcher("oo".getBytes(StandardCharsets.UTF_8));

	// First "oo" ends at index 2.
	int firstEnd = matcher.match(buffer);
	assertThat(firstEnd).isEqualTo(2);

	// Second "oo" ends at index 4 once reading resumes after the first.
	buffer.readPosition(firstEnd + 1);
	int secondEnd = matcher.match(buffer);
	assertThat(secondEnd).isEqualTo(4);

	// Nothing but "bar" is left to read.
	buffer.readPosition(secondEnd + 1);
	int thirdEnd = matcher.match(buffer);
	assertThat(thirdEnd).isEqualTo(-1);

	release(buffer);
}