Fix AsciiBytes Unicode decoding
Fix the decoding logic in the AsciiBytes `hashCode` and `matches` methods so that they correctly handle multi-byte UTF-8 sequences. Fixes gh-12504.
This commit is contained in:
parent
98a2a91d16
commit
9a64d3bf3f
|
@ -29,7 +29,9 @@ final class AsciiBytes {
|
||||||
|
|
||||||
private static final String EMPTY_STRING = "";
|
private static final String EMPTY_STRING = "";
|
||||||
|
|
||||||
private static final int[] EXCESS = { 0x0, 0x1080, 0x96, 0x1c82080 };
|
private static final int[] INITIAL_BYTE_BITMASK = { 0x7F, 0x1F, 0x0F, 0x07 };
|
||||||
|
|
||||||
|
private static final int SUBSEQUENT_BYTE_BITMASK = 0x3F;
|
||||||
|
|
||||||
private final byte[] bytes;
|
private final byte[] bytes;
|
||||||
|
|
||||||
|
@ -142,13 +144,10 @@ final class AsciiBytes {
|
||||||
int totalLen = (nameLen + (suffix == 0 ? 0 : 1));
|
int totalLen = (nameLen + (suffix == 0 ? 0 : 1));
|
||||||
for (int i = this.offset; i < this.offset + this.length; i++) {
|
for (int i = this.offset; i < this.offset + this.length; i++) {
|
||||||
int b = this.bytes[i];
|
int b = this.bytes[i];
|
||||||
if (b < 0) {
|
int remainingUtfBytes = getNumberOfUtfBytes(b) - 1;
|
||||||
b = b & 0x7F;
|
b &= INITIAL_BYTE_BITMASK[remainingUtfBytes];
|
||||||
int limit = getRemainingUtfBytes(b);
|
for (int j = 0; j < remainingUtfBytes; j++) {
|
||||||
for (int j = 0; j < limit; j++) {
|
b = (b << 6) + (this.bytes[++i] & SUBSEQUENT_BYTE_BITMASK);
|
||||||
b = (b << 6) + (this.bytes[++i] & 0xFF);
|
|
||||||
}
|
|
||||||
b -= EXCESS[limit];
|
|
||||||
}
|
}
|
||||||
char c = getChar(name, suffix, charIndex++);
|
char c = getChar(name, suffix, charIndex++);
|
||||||
if (b <= 0xFFFF) {
|
if (b <= 0xFFFF) {
|
||||||
|
@ -185,13 +184,10 @@ final class AsciiBytes {
|
||||||
if (hash == 0 && this.bytes.length > 0) {
|
if (hash == 0 && this.bytes.length > 0) {
|
||||||
for (int i = this.offset; i < this.offset + this.length; i++) {
|
for (int i = this.offset; i < this.offset + this.length; i++) {
|
||||||
int b = this.bytes[i];
|
int b = this.bytes[i];
|
||||||
if (b < 0) {
|
int remainingUtfBytes = getNumberOfUtfBytes(b) - 1;
|
||||||
b = b & 0x7F;
|
b &= INITIAL_BYTE_BITMASK[remainingUtfBytes];
|
||||||
int limit = getRemainingUtfBytes(b);
|
for (int j = 0; j < remainingUtfBytes; j++) {
|
||||||
for (int j = 0; j < limit; j++) {
|
b = (b << 6) + (this.bytes[++i] & SUBSEQUENT_BYTE_BITMASK);
|
||||||
b = (b << 6) + (this.bytes[++i] & 0xFF);
|
|
||||||
}
|
|
||||||
b -= EXCESS[limit];
|
|
||||||
}
|
}
|
||||||
if (b <= 0xFFFF) {
|
if (b <= 0xFFFF) {
|
||||||
hash = 31 * hash + b;
|
hash = 31 * hash + b;
|
||||||
|
@ -206,8 +202,16 @@ final class AsciiBytes {
|
||||||
return hash;
|
return hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
private int getRemainingUtfBytes(int b) {
|
private int getNumberOfUtfBytes(int b) {
|
||||||
return (b < 96 ? 1 : (b < 112 ? 2 : 3));
|
if ((b & 0x80) == 0) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
int numberOfUtfBytes = 0;
|
||||||
|
while ((b & 0x80) != 0) {
|
||||||
|
b <<= 1;
|
||||||
|
numberOfUtfBytes++;
|
||||||
|
}
|
||||||
|
return numberOfUtfBytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright 2012-2017 the original author or authors.
|
* Copyright 2012-2018 the original author or authors.
|
||||||
*
|
*
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
* you may not use this file except in compliance with the License.
|
* you may not use this file except in compliance with the License.
|
||||||
|
@ -184,6 +184,18 @@ public class AsciiBytesTests {
|
||||||
matchesSameAsString("\ud83d\udca9");
|
matchesSameAsString("\ud83d\udca9");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void hashCodeFromInstanceMatchesHashCodeFromString() {
|
||||||
|
String name = "fonts/宋体/simsun.ttf";
|
||||||
|
assertThat(new AsciiBytes(name).hashCode()).isEqualTo(AsciiBytes.hashCode(name));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void instanceCreatedFromCharSequenceMatchesSameCharSequence() {
|
||||||
|
String name = "fonts/宋体/simsun.ttf";
|
||||||
|
assertThat(new AsciiBytes(name).matches(name, NO_SUFFIX)).isTrue();
|
||||||
|
}
|
||||||
|
|
||||||
private void matchesSameAsString(String input) {
|
private void matchesSameAsString(String input) {
|
||||||
assertThat(new AsciiBytes(input).matches(input, NO_SUFFIX)).isTrue();
|
assertThat(new AsciiBytes(input).matches(input, NO_SUFFIX)).isTrue();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue