From be597d7ce973541d1d75477310bff1093e73463c Mon Sep 17 00:00:00 2001
From: Andy Wilkinson
Date: Mon, 24 Oct 2016 11:44:01 +0100
Subject: [PATCH] Fix handling of cyrillic characters in AsciiBytes hashCode method

Closes gh-7202
---
 .../boot/loader/jar/AsciiBytes.java           | 41 +++++++++++++------
 .../boot/loader/jar/AsciiBytesTests.java      | 23 ++++++++---
 2 files changed, 45 insertions(+), 19 deletions(-)

diff --git a/spring-boot-tools/spring-boot-loader/src/main/java/org/springframework/boot/loader/jar/AsciiBytes.java b/spring-boot-tools/spring-boot-loader/src/main/java/org/springframework/boot/loader/jar/AsciiBytes.java
index 83e29a7abec..62f26cedb23 100644
--- a/spring-boot-tools/spring-boot-loader/src/main/java/org/springframework/boot/loader/jar/AsciiBytes.java
+++ b/spring-boot-tools/spring-boot-loader/src/main/java/org/springframework/boot/loader/jar/AsciiBytes.java
@@ -23,6 +23,7 @@ import java.nio.charset.Charset;
  * reasons to save constructing Strings for ZIP data.
  *
  * @author Phillip Webb
+ * @author Andy Wilkinson
  */
 final class AsciiBytes {
 
@@ -156,21 +157,35 @@ final class AsciiBytes {
 		int hash = this.hash;
 		if (hash == 0 && this.bytes.length > 0) {
 			for (int i = this.offset; i < this.offset + this.length; i++) {
-				int b = this.bytes[i] & 0xff;
-				if (b > 0x7F) {
-					// Decode multi-byte UTF
-					for (int size = 0; size < 3; size++) {
-						if ((b & (0x40 >> size)) == 0) {
-							b = b & (0x1F >> size);
-							for (int j = 0; j < size; j++) {
-								b <<= 6;
-								b |= this.bytes[++i] & 0x3F;
-							}
-							break;
-						}
+				int b = this.bytes[i];
+				if (b < 0) {
+					b = b & 0x7F;
+					int limit;
+					int excess = 0x80;
+					if (b < 96) {
+						limit = 1;
+						excess += 0x40 << 6;
 					}
+					else if (b < 112) {
+						limit = 2;
+						excess += (0x60 << 12) + (0x80 << 6);
+					}
+					else {
+						limit = 3;
+						excess += (0x70 << 18) + (0x80 << 12) + (0x80 << 6);
+					}
+					for (int j = 0; j < limit; j++) {
+						b = (b << 6) + (this.bytes[++i] & 0xFF);
+					}
+					b -= excess;
+				}
+				if (b <= 0xFFFF) {
+					hash = 31 * hash + b;
+				}
+				else {
+					hash = 31 * hash + ((b >> 0xA) + 0xD7C0);
+					hash = 31 * hash + ((b & 0x3FF) + 0xDC00);
 				}
-				hash = 31 * hash + b;
 			}
 			this.hash = hash;
 		}
diff --git a/spring-boot-tools/spring-boot-loader/src/test/java/org/springframework/boot/loader/jar/AsciiBytesTests.java b/spring-boot-tools/spring-boot-loader/src/test/java/org/springframework/boot/loader/jar/AsciiBytesTests.java
index f1c5458c6d9..03c13c8658e 100644
--- a/spring-boot-tools/spring-boot-loader/src/test/java/org/springframework/boot/loader/jar/AsciiBytesTests.java
+++ b/spring-boot-tools/spring-boot-loader/src/test/java/org/springframework/boot/loader/jar/AsciiBytesTests.java
@@ -26,6 +26,7 @@ import static org.assertj.core.api.Assertions.assertThat;
  * Tests for {@link AsciiBytes}.
  *
  * @author Phillip Webb
+ * @author Andy Wilkinson
  */
 public class AsciiBytesTests {
 
@@ -140,16 +141,26 @@ public class AsciiBytesTests {
 
 	@Test
 	public void hashCodeSameAsString() throws Exception {
-		String s = "abcABC123xyz!";
-		AsciiBytes a = new AsciiBytes(s);
-		assertThat(s.hashCode()).isEqualTo(a.hashCode());
+		hashCodeSameAsString("abcABC123xyz!");
 	}
 
 	@Test
 	public void hashCodeSameAsStringWithSpecial() throws Exception {
-		String s = "special/\u00EB.dat";
-		AsciiBytes a = new AsciiBytes(s);
-		assertThat(s.hashCode()).isEqualTo(a.hashCode());
+		hashCodeSameAsString("special/\u00EB.dat");
+	}
+
+	@Test
+	public void hashCodeSameAsStringWithCyrillicCharacters() throws Exception {
+		hashCodeSameAsString("\u0432\u0435\u0441\u043D\u0430");
+	}
+
+	@Test
+	public void hashCodeSameAsStringWithEmoji() throws Exception {
+		hashCodeSameAsString("\ud83d\udca9");
+	}
+
+	private void hashCodeSameAsString(String input) {
+		assertThat(new AsciiBytes(input).hashCode()).isEqualTo(input.hashCode());
 	}
 
 }