mirror of https://github.com/ollama/ollama.git
model: fix boundary in bpe
0x007e is a tilde and was getting adjusted (+0x00a2) to 0x0120 in the encode, but then in the decode it was getting adjusted down (-0x0100) to 0x0020. The boundary for the +0x00a2 case has been adjusted to fix this Fixes: #11966
This commit is contained in:
parent
f8c4f106cc
commit
5de9659112
|
@ -109,7 +109,7 @@ func (bpe BytePairEncoding) Encode(s string, addSpecial bool) ([]int32, error) {
|
|||
r = 0x0143
|
||||
case r <= 0x0020:
|
||||
r = r + 0x0100
|
||||
case r >= 0x007e && r <= 0x00a0:
|
||||
case r >= 0x007f && r <= 0x00a0:
|
||||
r = r + 0x00a2
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue