CQ shared: Fix off-by-nine error leading to lost messages

This also fixes `eof` crashes.

The problem is that we may end up trying to read more data
from the file when scanning, despite being at the end of the
file. This results in the current Acc being returned instead
of the remaining data being parsed.

This results in some messages at the end of the file being
truncated off despite still being in memory (and still pointing
to the end of the original file, well past the truncation point).
This commit is contained in:
Loïc Hoguin 2025-09-19 15:56:58 +02:00 committed by Michael Klishin
parent 8e9ff4df53
commit ed080e8a27
No known key found for this signature in database
GPG Key ID: 16AB14D00D613900
2 changed files with 12 additions and 1 deletions

View File

@ -1599,7 +1599,7 @@ scan_data(<<Size:64, MsgIdInt:128, _Rest/bits>> = Data, Fd, Fun, Offset, FileSiz
end; end;
%% This might be the start of a message. %% This might be the start of a message.
scan_data(<<Size:64, Rest/bits>> = Data, Fd, Fun, Offset, FileSize, MsgIdsFound, Acc) scan_data(<<Size:64, Rest/bits>> = Data, Fd, Fun, Offset, FileSize, MsgIdsFound, Acc)
when byte_size(Rest) < Size + 1, Size < FileSize - Offset -> when byte_size(Rest) < Size + 1, Size + 9 =< FileSize - Offset ->
scan(Data, Fd, Fun, Offset, FileSize, MsgIdsFound, Acc); scan(Data, Fd, Fun, Offset, FileSize, MsgIdsFound, Acc);
scan_data(Data, Fd, Fun, Offset, FileSize, MsgIdsFound, Acc) scan_data(Data, Fd, Fun, Offset, FileSize, MsgIdsFound, Acc)
when byte_size(Data) < 8 -> when byte_size(Data) < 8 ->

View File

@ -710,6 +710,17 @@ msg_store_file_scan1(Config) ->
[{bin, <<0, 0:48, 17, 17, "idididididididid", 255, 0:4352/unit:8, 255>>}], [{bin, <<0, 0:48, 17, 17, "idididididididid", 255, 0:4352/unit:8, 255>>}],
{ok, [{<<"idididididididid">>, 4378, 1}]}, {ok, [{<<"idididididididid">>, 4378, 1}]},
fun(Obj = {<<"idididididididid">>, 4378, 1}) -> {valid, Obj}; (_) -> invalid end), fun(Obj = {<<"idididididididid">>, 4378, 1}) -> {valid, Obj}; (_) -> invalid end),
%% Off-by-nine regression testing. The file scanning could miss
%% some messages if previous data looked like a message but its
%% size went past the end of the file.
lists:foreach(fun(N) ->
ok = Scan([
{bin, <<(4194304 + N):64, 0:(4194304 - 8 - 25 - 10)/unit:8>>},
{msg, gen_id(), <<>>},
%% Padding ensures there's no 255 at the end of the size indicated by 'bin'.
{pad, 10}
])
end, lists:seq(-9, -1)),
%% All good!! %% All good!!
passed. passed.