Add lexer, parser, and evaluator

> A message selector is a String whose syntax is based on a subset of the SQL92[2] conditional expression syntax.

* `leex` is used to scan this string into a flat structure of tokens.
* `yecc` is used to parse these tokens into a tree (conforming with the grammar rules).

Example:

```erl
1> String = "JMSType = 'car' AND color = 'blue' AND weight > 2500".
"JMSType = 'car' AND color = 'blue' AND weight > 2500"

2> {ok, Tokens, _} = rabbit_jms_selector_lexer:string(String).
{ok,[{identifier,1,<<"JMSType">>},
     {'=',1},
     {string,1,<<"car">>},
     {'AND',1},
     {identifier,1,<<"color">>},
     {'=',1},
     {string,1,<<"blue">>},
     {'AND',1},
     {identifier,1,<<"weight">>},
     {'>',1},
     {integer,1,2500}],
    1}

3> {ok, Expr} = rabbit_jms_selector_parser:parse(Tokens).
{ok,{'and',{'and',{'=',{identifier,<<"JMSType">>},
                       {string,<<"car">>}},
                  {'=',{identifier,<<"color">>},{string,<<"blue">>}}},
           {'>',{identifier,<<"weight">>},{integer,2500}}}}

4> Headers = [{<<"JMSType">>, <<"car">>}, {<<"color">>, <<"blue">>}, {<<"weight">>, 3000}].
[{<<"JMSType">>,<<"car">>},
 {<<"color">>,<<"blue">>},
 {<<"weight">>,3000}]

5> rabbit_fifo_filter:evaluate(Expr, Headers).
true
```
This commit is contained in:
David Ansari 2025-05-05 18:00:30 +02:00
parent 1baf82027e
commit c26cb9fec7
6 changed files with 3935 additions and 0 deletions

View File

@ -364,6 +364,9 @@ ifdef TRACE_SUPERVISOR2
RMQ_ERLC_OPTS += -DTRACE_SUPERVISOR2=true
endif
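# Generate deterministic output: besides the compiler, leex also takes the
# default option `deterministic` from this environment variable, see
# https://www.erlang.org/doc/apps/parsetools/leex.html#file/2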
export ERL_COMPILER_OPTIONS := deterministic
# --------------------------------------------------------------------
# Documentation.
# --------------------------------------------------------------------

267
deps/rabbit/src/rabbit_fifo_filter.erl vendored Normal file
View File

@ -0,0 +1,267 @@
-module(rabbit_fifo_filter).
-export([evaluate/2]).
%% Decides whether a message matches a parsed JMS-selector-like expression.
%%
%% `Expression' is the expression tree and `Headers' is the list of
%% `{Name, Value}' pairs in which identifiers are looked up.
%% The message matches only if the expression evaluates to exactly `true';
%% `false' and every other result (such as `undefined' for unknown) mean
%% that it does not match.
-spec evaluate(term(), [{binary(), term()}]) -> boolean().
evaluate(Expression, Headers) ->
    eval(Expression, Headers) =:= true.
%% Evaluates an expression tree against the message headers and returns the
%% value of the expression. The atom `undefined' represents the unknown value
%% (SQL NULL), for example when a referenced header is absent or an operator
%% is applied to an operand of the wrong type.

%% Literals
eval({Type, Value}, _Headers)
  when Type =:= integer orelse
       Type =:= float orelse
       Type =:= boolean orelse
       Type =:= string ->
    Value;
%% Identifier lookup: a header that is not present evaluates to `undefined'.
eval({identifier, Name}, Headers) ->
    proplists:get_value(Name, Headers);
%% Logical operators
%%
%% An operand that is neither `true' nor `false' is treated as unknown on
%% either side, so that the result does not depend on the operand order.
%%
%% Table 3-4 in
%% https://jakarta.ee/specifications/messaging/3.1/jakarta-messaging-spec-3.1#null-values
eval({'and', Expr1, Expr2}, Headers) ->
    case eval(Expr1, Headers) of
        false ->
            %% Short-circuit
            false;
        true ->
            tristate(eval(Expr2, Headers));
        _Unknown ->
            %% unknown AND false = false, anything else stays unknown
            case eval(Expr2, Headers) of
                false -> false;
                _ -> undefined
            end
    end;
%% Table 3-5 in
%% https://jakarta.ee/specifications/messaging/3.1/jakarta-messaging-spec-3.1#null-values
eval({'or', Expr1, Expr2}, Headers) ->
    case eval(Expr1, Headers) of
        true ->
            %% Short-circuit
            true;
        false ->
            tristate(eval(Expr2, Headers));
        _Unknown ->
            %% unknown OR true = true, anything else stays unknown
            case eval(Expr2, Headers) of
                true -> true;
                _ -> undefined
            end
    end;
%% Table 3-6 in
%% https://jakarta.ee/specifications/messaging/3.1/jakarta-messaging-spec-3.1#null-values
eval({'not', Expr}, Headers) ->
    negate(eval(Expr, Headers));
%% Comparison operators
eval({Op, Expr1, Expr2}, Headers)
  when Op =:= '=' orelse
       Op =:= '<>' orelse
       Op =:= '>' orelse
       Op =:= '<' orelse
       Op =:= '>=' orelse
       Op =:= '<=' ->
    compare(Op, eval(Expr1, Headers), eval(Expr2, Headers));
%% Arithmetic operators
eval({Op, Expr1, Expr2}, Headers)
  when Op =:= '+' orelse
       Op =:= '-' orelse
       Op =:= '*' orelse
       Op =:= '/' ->
    arithmetic(Op, eval(Expr1, Headers), eval(Expr2, Headers));
%% Unary operators: only defined for numbers
eval({unary_plus, Expr}, Headers) ->
    case eval(Expr, Headers) of
        Val when is_number(Val) -> Val;
        _ -> undefined
    end;
eval({unary_minus, Expr}, Headers) ->
    case eval(Expr, Headers) of
        Val when is_number(Val) -> -Val;
        _ -> undefined
    end;
%% Special operators
%%
%% Each NOT variant is the three-valued negation of its positive form.
eval({'between', Expr, From, To}, Headers) ->
    Value = eval(Expr, Headers),
    FromVal = eval(From, Headers),
    ToVal = eval(To, Headers),
    between(Value, FromVal, ToVal);
eval({'not_between', Expr, From, To}, Headers) ->
    negate(eval({'between', Expr, From, To}, Headers));
eval({'in', Expr, ValueList}, Headers) ->
    Value = eval(Expr, Headers),
    is_in(Value, [eval(Item, Headers) || Item <- ValueList]);
eval({'not_in', Expr, ValueList}, Headers) ->
    negate(eval({'in', Expr, ValueList}, Headers));
eval({'like', Expr, Pattern, Escape}, Headers) ->
    Value = eval(Expr, Headers),
    PatternVal = eval(Pattern, Headers),
    %% `no_escape' is a marker and not an expression, so it is passed through
    EscapeVal = case Escape of
                    no_escape -> no_escape;
                    _ -> eval(Escape, Headers)
                end,
    is_like(Value, PatternVal, EscapeVal);
eval({'not_like', Expr, Pattern, Escape}, Headers) ->
    negate(eval({'like', Expr, Pattern, Escape}, Headers));
eval({'is_null', Expr}, Headers) ->
    eval(Expr, Headers) =:= undefined;
eval({'is_not_null', Expr}, Headers) ->
    eval(Expr, Headers) =/= undefined;
%% Bare values evaluate to themselves
eval(Value, _Headers)
  when is_binary(Value) orelse
       is_number(Value) orelse
       is_boolean(Value) ->
    Value;
%% Default case for unknown expressions
eval(_, _) ->
    undefined.

%% Maps a value to three-valued logic: everything except `true' and `false'
%% is unknown.
tristate(true) -> true;
tristate(false) -> false;
tristate(_) -> undefined.

%% Three-valued NOT: unknown stays unknown.
negate(true) -> false;
negate(false) -> true;
negate(_) -> undefined.
%% Helper functions

%% Applies the comparison operator `Op' to two already evaluated operands.
%%
%% "Comparison or arithmetic with an unknown value always yields an unknown value."
%%
%% Otherwise only like type values are compared: two numbers (integers and
%% floats may be mixed), two strings (binaries) or two booleans.
%% The JMS specification defines the comparison of non-like type values as
%% false. Without this check Erlang's term order would be applied, in which
%% any atom or binary is greater than any number.
compare(_, undefined, _) -> undefined;
compare(_, _, undefined) -> undefined;
compare(Op, Left, Right) ->
    case is_comparable(Left, Right) of
        true -> compare_values(Op, Left, Right);
        false -> false
    end.

%% Returns whether both operands are of like type.
is_comparable(Left, Right) when is_number(Left) andalso is_number(Right) -> true;
is_comparable(Left, Right) when is_binary(Left) andalso is_binary(Right) -> true;
is_comparable(Left, Right) when is_boolean(Left) andalso is_boolean(Right) -> true;
is_comparable(_, _) -> false.

%% `==' and `/=' are used on purpose so that 1 equals 1.0.
compare_values('=', Left, Right) -> Left == Right;
compare_values('<>', Left, Right) -> Left /= Right;
compare_values('>', Left, Right) -> Left > Right;
compare_values('<', Left, Right) -> Left < Right;
compare_values('>=', Left, Right) -> Left >= Right;
compare_values('<=', Left, Right) -> Left =< Right.
%% Applies the arithmetic operator `Op' to two already evaluated operands.
%%
%% Yields `undefined' (unknown) if an operand is unknown or is not a number.
%% Dividing two integers performs integer division.
%% Division by zero (integer 0 as well as float 0.0) does not raise an
%% exception: it yields one of the atoms `infinity', `'-infinity'' or `'NaN'',
%% depending on the sign of the dividend.
arithmetic(_, undefined, _) ->
    undefined;
arithmetic(_, _, undefined) ->
    undefined;
arithmetic('+', Left, Right) when is_number(Left) andalso is_number(Right) ->
    Left + Right;
arithmetic('-', Left, Right) when is_number(Left) andalso is_number(Right) ->
    Left - Right;
arithmetic('*', Left, Right) when is_number(Left) andalso is_number(Right) ->
    Left * Right;
%% `Right == 0' (rather than `=:=') also catches the float 0.0, for which
%% `Left / Right' would raise badarith.
arithmetic('/', Left, Right)
  when is_number(Left) andalso
       is_number(Right) andalso
       Right == 0 ->
    division_by_zero(Left);
arithmetic('/', Left, Right) when is_integer(Left) andalso is_integer(Right) ->
    Left div Right;
arithmetic('/', Left, Right) when is_number(Left) andalso is_number(Right) ->
    Left / Right;
arithmetic(_, _, _) ->
    undefined.

%% Result of dividing the number `Dividend' by zero.
division_by_zero(Dividend) when Dividend > 0 -> infinity;
division_by_zero(Dividend) when Dividend < 0 -> '-infinity';
division_by_zero(_Zero) -> 'NaN'.
%% Evaluates `Value BETWEEN From AND To' (both bounds inclusive) for already
%% evaluated operands.
%%
%% The result is `undefined' (unknown) if any operand is unknown or is not a
%% number. Non-numeric operands must not be ordered by Erlang's term order.
%% They yield unknown instead of `false' because NOT BETWEEN is computed by
%% inverting this result: that way a non-numeric operand satisfies neither
%% BETWEEN nor NOT BETWEEN.
between(Value, From, To)
  when is_number(Value) andalso
       is_number(From) andalso
       is_number(To) ->
    From =< Value andalso Value =< To;
between(_, _, _) ->
    undefined.
%% Evaluates `Value IN (Candidates)' for already evaluated operands.
%%
%% An unknown value yields `undefined'. Otherwise the result says whether
%% some candidate is exactly equal (`=:=') to the value.
is_in(Value, Candidates) ->
    case Value of
        undefined ->
            undefined;
        _ ->
            lists:any(fun(Candidate) -> Candidate =:= Value end, Candidates)
    end.
%% Evaluates `Value LIKE Pattern [ESCAPE Escape]' for already evaluated
%% operands.
%%
%% `Escape' is either the atom `no_escape' or the evaluated escape expression.
%% The result is `true' or `false' if `Value' and `Pattern' are valid UTF-8
%% binaries and `Escape' is `no_escape' or a string of exactly one character.
%% All other operands yield `undefined' (unknown).
is_like(Value, Pattern, Escape)
  when is_binary(Value) andalso
       is_binary(Pattern) ->
    %% Validate up front: compiling or running a regex in unicode mode
    %% fails on binaries that are not valid UTF-8.
    case is_utf8(Value) andalso
         is_utf8(Pattern) andalso
         like_escape_char(Escape) =/= error of
        true ->
            RegexPattern = like_to_regex(Pattern, Escape),
            case re:run(Value, RegexPattern, [{capture, none}]) of
                match -> true;
                nomatch -> false
            end;
        false ->
            undefined
    end;
is_like(_, _, _) ->
    undefined.

%% Returns whether the binary is valid UTF-8.
is_utf8(Bin) ->
    is_binary(unicode:characters_to_binary(Bin)).

%% Converts the evaluated ESCAPE operand into the form used while converting
%% the pattern: `{ok, no_escape}' or `{ok, Char}' where `Char' is the code
%% point of the single escape character. Returns `error' for anything else.
like_escape_char(no_escape) ->
    {ok, no_escape};
like_escape_char(Escape) when is_binary(Escape) ->
    case unicode:characters_to_list(Escape) of
        [Char] -> {ok, Char};
        _ -> error
    end;
like_escape_char(_) ->
    error.

%% Convert LIKE pattern to regex
%%
%% `Pattern' must be a valid UTF-8 binary and `Escape' must be `no_escape' or
%% a binary holding exactly one character. Returns the compiled regex.
%%
%% TODO compilation should happen when the consumer attaches.
%% Should this happen within rabbit_jms_selector_parser.yrl?
like_to_regex(Pattern, Escape) ->
    {ok, EscapeChar} = like_escape_char(Escape),
    {ok, Regex} = convert_like_to_regex(unicode:characters_to_list(Pattern), EscapeChar),
    %% unicode: `_' matches one character instead of one byte.
    %% dotall: `%' and `_' also match newline characters.
    %% dollar_endonly: `$' matches only at the very end of the value and
    %% not before a trailing newline.
    {ok, MP} = re:compile(<<"^", Regex/binary, "$">>,
                          [unicode, dotall, dollar_endonly]),
    MP.

%% Converts the LIKE pattern, given as a list of code points, into the source
%% of a regular expression (without anchors). `Escape' is the code point of
%% the escape character or `no_escape'.
convert_like_to_regex(Pattern, Escape) ->
    convert_like_to_regex(Pattern, [], Escape).

convert_like_to_regex([], Acc, _) ->
    {ok, unicode:characters_to_binary(lists:reverse(Acc))};
convert_like_to_regex([Esc, Char | Rest], Acc, Esc) when Esc =/= no_escape ->
    % Escaped character - take literally
    convert_like_to_regex(Rest, [escape_regex_char(Char) | Acc], Esc);
convert_like_to_regex([$% | Rest], Acc, Esc) ->
    % % means any sequence of characters (including none)
    convert_like_to_regex(Rest, [".*" | Acc], Esc);
convert_like_to_regex([$_ | Rest], Acc, Esc) ->
    % _ means any single character
    convert_like_to_regex(Rest, [$. | Acc], Esc);
convert_like_to_regex([Char | Rest], Acc, Esc) ->
    % Regular character - escape for regex
    convert_like_to_regex(Rest, [escape_regex_char(Char) | Acc], Esc).

%% Escape special regex characters
%%
%% Returns the character itself, or the character preceded by a backslash if
%% it has a special meaning in a regular expression.
escape_regex_char(Char) ->
    case lists:member(Char, ".*+?^$[](){}|\\") of
        true -> [$\\, Char];
        false -> Char
    end.

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,74 @@
%%% This is the definitions file for JMS message selectors:
%%% https://jakarta.ee/specifications/messaging/3.1/jakarta-messaging-spec-3.1#message-selector
%%%
%%% To manually generate the scanner file rabbit_jms_selector_lexer.erl run:
%%% leex:file("rabbit_jms_selector_lexer.xrl", [deterministic]).
% Macros that are used in the regular expressions of the rules.
Definitions.
% A single whitespace character: space, tab, newline or carriage return
WHITESPACE = [\s\t\n\r]
DIGIT = [0-9]
% Unsigned integer literal: one or more digits
INT = {DIGIT}+
% Literal with a decimal point, digits on both sides of it and an optional
% exponent, for example 57.9 or 57.9E2
FLOAT = {DIGIT}+\.{DIGIT}+([eE][\+\-]?{INT})?
% Literal with an exponent but without a decimal point, for example 7E3
EXPONENT = [0-9]+[eE][\+\-]?[0-9]+
% Starts with a letter, _ or $ followed by letters, digits, _ or $
IDENTIFIER = [a-zA-Z_$][a-zA-Z0-9_$]*
% Enclosed in single quotes; a single quote inside the string is written as
% two consecutive single quotes
STRING = '([^']|'')*'
% Each rule maps a regular expression to the token that is emitted for it.
% leex picks the rule with the longest match and, among equally long matches,
% the rule that is listed first. That is why the keyword rules precede the
% {IDENTIFIER} rule: "and" is a keyword while "android" is an identifier.
Rules.
{WHITESPACE}+ : skip_token.
% Logical operators (case insensitive)
[aA][nN][dD] : {token, {'AND', TokenLine}}.
[oO][rR] : {token, {'OR', TokenLine}}.
[nN][oO][tT] : {token, {'NOT', TokenLine}}.
% Special operators (case insensitive)
[bB][eE][tT][wW][eE][eE][nN] : {token, {'BETWEEN', TokenLine}}.
[lL][iI][kK][eE] : {token, {'LIKE', TokenLine}}.
[iI][nN] : {token, {'IN', TokenLine}}.
[iI][sS] : {token, {'IS', TokenLine}}.
[nN][uU][lL][lL] : {token, {'NULL', TokenLine}}.
[eE][sS][cC][aA][pP][eE] : {token, {'ESCAPE', TokenLine}}.
% Boolean literals (case insensitive)
[tT][rR][uU][eE] : {token, {boolean, TokenLine, true}}.
[fF][aA][lL][sS][eE] : {token, {boolean, TokenLine, false}}.
% Comparison operators
= : {token, {'=', TokenLine}}.
<> : {token, {'<>', TokenLine}}.
>= : {token, {'>=', TokenLine}}.
<= : {token, {'<=', TokenLine}}.
> : {token, {'>', TokenLine}}.
< : {token, {'<', TokenLine}}.
% Arithmetic operators
\+ : {token, {'+', TokenLine}}.
- : {token, {'-', TokenLine}}.
\* : {token, {'*', TokenLine}}.
/ : {token, {'/', TokenLine}}.
% Parentheses and comma
\( : {token, {'(', TokenLine}}.
\) : {token, {')', TokenLine}}.
, : {token, {',', TokenLine}}.
% Literals
{INT} : {token, {integer, TokenLine, list_to_integer(TokenChars)}}.
{FLOAT} : {token, {float, TokenLine, list_to_float(TokenChars)}}.
% list_to_float/1 requires a decimal point and raises badarg for text such as
% "7E3". Therefore ".0" is inserted in front of the exponent ("7.0e3") first.
{EXPONENT} : {token, {float, TokenLine, list_to_float(re:replace(TokenChars, "[eE]", ".0e", [{return, list}]))}}.
{STRING} : {token, {string, TokenLine, process_string(TokenChars)}}.
{IDENTIFIER} : {token, {identifier, TokenLine, list_to_binary(TokenChars)}}.
% Catch any other characters as errors
. : {error, {illegal_character, TokenChars}}.
Erlang code.
%% Converts the characters of a string token, including the enclosing single
%% quotes, into the binary value of the string literal.
process_string(Chars) ->
    %% drop the opening and the closing quote
    Inner = lists:droplast(tl(Chars)),
    process_escaped_quotes(unicode:characters_to_binary(Inner)).

%% Replaces each escaped quote (two consecutive single quotes) with one
%% single quote.
process_escaped_quotes(Binary) ->
    Parts = binary:split(Binary, <<"''">>, [global]),
    iolist_to_binary(lists:join(<<"'">>, Parts)).

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,119 @@
%%% This is the grammar file for JMS message selectors:
%%% https://jakarta.ee/specifications/messaging/3.1/jakarta-messaging-spec-3.1#message-selector
%%%
%%% To manually generate the parser file rabbit_jms_selector_parser.erl run:
%%% yecc:file("rabbit_jms_selector_parser.yrl", [deterministic]).
%% Nonterminals: the symbols that are defined by the grammar rules below.
Nonterminals
selector
conditional_expr
comparison_expr
logical_expr
additive_expr
multiplicative_expr
unary_expr
primary
literal
identifier_expr
string_list
string_item
between_expr
in_expr
like_expr
is_null_expr.
%% Terminals: the categories of the tokens that the parser receives.
%% Literal and identifier tokens have the form {Category, Line, Value},
%% operator and keyword tokens have the form {Category, Line}.
Terminals
integer float boolean string identifier
'=' '<>' '>' '<' '>=' '<='
'+' '-' '*' '/'
'AND' 'OR' 'NOT'
'BETWEEN' 'LIKE' 'IN' 'IS' 'NULL' 'ESCAPE'
'(' ')' ','.
%% Parsing succeeds if the whole token list can be reduced to a selector.
Rootsymbol selector.
%% operator precedences (lowest to highest)
%% A higher number binds tighter, e.g. a OR b AND c is parsed as a OR (b AND c)
%% and NOT a AND b is parsed as (NOT a) AND b.
Left 100 'OR'.
Left 200 'AND'.
Nonassoc 300 '=' '<>' '>' '<' '>=' '<='.
Left 400 '+' '-'.
Left 500 '*' '/'.
Unary 600 'NOT'.
%% "A selector is a conditional expression"
%% Grammar rules. The code after the colon builds the value of a rule, '$N'
%% is the value of the N-th symbol on the right-hand side. The parse result
%% is a tree of tagged tuples such as {'and', Left, Right}.
selector -> conditional_expr : '$1'.
%% Conditional expressions
conditional_expr -> logical_expr : '$1'.
%% Logical expressions
logical_expr -> logical_expr 'AND' logical_expr : {'and', '$1', '$3'}.
logical_expr -> logical_expr 'OR' logical_expr : {'or', '$1', '$3'}.
logical_expr -> 'NOT' logical_expr : {'not', '$2'}.
logical_expr -> comparison_expr : '$1'.
%% Comparison expressions
%% Both operands are additive expressions, hence comparisons cannot be
%% chained without parentheses.
comparison_expr -> additive_expr '=' additive_expr : {'=', '$1', '$3'}.
comparison_expr -> additive_expr '<>' additive_expr : {'<>', '$1', '$3'}.
comparison_expr -> additive_expr '>' additive_expr : {'>', '$1', '$3'}.
comparison_expr -> additive_expr '<' additive_expr : {'<', '$1', '$3'}.
comparison_expr -> additive_expr '>=' additive_expr : {'>=', '$1', '$3'}.
comparison_expr -> additive_expr '<=' additive_expr : {'<=', '$1', '$3'}.
comparison_expr -> between_expr : '$1'.
comparison_expr -> like_expr : '$1'.
comparison_expr -> in_expr : '$1'.
comparison_expr -> is_null_expr : '$1'.
%% A plain additive expression is also accepted, e.g. a boolean identifier
comparison_expr -> additive_expr : '$1'.
%% BETWEEN expression
between_expr -> additive_expr 'BETWEEN' additive_expr 'AND' additive_expr : {'between', '$1', '$3', '$5'}.
between_expr -> additive_expr 'NOT' 'BETWEEN' additive_expr 'AND' additive_expr : {'not_between', '$1', '$4', '$6'}.
%% LIKE expression
%% The last tuple element is the ESCAPE expression or the atom no_escape.
like_expr -> additive_expr 'LIKE' additive_expr : {'like', '$1', '$3', no_escape}.
like_expr -> additive_expr 'LIKE' additive_expr 'ESCAPE' additive_expr : {'like', '$1', '$3', '$5'}.
like_expr -> additive_expr 'NOT' 'LIKE' additive_expr : {'not_like', '$1', '$4', no_escape}.
like_expr -> additive_expr 'NOT' 'LIKE' additive_expr 'ESCAPE' additive_expr : {'not_like', '$1', '$4', '$6'}.
%% IN expression
%% The list consists of string literals only. Its items are the plain binary
%% values and not {string, Value} tuples.
in_expr -> additive_expr 'IN' '(' string_list ')' : {'in', '$1', '$4'}.
in_expr -> additive_expr 'NOT' 'IN' '(' string_list ')' : {'not_in', '$1', '$5'}.
string_list -> string_item : ['$1'].
string_list -> string_item ',' string_list : ['$1'|'$3'].
string_item -> string : extract_value('$1').
%% IS NULL expression
%% Only an identifier is accepted on the left-hand side.
is_null_expr -> identifier_expr 'IS' 'NULL' : {'is_null', '$1'}.
is_null_expr -> identifier_expr 'IS' 'NOT' 'NULL' : {'is_not_null', '$1'}.
%% Arithmetic expressions
%% The rules are left recursive, so a - b - c is parsed as (a - b) - c, and
%% multiplicative operators bind tighter than additive operators.
additive_expr -> additive_expr '+' multiplicative_expr : {'+', '$1', '$3'}.
additive_expr -> additive_expr '-' multiplicative_expr : {'-', '$1', '$3'}.
additive_expr -> multiplicative_expr : '$1'.
multiplicative_expr -> multiplicative_expr '*' unary_expr : {'*', '$1', '$3'}.
multiplicative_expr -> multiplicative_expr '/' unary_expr : {'/', '$1', '$3'}.
multiplicative_expr -> unary_expr : '$1'.
%% Handle unary operators through grammar structure instead of precedence
%% A sign is followed by a primary, so at most one sign can precede an operand.
unary_expr -> '+' primary : {unary_plus, '$2'}.
unary_expr -> '-' primary : {unary_minus, '$2'}.
unary_expr -> primary : '$1'.
%% Primary expressions
%% Parentheses may enclose any conditional expression.
primary -> '(' conditional_expr ')' : '$2'.
primary -> literal : '$1'.
primary -> identifier_expr : '$1'.
%% Identifiers (header fields or property references)
identifier_expr -> identifier : {identifier, extract_value('$1')}.
%% Literals
%% The line number of the token is dropped: {Category, Line, Value} becomes
%% {Category, Value}.
literal -> integer : {integer, extract_value('$1')}.
literal -> float : {float, extract_value('$1')}.
literal -> string : {string, extract_value('$1')}.
literal -> boolean : {boolean, extract_value('$1')}.
Erlang code.
%% Returns the value of a token, i.e. the third element of a
%% {Category, Line, Value} tuple.
extract_value(Token) when tuple_size(Token) =:= 3 ->
    element(3, Token).