Avoid crash when client disconnects before server handles MQTT CONNECT

In case of a resource alarm, the server accepts incoming TCP
connections, but does not read from the socket.
When a client connects during a resource alarm, the MQTT CONNECT frame
is therefore not processed.

While the resource alarm is ongoing, the client might time out waiting
on a CONNACK MQTT packet.

When the resource alarm clears on the server, the MQTT CONNECT frame
gets processed.

Prior to this commit, this results in the following crash on the server:
```
** Reason for termination ==
** {{badmatch,{error,einval}},
    [{rabbit_mqtt_processor,process_login,4,
                            [{file,"rabbit_mqtt_processor.erl"},{line,585}]},
     {rabbit_mqtt_processor,process_request,3,
                            [{file,"rabbit_mqtt_processor.erl"},{line,143}]},
     {rabbit_mqtt_processor,process_frame,2,
                            [{file,"rabbit_mqtt_processor.erl"},{line,69}]},
     {rabbit_mqtt_reader,process_received_bytes,2,
                         [{file,"src/rabbit_mqtt_reader.erl"},{line,307}]},
```

After this commit, the server just logs:
```
[error] <0.887.0> MQTT protocol error on connection 127.0.0.1:55725 -> 127.0.0.1:1883: peername_not_known
```

In case the client already disconnected, we want the server to bail out
early, i.e. not authenticating and registering the client at all
since that can be expensive when many clients connected while the
resource alarm was ongoing.

To detect whether the client disconnected, we rely on inet:peername/1
which will return an error when the peer is not connected anymore.

Ideally we could use some better mechanism for detecting whether the
client disconnected.

The MQTT reader does receive a {tcp_closed, Socket} message once the
socket becomes active. However, we don't really want to read frames
ahead (i.e. ahead of the received CONNECT frame), one reason being that:
"Clients are allowed to send further Control Packets immediately
after sending a CONNECT Packet; Clients need not wait for a CONNACK Packet
to arrive from the Server."

Setting socket option `show_econnreset` does not help either because the client
closes the connection normally.

Co-authored-by: Péter Gömöri @gomoripeti
This commit is contained in:
David Ansari 2022-08-25 17:54:30 +02:00
parent e15d12d767
commit 28db862d56
2 changed files with 61 additions and 4 deletions

View File

@ -96,8 +96,7 @@ add_client_id_to_adapter_info(ClientId, #amqp_adapter_info{additional_info = Add
end,
AdapterInfo#amqp_adapter_info{additional_info = AdditionalInfo2}.
process_request(?CONNECT,
#mqtt_frame{ variable = #mqtt_frame_connect{
process_connect(#mqtt_frame{ variable = #mqtt_frame_connect{
username = Username,
password = Password,
proto_ver = ProtoVersion,
@ -199,6 +198,16 @@ process_request(?CONNECT,
?CONNACK_SERVER -> {error, unavailable, PState5};
?CONNACK_INVALID_ID -> {error, invalid_client_id, PState5};
?CONNACK_PROTO_VER -> {error, unsupported_protocol_version, PState5}
end.
process_request(?CONNECT, Frame, PState = #proc_state{socket = Socket}) ->
case rabbit_net:peername(Socket) of
{error, einval} ->
%% Can happen when connection was blocked because of resource alarm
%% and client therefore disconnected due to client side CONNACK timeout.
{error, peername_not_known, PState};
_ ->
process_connect(Frame, PState)
end;
process_request(?PUBACK,
@ -582,8 +591,8 @@ process_login(UserBin, PassBin, ProtoVersion,
adapter_info = AdapterInfo,
ssl_login_name = SslLoginName,
peer_addr = Addr}) ->
{ok, {_, _, _, ToPort}} = rabbit_net:socket_ends(Sock, inbound),
{VHostPickedUsing, {VHost, UsernameBin}} = get_vhost(UserBin, SslLoginName, ToPort),
{ok, {_, LocalPort}} = rabbit_net:sockname(Sock),
{VHostPickedUsing, {VHost, UsernameBin}} = get_vhost(UserBin, SslLoginName, LocalPort),
rabbit_log_connection:debug(
"MQTT vhost picked using ~s",
[human_readable_vhost_lookup_strategy(VHostPickedUsing)]),

View File

@ -19,6 +19,7 @@ groups() ->
[
{non_parallel_tests, [], [
block,
block_connack_timeout,
handle_invalid_frames,
stats,
quorum_session_false,
@ -121,6 +122,53 @@ block(Config) ->
emqttc:disconnect(C).
block_connack_timeout(Config) ->
P = rabbit_ct_broker_helpers:get_node_config(Config, 0, tcp_port_mqtt),
Ports0 = rpc(Config, erlang, ports, []),
ok = rpc(Config, vm_memory_monitor, set_vm_memory_high_watermark, [0.00000001]),
%% Let connection block.
timer:sleep(100),
%% We can still connect via TCP, but CONNECT frame will not be processed on the server.
{ok, Client} = emqttc:start_link([{host, "localhost"},
{port, P},
{client_id, <<"simpleClient">>},
{proto_ver, 3},
{logger, info},
{connack_timeout, 1}]),
timer:sleep(100),
Ports = rpc(Config, erlang, ports, []),
%% Server creates 1 new port to handle our MQTT connection.
[NewPort] = Ports -- Ports0,
{connected, MqttReader} = rpc(Config, erlang, port_info, [NewPort, connected]),
MqttReaderMRef = monitor(process, MqttReader),
unlink(Client),
ClientMRef = monitor(process, Client),
receive
{'DOWN', ClientMRef, process, Client, {shutdown, connack_timeout}} ->
ok
after 2000 ->
ct:fail("missing connack_timeout in client")
end,
%% Unblock connection. CONNECT frame will be processed on the server.
rpc(Config, vm_memory_monitor, set_vm_memory_high_watermark, [0.4]),
receive
{'DOWN', MqttReaderMRef, process, MqttReader, {shutdown, peername_not_known}} ->
%% We expect that MQTT reader process exits with reason peername_not_known
%% because our client already disconnected.
ok
after 2000 ->
ct:fail("missing peername_not_known from server")
end,
%% Ensure that our client is not registered.
[] = rpc(Config, rabbit_mqtt_collector, list, []),
ok.
handle_invalid_frames(Config) ->
N = rpc(Config, ets, info, [connection_metrics, size]),
P = rabbit_ct_broker_helpers:get_node_config(Config, 0, tcp_port_mqtt),