Compare commits

...

2 Commits

Author SHA1 Message Date
Sashan 6aa496a6fb
Merge 60677b1a11 into f12f8cc035 2025-07-30 16:30:20 +02:00
Alexandr Nedvedicky 60677b1a11 Make SSL_poll() and SSL_shutdown() better friends
Current QUIC stack may leave connection monitored by SSL_poll() to stale
during regular shutdown.  The issue is triggered when ACK for client's
FIN gets delayed. The sequeance of operations to trigger
the stale of QUIC connection at client goes as follows:

	- application calls SSL_shutdown() on connection,
	  the shutdown can not proceed, because bi-directional
	  stream must be flushed. The client awaits ACK from
	  server acknowledging reception of FIN on client's stream

	- the stream object gets destroyed, because application
	  received all data from server.

	- application updates poll set and passes to SSL_poll()

	- ssl poll ticks the engine. Engine receives delayed ACK
	  and marks stream as flushed. At this point the SSL_shutdown()
	  operation may proceed given the application calls the
	  SSL_shutdown(). However there is no mechanism to make SSL_poll()
	  return so application is unable to proceed with its event
	  loop where SSL_shutdown() may get called.

This change introduces ossl_quic_channel_notify_flush_done() function
which notifies channel when all streams are flushed (all FINs got ACKed).

The first thing SSL_shudown() does it calls ossl_quic_stream_map_begin_shutdown_flush().
The function walks list of all streams attached to channel and notes how many
streams is missing ACK for their FIN. In our test case it finds one such stream.
Call to SSL_shutdown() returns and application destroys the SSL stream object
and updates a poll set.

SSL_poll() gets called. The QUIC stack (engine) gets ticked and reads data
from socket. It processes delayed ACK now. The ACK-manager updates the
stream notifying the server ACKs the FIN sent by client. The stream
is flushed now. Thw shutdown_flush_done() for stream gets called on
behalf of ACK manager.

The shutdown_flush_done() does two things:
	- it marks stream as flushed
	- it decrements the num_shutdown_flush counter initialized
	  be earlier call to ossl_quic_stream_map_begin_shutdown_flush()
	  called by SSL_shutdown()
The change here calls ossl_quic_channel_notify_flush_done() when
num_shutdown_flush reaches zero.

The ossl_quic_channel_notify_flush_done() then calls function
ossl_quic_channel_notify_flush_done(), which just moves the state
of the channel (connection) from active to terminating state.
The change of channel state is sufficent for SSL_poll() to
signal _EC event on connection.

Once application receives _EC event on connection it should
check the state of the channel/reason of error. In regular case
the error/channel state hints application to call SSL_shutdown()
so connection object can proceed with connection shutdown.
The SSL_shutdown() call done now moves channel to terminated
state. So the next call to SSL_poll() can signal _ECD which
tells application it's time to stop polling on SSL connection
object and destroy it.

Fixes openssl/project#1291
2025-07-30 00:03:43 +02:00
6 changed files with 128 additions and 43 deletions

View File

@ -345,6 +345,8 @@ int ossl_quic_channel_is_terminated(const QUIC_CHANNEL *ch);
int ossl_quic_channel_is_active(const QUIC_CHANNEL *ch);
int ossl_quic_channel_is_handshake_complete(const QUIC_CHANNEL *ch);
int ossl_quic_channel_is_handshake_confirmed(const QUIC_CHANNEL *ch);
int ossl_quic_channel_is_server(const QUIC_CHANNEL *ch);
void ossl_quic_channel_notify_flush_done(QUIC_CHANNEL *ch);
QUIC_PORT *ossl_quic_channel_get0_port(QUIC_CHANNEL *ch);
QUIC_ENGINE *ossl_quic_channel_get0_engine(QUIC_CHANNEL *ch);
@ -470,6 +472,8 @@ int ossl_quic_bind_channel(QUIC_CHANNEL *ch, const BIO_ADDR *peer,
const QUIC_CONN_ID *scid, const QUIC_CONN_ID *dcid,
const QUIC_CONN_ID *odcid);
void ossl_quic_channel_set_tcause(QUIC_CHANNEL *ch, uint64_t app_error_code,
const char *app_reason);
# endif
#endif

View File

@ -554,6 +554,7 @@ static ossl_inline ossl_unused size_t ossl_quic_stream_recv_pending(const QUIC_S
*/
struct quic_stream_map_st {
LHASH_OF(QUIC_STREAM) *map;
QUIC_CHANNEL *ch;
QUIC_STREAM_LIST_NODE active_list;
QUIC_STREAM_LIST_NODE accept_list;
QUIC_STREAM_LIST_NODE ready_for_gc_list;
@ -564,7 +565,6 @@ struct quic_stream_map_st {
void *get_stream_limit_cb_arg;
QUIC_RXFC *max_streams_bidi_rxfc;
QUIC_RXFC *max_streams_uni_rxfc;
int is_server;
};
/*
@ -585,7 +585,7 @@ int ossl_quic_stream_map_init(QUIC_STREAM_MAP *qsm,
void *get_stream_limit_cb_arg,
QUIC_RXFC *max_streams_bidi_rxfc,
QUIC_RXFC *max_streams_uni_rxfc,
int is_server);
QUIC_CHANNEL *ch);
/*
* Any streams still in the map will be released as though

View File

@ -249,7 +249,7 @@ static int ch_init(QUIC_CHANNEL *ch)
if (!ossl_quic_stream_map_init(&ch->qsm, get_stream_limit, ch,
&ch->max_streams_bidi_rxfc,
&ch->max_streams_uni_rxfc,
ch->is_server))
ch))
goto err;
ch->have_qsm = 1;
@ -649,6 +649,45 @@ int ossl_quic_channel_is_term_any(const QUIC_CHANNEL *ch)
|| ossl_quic_channel_is_terminated(ch);
}
int ossl_quic_channel_is_server(const QUIC_CHANNEL *ch)
{
return ch->is_server;
}
void ossl_quic_channel_notify_flush_done(QUIC_CHANNEL *ch)
{
ch_record_state_transition(ch, ch->terminate_cause.remote
? QUIC_CHANNEL_STATE_TERMINATING_DRAINING
: QUIC_CHANNEL_STATE_TERMINATING_CLOSING);
/*
* RFC 9000 s. 10.2 Immediate Close
* These states SHOULD persist for at least three times
* the current PTO interval as defined in [QUIC-RECOVERY].
*/
ch->terminate_deadline
= ossl_time_add(get_time(ch),
ossl_time_multiply(ossl_ackm_get_pto_duration(ch->ackm), 3));
if (!ch->terminate_cause.remote) {
OSSL_QUIC_FRAME_CONN_CLOSE f = {0};
/* best effort */
f.error_code = ch->terminate_cause.error_code;
f.frame_type = ch->terminate_cause.frame_type;
f.is_app = ch->terminate_cause.app;
f.reason = (char *)ch->terminate_cause.reason;
f.reason_len = ch->terminate_cause.reason_len;
ossl_quic_tx_packetiser_schedule_conn_close(ch->txp, &f);
/*
* RFC 9000 s. 10.2.2 Draining Connection State:
* An endpoint that receives a CONNECTION_CLOSE frame MAY
* send a single packet containing a CONNECTION_CLOSE
* frame before entering the draining state, using a
* NO_ERROR code if appropriate
*/
ch->conn_close_queued = 1;
}
}
const QUIC_TERMINATE_CAUSE *
ossl_quic_channel_get_terminate_cause(const QUIC_CHANNEL *ch)
{
@ -3140,14 +3179,17 @@ int ossl_quic_channel_on_handshake_confirmed(QUIC_CHANNEL *ch)
static void copy_tcause(QUIC_TERMINATE_CAUSE *dst,
const QUIC_TERMINATE_CAUSE *src)
{
/*
* do not override reason once it got set.
*/
if (dst->reason != NULL)
return;
dst->error_code = src->error_code;
dst->frame_type = src->frame_type;
dst->app = src->app;
dst->remote = src->remote;
dst->reason = NULL;
dst->reason_len = 0;
if (src->reason != NULL && src->reason_len > 0) {
size_t l = src->reason_len;
char *r;
@ -3168,6 +3210,18 @@ static void copy_tcause(QUIC_TERMINATE_CAUSE *dst,
}
}
void ossl_quic_channel_set_tcause(QUIC_CHANNEL *ch, uint64_t app_error_code,
const char *app_reason)
{
QUIC_TERMINATE_CAUSE tcause = {0};
tcause.app = 1;
tcause.error_code = app_error_code;
tcause.reason = app_reason;
tcause.reason_len = app_reason != NULL ? strlen(app_reason) : 0;
copy_tcause(&ch->terminate_cause, &tcause);
}
static void ch_start_terminating(QUIC_CHANNEL *ch,
const QUIC_TERMINATE_CAUSE *tcause,
int force_immediate)
@ -3189,38 +3243,7 @@ static void ch_start_terminating(QUIC_CHANNEL *ch,
ossl_qlog_event_connectivity_connection_closed(ch_get_qlog(ch), tcause);
if (!force_immediate) {
ch_record_state_transition(ch, tcause->remote
? QUIC_CHANNEL_STATE_TERMINATING_DRAINING
: QUIC_CHANNEL_STATE_TERMINATING_CLOSING);
/*
* RFC 9000 s. 10.2 Immediate Close
* These states SHOULD persist for at least three times
* the current PTO interval as defined in [QUIC-RECOVERY].
*/
ch->terminate_deadline
= ossl_time_add(get_time(ch),
ossl_time_multiply(ossl_ackm_get_pto_duration(ch->ackm),
3));
if (!tcause->remote) {
OSSL_QUIC_FRAME_CONN_CLOSE f = {0};
/* best effort */
f.error_code = ch->terminate_cause.error_code;
f.frame_type = ch->terminate_cause.frame_type;
f.is_app = ch->terminate_cause.app;
f.reason = (char *)ch->terminate_cause.reason;
f.reason_len = ch->terminate_cause.reason_len;
ossl_quic_tx_packetiser_schedule_conn_close(ch->txp, &f);
/*
* RFC 9000 s. 10.2.2 Draining Connection State:
* An endpoint that receives a CONNECTION_CLOSE frame MAY
* send a single packet containing a CONNECTION_CLOSE
* frame before entering the draining state, using a
* NO_ERROR code if appropriate
*/
ch->conn_close_queued = 1;
}
ossl_quic_channel_notify_flush_done(ch);
} else {
ch_on_terminating_timeout(ch);
}

View File

@ -1501,6 +1501,36 @@ static int quic_shutdown_peer_wait(void *arg)
return ossl_quic_channel_is_term_any(qc->ch);
}
/*
* This function deals with local shutdown.
* Function must consider those scenarios:
* - blocking mode (1)
* - non-blocking mode (2)
* - non-blocking mode with assistance from SSL_poll() (3)
* (1) The function completes shutdown then returns back to caller.
* To complete shutdown we must do:
* - flush all streams, unless we got SSL_SHUTDOWN_FLAG_NO_STREAM_FLUSH,
* which means the connection is closed without waiting for streams
* to deliver data written by application.
* - let remote peer know local application is going to close connection,
* unless we got SSL_SHUTDOWN_FLAG_WAIT_PEER in which case we await
* until remote peer closes the connection
* - wait for peer to confirm connection close
*
* (2) The function does not block waiting for streams to be flushed
* nor for peer to close connection (when running with SSL_SHUTDOWN_FLAG_WAIT_PEER)
* Application is supposed to call SSL_shutdown() repeatedly as long as
* function returns 0 which indicates the operation is still in progress.
*
* (3) In this case application uses SSL_poll() to wait for completion
* of each step of shutdown process. Application calls SSL_shutdown()
* to start with connection shutdown. The function does not block.
* Application then uses SSL_poll() on connection object to monitor
* progress of shutdown. The SSL_poll() indicates progress by signaling
* SSL_POLL_EVENT_EC event. Application must check connection object
* for error. If no error is indicated, then application must call
* SSL_shutdown() to move to the next stop in shutdown process.
*/
QUIC_TAKES_LOCK
int ossl_quic_conn_shutdown(SSL *s, uint64_t flags,
const SSL_SHUTDOWN_EX_ARGS *args,
@ -1527,6 +1557,19 @@ int ossl_quic_conn_shutdown(SSL *s, uint64_t flags,
return 1;
}
if (!wait_peer) {
/*
* Set shutdown reason now when local application wants to do
* active close (does not waant to wait for peer to close th
* connection). The reason will be sent to peer with connection
* close notification as soon as streams will be flushed.
*/
if (args != NULL) {
ossl_quic_channel_set_tcause(ctx.qc->ch, args->quic_error_code,
args->quic_reason);
}
}
/* Phase 1: Stream Flushing */
if (!wait_peer && stream_flush) {
qc_shutdown_flush_init(ctx.qc);

View File

@ -9,6 +9,7 @@
#include "internal/quic_stream_map.h"
#include "internal/nelem.h"
#include "internal/quic_channel.h"
/*
* QUIC Stream Map
@ -92,7 +93,7 @@ int ossl_quic_stream_map_init(QUIC_STREAM_MAP *qsm,
void *get_stream_limit_cb_arg,
QUIC_RXFC *max_streams_bidi_rxfc,
QUIC_RXFC *max_streams_uni_rxfc,
int is_server)
QUIC_CHANNEL *ch)
{
qsm->map = lh_QUIC_STREAM_new(hash_stream, cmp_stream);
qsm->active_list.prev = qsm->active_list.next = &qsm->active_list;
@ -111,7 +112,7 @@ int ossl_quic_stream_map_init(QUIC_STREAM_MAP *qsm,
qsm->get_stream_limit_cb_arg = get_stream_limit_cb_arg;
qsm->max_streams_bidi_rxfc = max_streams_bidi_rxfc;
qsm->max_streams_uni_rxfc = max_streams_uni_rxfc;
qsm->is_server = is_server;
qsm->ch = ch;
return 1;
}
@ -156,7 +157,7 @@ QUIC_STREAM *ossl_quic_stream_map_alloc(QUIC_STREAM_MAP *qsm,
s->id = stream_id;
s->type = type;
s->as_server = qsm->is_server;
s->as_server = ossl_quic_channel_is_server(qsm->ch);
s->send_state = (ossl_quic_stream_is_local_init(s)
|| ossl_quic_stream_is_bidi(s))
? QUIC_SSTREAM_STATE_READY
@ -329,7 +330,7 @@ void ossl_quic_stream_map_update_state(QUIC_STREAM_MAP *qsm, QUIC_STREAM *s)
{
int should_be_active, allowed_by_stream_limit = 1;
if (ossl_quic_stream_is_server_init(s) == qsm->is_server) {
if (ossl_quic_stream_is_server_init(s) == ossl_quic_channel_is_server(qsm->ch)) {
int is_uni = !ossl_quic_stream_is_bidi(s);
uint64_t stream_ordinal = s->id >> 2;
@ -425,6 +426,14 @@ static void shutdown_flush_done(QUIC_STREAM_MAP *qsm, QUIC_STREAM *qs)
assert(qsm->num_shutdown_flush > 0);
qs->shutdown_flush = 0;
--qsm->num_shutdown_flush;
/*
* when num_shutdown_flush becomes zero we need to poke
* SSL_poll() it's time to poke to SSL_shutdown() to proceed
* with shutdown process as all streams are gone (flushed).
*/
if (qsm->num_shutdown_flush == 0)
ossl_quic_channel_notify_flush_done(qsm->ch);
}
int ossl_quic_stream_map_notify_totally_acked(QUIC_STREAM_MAP *qsm,

View File

@ -123,6 +123,7 @@ static int helper_init(struct helper *h)
{
int rc = 0;
size_t i;
static char fake_channel[4096] = { 0 };
memset(h, 0, sizeof(*h));
@ -186,10 +187,15 @@ static int helper_init(struct helper *h)
/* is_server */0)))
goto err;
/*
* fake_channel is ugly hack which is good enough for testing.
* we enable qsm to safely dereference a memory when it
* calls ossl_quic_channel_is_serve().
*/
if (!TEST_true(ossl_quic_stream_map_init(&h->qsm, NULL, NULL,
&h->max_streams_bidi_rxfc,
&h->max_streams_uni_rxfc,
/*is_server=*/0)))
(QUIC_CHANNEL *)fake_channel)))
goto err;
h->have_qsm = 1;