crypto/threads_pthread: rewrite contention data storage to per-tid

This eliminates locking during writing out of the lock contation report
data, which claws back some of the lost performance degradation imposed
by the lock contention reporting instrumentation:

    [Without -DREPORT_RWLOCK_CONTENTION]
    ~/dev/perftools/source$ ./evp_fetch 100
    Average time per fetch call: 4.502162us
    ~/dev/perftools/source$ ./evp_fetch 200
    Average time per fetch call: 8.224920us

    [Before]
    ~/dev/perftools/source$ ./evp_fetch 100
    Average time per fetch call: 13.079795us
    ~/dev/perftools/source$ ./evp_fetch 200
    Average time per fetch call: 23.420235us

    [After]
    ~/dev/perftools/source$ ./evp_fetch 100
    Average time per fetch call: 6.557428us
    ~/dev/perftools/source$ ./evp_fetch 200
    Average time per fetch call: 13.415148us

The downside is that it produces a file for each TID, which floods
the working directory with debug files, but that mich be an acceptable
trade-off.
This commit is contained in:
Eugene Syromiatnikov 2025-07-07 16:03:09 +02:00
parent db6565e5b1
commit 0154dc12a5
3 changed files with 69 additions and 59 deletions

View File

@ -17,8 +17,11 @@
# include <assert.h>
# include <execinfo.h>
# include <fcntl.h>
# include <stdbool.h>
# include <unistd.h>
# include <sys/syscall.h>
# include <sys/uio.h>
# include "internal/time.h"
@ -34,17 +37,10 @@
* to the contention log file. We do this because we want to avoid use
* of the CRYPTO_THREAD api so as to prevent recursive blocking reports.
*/
static FILE *contention_fp = NULL;
static CRYPTO_ONCE init_contention_fp = CRYPTO_ONCE_STATIC_INIT;
static int rwlock_count = 0;
static CRYPTO_ONCE init_contention_data_flag = CRYPTO_ONCE_STATIC_INIT;
pthread_mutex_t log_lock = PTHREAD_MUTEX_INITIALIZER;
CRYPTO_THREAD_LOCAL thread_contention_data;
static void destroy_contention_data(void *data)
{
OPENSSL_free(data);
}
struct stack_info {
unsigned int nptrs;
int write;
@ -55,11 +51,18 @@ struct stack_info {
# define STACKS_COUNT 32
struct stack_traces {
int fd;
int lock_depth;
size_t idx;
struct stack_info stacks[STACKS_COUNT];
};
/* The glibc gettid() definition presents only since 2.30. */
static pid_t get_tid()
{
return syscall(SYS_gettid);
}
# ifdef FIPS_MODULE
# define FIPS_SFX "-fips"
# else
@ -67,10 +70,29 @@ struct stack_traces {
# endif
static void *init_contention_data(void)
{
contention_fp = fopen("lock-contention-log" FIPS_SFX ".txt", "w");
if (contention_fp == NULL)
fprintf(stderr, "Contention log file could not be opened, log will not be recorded\n");
struct stack_traces *traces;
char fname_fmt[] = "lock-contention-log" FIPS_SFX ".%d.txt";
char fname[sizeof(fname_fmt) + sizeof(int) * 3];
traces = OPENSSL_zalloc(sizeof(struct stack_traces));
snprintf(fname, sizeof(fname), fname_fmt, get_tid());
traces->fd = open(fname, O_WRONLY | O_APPEND | O_CLOEXEC | O_CREAT, 0600);
return traces;
}
static void destroy_contention_data(void *data)
{
struct stack_traces *st = data;
close(st->fd);
OPENSSL_free(data);
}
static void init_contention_data_once(void)
{
/*
* Create a thread local key here to store our list of stack traces
* to be printed when we unlock the lock we are holding
@ -84,56 +106,60 @@ static struct stack_traces *get_stack_traces(bool init)
struct stack_traces *traces = CRYPTO_THREAD_get_local(&thread_contention_data);
if (!traces && init) {
traces = OPENSSL_zalloc(sizeof(*traces));
traces = init_contention_data();
CRYPTO_THREAD_set_local(&thread_contention_data, traces);
}
return traces;
}
static void print_stack_traces(struct stack_traces *traces, FILE *fptr)
static void print_stack_traces(struct stack_traces *traces)
{
unsigned int j;
struct iovec *iov;
int iovcnt;
pthread_mutex_lock(&log_lock);
while (traces != NULL && traces->idx >= 1) {
traces->idx--;
fprintf(fptr, "lock blocked on %s for %zu usec at time %zu tid %d\n",
dprintf(traces->fd,
"lock blocked on %s for %zu usec at time %zu tid %d\n",
traces->stacks[traces->idx].write == 1 ? "WRITE" : "READ",
ossl_time2us(traces->stacks[traces->idx].duration),
ossl_time2us(traces->stacks[traces->idx].start),
gettid());
get_tid());
if (traces->stacks[traces->idx].strings != NULL) {
for (j = 0; j < traces->stacks[traces->idx].nptrs; j++)
fprintf(fptr, "%s\n", traces->stacks[traces->idx].strings[j]);
free(traces->stacks[traces->idx].strings);
static const char lf = '\n';
iovcnt = traces->stacks[traces->idx].nptrs * 2 + 1;
iov = alloca(iovcnt * sizeof(*iov));
for (j = 0; j < traces->stacks[traces->idx].nptrs; j++) {
iov[2 * j].iov_base = traces->stacks[traces->idx].strings[j];
iov[2 * j].iov_len = strlen(traces->stacks[traces->idx].strings[j]);
iov[2 * j + 1].iov_base = (char *) &lf;
iov[2 * j + 1].iov_len = 1;
}
iov[traces->stacks[traces->idx].nptrs * 2].iov_base = (char *) &lf;
iov[traces->stacks[traces->idx].nptrs * 2].iov_len = 1;
} else {
fprintf(fptr, "No stack trace available\n");
static const char no_bt[] = "No stack trace available\n\n";
iovcnt = 1;
iov = alloca(iovcnt * sizeof(*iov));
iov[0].iov_base = (char *) no_bt;
iov[0].iov_len = sizeof(no_bt) - 1;
}
fprintf(contention_fp, "\n");
writev(traces->fd, iov, iovcnt);
free(traces->stacks[traces->idx].strings);
}
pthread_mutex_unlock(&log_lock);
}
void ossl_init_rwlock_contention_data(void)
{
CRYPTO_THREAD_run_once(&init_contention_fp, init_contention_fp_once);
__atomic_add_fetch(&rwlock_count, 1, __ATOMIC_ACQ_REL);
CRYPTO_THREAD_run_once(&init_contention_data_flag, init_contention_data_once);
}
void ossl_free_rwlock_contention_data(void)
{
/*
* Note: It's possible here that OpenSSL may allocate a lock and immediately
* free it, in which case we would erroneously close the contention log
* prior to the library going on to do more real work. In practice
* that never happens though, and since this is a debug facility
* we don't worry about that here.
*/
if (__atomic_add_fetch(&rwlock_count, -1, __ATOMIC_ACQ_REL) == 0) {
fclose(contention_fp);
contention_fp = NULL;
}
}
static int record_lock_contention(pthread_rwlock_t *lock,
@ -157,7 +183,7 @@ static int record_lock_contention(pthread_rwlock_t *lock,
traces->idx++;
if (traces->idx >= STACKS_COUNT) {
fprintf(stderr, "STACK RECORD OVERFLOW!\n");
print_stack_traces(traces, contention_fp);
print_stack_traces(traces);
}
return 0;
@ -214,11 +240,11 @@ int ossl_rwlock_unlock(pthread_rwlock_t *lock)
{
struct stack_traces *traces = get_stack_traces(false);
if (contention_fp != NULL && traces != NULL) {
if (traces != NULL) {
traces->lock_depth--;
assert(traces->lock_depth >= 0);
if (traces->lock_depth == 0)
print_stack_traces(traces, contention_fp);
print_stack_traces(traces);
}
}

View File

@ -16,15 +16,7 @@
# include "internal/threads_common.h"
# if !defined(__GNUC__) || !defined(__ATOMIC_ACQ_REL) || \
defined(BROKEN_CLANG_ATOMICS) || defined(OPENSSL_NO_STDIO)
/*
* we only enable REPORT_RWLOCK_CONTENTION on clang/gcc when we have
* atomics available. We do this because we need to use an atomic to track
* when we can close the log file. We could use the CRYPTO_atomic_ api
* but that requires lock creation which gets us into a bad recursive loop
* when we try to initialize the file pointer
*/
# if defined(OPENSSL_NO_STDIO)
# ifdef REPORT_RWLOCK_CONTENTION
# warning "RWLOCK CONTENTION REPORTING NOT SUPPORTED, Disabling"
# undef REPORT_RWLOCK_CONTENTION

View File

@ -15,29 +15,21 @@
#
TEMPDIR=$(mktemp -d /tmp/contention.XXXXXX)
LOGFILE=$1
trap "rm -rf $TEMPDIR" EXIT
if [ ! -f $LOGFILE ]
then
echo "No log file found"
exit 1
fi
LOGFILEBASE=$(basename $LOGFILE)
echo "Splitting files"
#
#start by splitting the log into separate stack traces
#
mkdir $TEMPDIR/individual_files
cp $LOGFILE $TEMPDIR/individual_files/$LOGFILEBASE
mkdir "$TEMPDIR/individual_files"
cat "$@" > "$TEMPDIR/individual_files/log"
pushd $TEMPDIR/individual_files/ > /dev/null
awk '
BEGIN {RS = ""; FS = "\n"}
{file_num++; print > ("stacktrace" file_num ".txt")}' ./$LOGFILEBASE
{file_num++; print > ("stacktrace" file_num ".txt")}' ./log
popd > /dev/null
rm -f $TEMPDIR/individual_files/$LOGFILEBASE
rm -f $TEMPDIR/individual_files/log
#
# Make some associative arrays to track our stats