mirror of https://github.com/openssl/openssl.git
crypto/threads_pthread: rewrite contention data storage to per-tid
This eliminates locking during writing out of the lock contation report data, which claws back some of the lost performance degradation imposed by the lock contention reporting instrumentation: [Without -DREPORT_RWLOCK_CONTENTION] ~/dev/perftools/source$ ./evp_fetch 100 Average time per fetch call: 4.502162us ~/dev/perftools/source$ ./evp_fetch 200 Average time per fetch call: 8.224920us [Before] ~/dev/perftools/source$ ./evp_fetch 100 Average time per fetch call: 13.079795us ~/dev/perftools/source$ ./evp_fetch 200 Average time per fetch call: 23.420235us [After] ~/dev/perftools/source$ ./evp_fetch 100 Average time per fetch call: 6.557428us ~/dev/perftools/source$ ./evp_fetch 200 Average time per fetch call: 13.415148us The downside is that it produces a file for each TID, which floods the working directory with debug files, but that mich be an acceptable trade-off.
This commit is contained in:
parent
db6565e5b1
commit
0154dc12a5
|
@ -17,8 +17,11 @@
|
|||
|
||||
# include <assert.h>
|
||||
# include <execinfo.h>
|
||||
# include <fcntl.h>
|
||||
# include <stdbool.h>
|
||||
# include <unistd.h>
|
||||
# include <sys/syscall.h>
|
||||
# include <sys/uio.h>
|
||||
|
||||
# include "internal/time.h"
|
||||
|
||||
|
@ -34,17 +37,10 @@
|
|||
* to the contention log file. We do this because we want to avoid use
|
||||
* of the CRYPTO_THREAD api so as to prevent recursive blocking reports.
|
||||
*/
|
||||
static FILE *contention_fp = NULL;
|
||||
static CRYPTO_ONCE init_contention_fp = CRYPTO_ONCE_STATIC_INIT;
|
||||
static int rwlock_count = 0;
|
||||
static CRYPTO_ONCE init_contention_data_flag = CRYPTO_ONCE_STATIC_INIT;
|
||||
pthread_mutex_t log_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||
CRYPTO_THREAD_LOCAL thread_contention_data;
|
||||
|
||||
static void destroy_contention_data(void *data)
|
||||
{
|
||||
OPENSSL_free(data);
|
||||
}
|
||||
|
||||
struct stack_info {
|
||||
unsigned int nptrs;
|
||||
int write;
|
||||
|
@ -55,11 +51,18 @@ struct stack_info {
|
|||
|
||||
# define STACKS_COUNT 32
|
||||
struct stack_traces {
|
||||
int fd;
|
||||
int lock_depth;
|
||||
size_t idx;
|
||||
struct stack_info stacks[STACKS_COUNT];
|
||||
};
|
||||
|
||||
/* The glibc gettid() definition presents only since 2.30. */
|
||||
static pid_t get_tid()
|
||||
{
|
||||
return syscall(SYS_gettid);
|
||||
}
|
||||
|
||||
# ifdef FIPS_MODULE
|
||||
# define FIPS_SFX "-fips"
|
||||
# else
|
||||
|
@ -67,10 +70,29 @@ struct stack_traces {
|
|||
# endif
|
||||
static void *init_contention_data(void)
|
||||
{
|
||||
contention_fp = fopen("lock-contention-log" FIPS_SFX ".txt", "w");
|
||||
if (contention_fp == NULL)
|
||||
fprintf(stderr, "Contention log file could not be opened, log will not be recorded\n");
|
||||
struct stack_traces *traces;
|
||||
char fname_fmt[] = "lock-contention-log" FIPS_SFX ".%d.txt";
|
||||
char fname[sizeof(fname_fmt) + sizeof(int) * 3];
|
||||
|
||||
traces = OPENSSL_zalloc(sizeof(struct stack_traces));
|
||||
|
||||
snprintf(fname, sizeof(fname), fname_fmt, get_tid());
|
||||
|
||||
traces->fd = open(fname, O_WRONLY | O_APPEND | O_CLOEXEC | O_CREAT, 0600);
|
||||
|
||||
return traces;
|
||||
}
|
||||
|
||||
static void destroy_contention_data(void *data)
|
||||
{
|
||||
struct stack_traces *st = data;
|
||||
|
||||
close(st->fd);
|
||||
OPENSSL_free(data);
|
||||
}
|
||||
|
||||
static void init_contention_data_once(void)
|
||||
{
|
||||
/*
|
||||
* Create a thread local key here to store our list of stack traces
|
||||
* to be printed when we unlock the lock we are holding
|
||||
|
@ -84,56 +106,60 @@ static struct stack_traces *get_stack_traces(bool init)
|
|||
struct stack_traces *traces = CRYPTO_THREAD_get_local(&thread_contention_data);
|
||||
|
||||
if (!traces && init) {
|
||||
traces = OPENSSL_zalloc(sizeof(*traces));
|
||||
traces = init_contention_data();
|
||||
CRYPTO_THREAD_set_local(&thread_contention_data, traces);
|
||||
}
|
||||
|
||||
return traces;
|
||||
}
|
||||
|
||||
static void print_stack_traces(struct stack_traces *traces, FILE *fptr)
|
||||
static void print_stack_traces(struct stack_traces *traces)
|
||||
{
|
||||
unsigned int j;
|
||||
struct iovec *iov;
|
||||
int iovcnt;
|
||||
|
||||
pthread_mutex_lock(&log_lock);
|
||||
while (traces != NULL && traces->idx >= 1) {
|
||||
traces->idx--;
|
||||
fprintf(fptr, "lock blocked on %s for %zu usec at time %zu tid %d\n",
|
||||
dprintf(traces->fd,
|
||||
"lock blocked on %s for %zu usec at time %zu tid %d\n",
|
||||
traces->stacks[traces->idx].write == 1 ? "WRITE" : "READ",
|
||||
ossl_time2us(traces->stacks[traces->idx].duration),
|
||||
ossl_time2us(traces->stacks[traces->idx].start),
|
||||
gettid());
|
||||
get_tid());
|
||||
if (traces->stacks[traces->idx].strings != NULL) {
|
||||
for (j = 0; j < traces->stacks[traces->idx].nptrs; j++)
|
||||
fprintf(fptr, "%s\n", traces->stacks[traces->idx].strings[j]);
|
||||
free(traces->stacks[traces->idx].strings);
|
||||
static const char lf = '\n';
|
||||
|
||||
iovcnt = traces->stacks[traces->idx].nptrs * 2 + 1;
|
||||
iov = alloca(iovcnt * sizeof(*iov));
|
||||
for (j = 0; j < traces->stacks[traces->idx].nptrs; j++) {
|
||||
iov[2 * j].iov_base = traces->stacks[traces->idx].strings[j];
|
||||
iov[2 * j].iov_len = strlen(traces->stacks[traces->idx].strings[j]);
|
||||
iov[2 * j + 1].iov_base = (char *) &lf;
|
||||
iov[2 * j + 1].iov_len = 1;
|
||||
}
|
||||
iov[traces->stacks[traces->idx].nptrs * 2].iov_base = (char *) &lf;
|
||||
iov[traces->stacks[traces->idx].nptrs * 2].iov_len = 1;
|
||||
} else {
|
||||
fprintf(fptr, "No stack trace available\n");
|
||||
static const char no_bt[] = "No stack trace available\n\n";
|
||||
|
||||
iovcnt = 1;
|
||||
iov = alloca(iovcnt * sizeof(*iov));
|
||||
iov[0].iov_base = (char *) no_bt;
|
||||
iov[0].iov_len = sizeof(no_bt) - 1;
|
||||
}
|
||||
fprintf(contention_fp, "\n");
|
||||
writev(traces->fd, iov, iovcnt);
|
||||
free(traces->stacks[traces->idx].strings);
|
||||
}
|
||||
pthread_mutex_unlock(&log_lock);
|
||||
}
|
||||
|
||||
void ossl_init_rwlock_contention_data(void)
|
||||
{
|
||||
CRYPTO_THREAD_run_once(&init_contention_fp, init_contention_fp_once);
|
||||
__atomic_add_fetch(&rwlock_count, 1, __ATOMIC_ACQ_REL);
|
||||
CRYPTO_THREAD_run_once(&init_contention_data_flag, init_contention_data_once);
|
||||
}
|
||||
|
||||
void ossl_free_rwlock_contention_data(void)
|
||||
{
|
||||
/*
|
||||
* Note: It's possible here that OpenSSL may allocate a lock and immediately
|
||||
* free it, in which case we would erroneously close the contention log
|
||||
* prior to the library going on to do more real work. In practice
|
||||
* that never happens though, and since this is a debug facility
|
||||
* we don't worry about that here.
|
||||
*/
|
||||
if (__atomic_add_fetch(&rwlock_count, -1, __ATOMIC_ACQ_REL) == 0) {
|
||||
fclose(contention_fp);
|
||||
contention_fp = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static int record_lock_contention(pthread_rwlock_t *lock,
|
||||
|
@ -157,7 +183,7 @@ static int record_lock_contention(pthread_rwlock_t *lock,
|
|||
traces->idx++;
|
||||
if (traces->idx >= STACKS_COUNT) {
|
||||
fprintf(stderr, "STACK RECORD OVERFLOW!\n");
|
||||
print_stack_traces(traces, contention_fp);
|
||||
print_stack_traces(traces);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -214,11 +240,11 @@ int ossl_rwlock_unlock(pthread_rwlock_t *lock)
|
|||
{
|
||||
struct stack_traces *traces = get_stack_traces(false);
|
||||
|
||||
if (contention_fp != NULL && traces != NULL) {
|
||||
if (traces != NULL) {
|
||||
traces->lock_depth--;
|
||||
assert(traces->lock_depth >= 0);
|
||||
if (traces->lock_depth == 0)
|
||||
print_stack_traces(traces, contention_fp);
|
||||
print_stack_traces(traces);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -16,15 +16,7 @@
|
|||
|
||||
# include "internal/threads_common.h"
|
||||
|
||||
# if !defined(__GNUC__) || !defined(__ATOMIC_ACQ_REL) || \
|
||||
defined(BROKEN_CLANG_ATOMICS) || defined(OPENSSL_NO_STDIO)
|
||||
/*
|
||||
* we only enable REPORT_RWLOCK_CONTENTION on clang/gcc when we have
|
||||
* atomics available. We do this because we need to use an atomic to track
|
||||
* when we can close the log file. We could use the CRYPTO_atomic_ api
|
||||
* but that requires lock creation which gets us into a bad recursive loop
|
||||
* when we try to initialize the file pointer
|
||||
*/
|
||||
# if defined(OPENSSL_NO_STDIO)
|
||||
# ifdef REPORT_RWLOCK_CONTENTION
|
||||
# warning "RWLOCK CONTENTION REPORTING NOT SUPPORTED, Disabling"
|
||||
# undef REPORT_RWLOCK_CONTENTION
|
||||
|
|
|
@ -15,29 +15,21 @@
|
|||
#
|
||||
TEMPDIR=$(mktemp -d /tmp/contention.XXXXXX)
|
||||
|
||||
LOGFILE=$1
|
||||
trap "rm -rf $TEMPDIR" EXIT
|
||||
|
||||
if [ ! -f $LOGFILE ]
|
||||
then
|
||||
echo "No log file found"
|
||||
exit 1
|
||||
fi
|
||||
LOGFILEBASE=$(basename $LOGFILE)
|
||||
|
||||
echo "Splitting files"
|
||||
|
||||
#
|
||||
#start by splitting the log into separate stack traces
|
||||
#
|
||||
mkdir $TEMPDIR/individual_files
|
||||
cp $LOGFILE $TEMPDIR/individual_files/$LOGFILEBASE
|
||||
mkdir "$TEMPDIR/individual_files"
|
||||
cat "$@" > "$TEMPDIR/individual_files/log"
|
||||
pushd $TEMPDIR/individual_files/ > /dev/null
|
||||
awk '
|
||||
BEGIN {RS = ""; FS = "\n"}
|
||||
{file_num++; print > ("stacktrace" file_num ".txt")}' ./$LOGFILEBASE
|
||||
{file_num++; print > ("stacktrace" file_num ".txt")}' ./log
|
||||
popd > /dev/null
|
||||
rm -f $TEMPDIR/individual_files/$LOGFILEBASE
|
||||
rm -f $TEMPDIR/individual_files/log
|
||||
|
||||
#
|
||||
# Make some associative arrays to track our stats
|
||||
|
|
Loading…
Reference in New Issue