// SPDX-License-Identifier: LGPL-2.1 /* * rseq.c * * Copyright (C) 2016 Mathieu Desnoyers * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; only * version 2.1 of the License. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "../kselftest.h" #include "rseq.h" /* * Define weak versions to play nice with binaries that are statically linked * against a libc that doesn't support registering its own rseq. */ __weak ptrdiff_t __rseq_offset; __weak unsigned int __rseq_size; __weak unsigned int __rseq_flags; static const ptrdiff_t *libc_rseq_offset_p = &__rseq_offset; static const unsigned int *libc_rseq_size_p = &__rseq_size; static const unsigned int *libc_rseq_flags_p = &__rseq_flags; /* Offset from the thread pointer to the rseq area. */ ptrdiff_t rseq_offset; /* * Size of the registered rseq area. 0 if the registration was * unsuccessful. */ unsigned int rseq_size = -1U; /* Flags used during rseq registration. */ unsigned int rseq_flags; static int rseq_ownership; static int rseq_reg_success; /* At least one rseq registration has succeded. */ /* Allocate a large area for the TLS. */ #define RSEQ_THREAD_AREA_ALLOC_SIZE 1024 /* Original struct rseq feature size is 20 bytes. */ #define ORIG_RSEQ_FEATURE_SIZE 20 /* Original struct rseq allocation size is 32 bytes. */ #define ORIG_RSEQ_ALLOC_SIZE 32 static __thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE))) = { .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED, }; static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len, int flags, uint32_t sig) { return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig); } static int sys_getcpu(unsigned *cpu, unsigned *node) { return syscall(__NR_getcpu, cpu, node, NULL); } int rseq_available(void) { int rc; rc = sys_rseq(NULL, 0, 0, 0); if (rc != -1) abort(); switch (errno) { case ENOSYS: return 0; case EINVAL: return 1; default: abort(); } } /* The rseq areas need to be at least 32 bytes. */ static unsigned int get_rseq_min_alloc_size(void) { unsigned int alloc_size = rseq_size; if (alloc_size < ORIG_RSEQ_ALLOC_SIZE) alloc_size = ORIG_RSEQ_ALLOC_SIZE; return alloc_size; } /* * Return the feature size supported by the kernel. * * Depending on the value returned by getauxval(AT_RSEQ_FEATURE_SIZE): * * 0: Return ORIG_RSEQ_FEATURE_SIZE (20) * > 0: Return the value from getauxval(AT_RSEQ_FEATURE_SIZE). * * It should never return a value below ORIG_RSEQ_FEATURE_SIZE. */ static unsigned int get_rseq_kernel_feature_size(void) { unsigned long auxv_rseq_feature_size, auxv_rseq_align; auxv_rseq_align = getauxval(AT_RSEQ_ALIGN); assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE); auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE); assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE); if (auxv_rseq_feature_size) return auxv_rseq_feature_size; else return ORIG_RSEQ_FEATURE_SIZE; } int rseq_register_current_thread(void) { int rc; if (!rseq_ownership) { /* Treat libc's ownership as a successful registration. */ return 0; } rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG); if (rc) { if (RSEQ_READ_ONCE(rseq_reg_success)) { /* Incoherent success/failure within process. */ abort(); } return -1; } assert(rseq_current_cpu_raw() >= 0); RSEQ_WRITE_ONCE(rseq_reg_success, 1); return 0; } int rseq_unregister_current_thread(void) { int rc; if (!rseq_ownership) { /* Treat libc's ownership as a successful unregistration. */ return 0; } rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); if (rc) return -1; return 0; } static __attribute__((constructor)) void rseq_init(void) { /* * If the libc's registered rseq size isn't already valid, it may be * because the binary is dynamically linked and not necessarily due to * libc not having registered a restartable sequence. Try to find the * symbols if that's the case. */ if (!*libc_rseq_size_p) { libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset"); libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size"); libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags"); } if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p && *libc_rseq_size_p != 0) { unsigned int libc_rseq_size; /* rseq registration owned by glibc */ rseq_offset = *libc_rseq_offset_p; libc_rseq_size = *libc_rseq_size_p; rseq_flags = *libc_rseq_flags_p; /* * Previous versions of glibc expose the value * 32 even though the kernel only supported 20 * bytes initially. Therefore treat 32 as a * special-case. glibc 2.40 exposes a 20 bytes * __rseq_size without using getauxval(3) to * query the supported size, while still allocating a 32 * bytes area. Also treat 20 as a special-case. * * Special-cases are handled by using the following * value as active feature set size: * * rseq_size = min(32, get_rseq_kernel_feature_size()) */ switch (libc_rseq_size) { case ORIG_RSEQ_FEATURE_SIZE: fallthrough; case ORIG_RSEQ_ALLOC_SIZE: { unsigned int rseq_kernel_feature_size = get_rseq_kernel_feature_size(); if (rseq_kernel_feature_size < ORIG_RSEQ_ALLOC_SIZE) rseq_size = rseq_kernel_feature_size; else rseq_size = ORIG_RSEQ_ALLOC_SIZE; break; } default: /* Otherwise just use the __rseq_size from libc as rseq_size. */ rseq_size = libc_rseq_size; break; } return; } rseq_ownership = 1; if (!rseq_available()) { rseq_size = 0; return; } rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer(); rseq_flags = 0; } static __attribute__((destructor)) void rseq_exit(void) { if (!rseq_ownership) return; rseq_offset = 0; rseq_size = -1U; rseq_ownership = 0; } int32_t rseq_fallback_current_cpu(void) { int32_t cpu; cpu = sched_getcpu(); if (cpu < 0) { perror("sched_getcpu()"); abort(); } return cpu; } int32_t rseq_fallback_current_node(void) { uint32_t cpu_id, node_id; int ret; ret = sys_getcpu(&cpu_id, &node_id); if (ret) { perror("sys_getcpu()"); return ret; } return (int32_t) node_id; }