Hi,大佬们,我自己编译了一个systemd 241的版本,将其打包成镜像并烧录到了arm板上,之后会概率性地出现一个很奇怪的问题:只要执行systemctl就会出现core dump。后来发现运行任何systemd相关的程序都会出现这个问题,但是已经运行起来的进程却在正常地运行。
strace 追踪systemctl发现,内存映射出了问题
execve("/bin/systemctl", ["systemctl"], 0x7eb33e30 /* 9 vars */) = 0
brk(NULL) = 0x1432000
uname({sysname="Linux", nodename="HDM", ...}) = 0
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib/systemd/tls/v7l/libsystemd-shared-241.so", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = -1 ENOENT (No such file or directory)
stat64("/lib/systemd/tls/v7l", 0x7ea0e280) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib/systemd/tls/libsystemd-shared-241.so", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = -1 ENOENT (No such file or directory)
stat64("/lib/systemd/tls", 0x7ea0e280) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib/systemd/v7l/libsystemd-shared-241.so", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = -1 ENOENT (No such file or directory)
stat64("/lib/systemd/v7l", 0x7ea0e280) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib/systemd/libsystemd-shared-241.so", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0(\0\1\0\0\0000t\3\0004\0\0\0"..., 512) = 512
fstat64(3, {st_mode=S_IFREG|0755, st_size=2273920, ...}) = 0
mmap2(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x76fc1000
mmap2(NULL, 2340504, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x76d57000
mprotect(0x76f3a000, 65536, PROT_NONE) = 0
mmap2(0x76f4a000, 294912, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1e3000) = 0x76f4a000
mmap2(0x76f92000, 1688, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x76f92000
close(3) = 0
openat(AT_FDCWD, "/lib/systemd/libpthread.so.0", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = 3
fstat64(3, {st_mode=S_IFREG|0644, st_size=39163, ...}) = 0
mmap2(NULL, 39163, PROT_READ, MAP_PRIVATE, 3, 0) = 0x76fb7000
close(3) = 0
openat(AT_FDCWD, "/usr/lib/libpthread.so.0", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = 3
read(3, "\177ELF\1\1\1\3\0\0\0\0\0\0\0\0\3\0(\0\1\0\0\0004O\0\0004\0\0\0"..., 512) = 512
fstat64(3, {st_mode=S_IFREG|0755, st_size=100604, ...}) = 0
mmap2(NULL, 172648, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x76d2c000
mprotect(0x76d44000, 61440, PROT_NONE) = 0
mmap2(0x76d53000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x17000) = 0x76d53000
mmap2(0x76d55000, 4712, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x76d55000
close(3) = 0
openat(AT_FDCWD, "/lib/systemd/libc.so.6", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib/libc.so.6", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0(\0\1\0\0\0\300\203\1\0004\0\0\0"..., 512) = 512
fstat64(3, {st_mode=S_IFREG|0755, st_size=1341252, ...}) = 0
mmap2(NULL, 1410240, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x76bd3000
mprotect(0x76d16000, 65536, PROT_NONE) = 0
mmap2(0x76d26000, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x143000) = 0x76d26000
mmap2(0x76d29000, 9408, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x76d29000
close(3) = 0
openat(AT_FDCWD, "/lib/systemd/libgcc_s.so.1", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib/libgcc_s.so.1", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0(\0\1\0\0\0\250\323\0\0004\0\0\0"..., 512) = 512
fstat64(3, {st_mode=S_IFREG|0755, st_size=124612, ...}) = 0
mmap2(NULL, 188824, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x76ba4000
mprotect(0x76bc2000, 61440, PROT_NONE) = 0
mmap2(0x76bd1000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1d000) = 0x76bd1000
close(3) = 0
--- SIGSEGV {si_signo=SIGSEGV, si_code=SEGV_MAPERR, si_addr=0x4} ---
+++ killed by SIGSEGV (core dumped) +++
Segmentation fault
进行gdb解析如下,根据pc进行反汇编,参考glibc的代码,了解到r6存储的是link_map的结构,用于描述lib的结构
(gdb) inf r
r0 0x1 1
r1 0x76f2d289 1995625097
r2 0x0 0
r3 0x0 0
r4 0x14c4c 85068
r5 0x7ed14f0b 2127646475
r6 0x76f2d000 1995624448
r7 0x159f 5535
r8 0x76f30970 1995639152
r9 0xd696914 225011988
r10 0x76f2f938 1995635000
r11 0x6 6
r12 0x76f30060 1995636832
sp 0x7ed14b28 0x7ed14b28
lr 0x76f0fb88 1995504520
pc 0x76f0ff64 0x76f0ff64
cpsr 0x60000010 1610612752
(gdb) x/30i 0x76f0ff50
0x76f0ff50: ldr r7, [r4, #8]
0x76f0ff54: ldr r6, [r3, #20]
0x76f0ff58: ldr r2, [r10]
0x76f0ff5c: ldr r3, [r6, #52] ; 0x34
0x76f0ff60: tst r2, #16
=> 0x76f0ff64: ldr r9, [r3, #4]
0x76f0ff68: ldr r3, [sp, #12]
0x76f0ff6c: ldr r8, [r4]
0x76f0ff70: add r7, r3, r7
0x76f0ff74: ldrh r3, [r4, #4]
0x76f0ff78: str r3, [sp, #20]
0x76f0ff7c: bne 0x76f10150
0x76f0ff80: ldr r3, [r6, #184] ; 0xb8
0x76f0ff84: cmp r3, #0
0x76f0ff88: beq 0x76f10100
0x76f0ff8c: ldr r3, [r3, #4]
0x76f0ff90: cmp r3, #0
0x76f0ff94: beq 0x76f10558
0x76f0ff98: ldr r2, [r6]
0x76f0ff9c: ldrh r1, [r3, r2]
0x76f0ffa0: add r3, r3, r2
0x76f0ffa4: cmp r1, #1
0x76f0ffa8: bne 0x76f10004
在访问link_map下的strtab节时出现了异常访问:r3取自[r6, #52],即l_info[DT_STRTAB],其值为0,因此随后的ldr r9, [r3, #4]访问了地址0x4,与strace中的si_addr=0x4一致。
查看该link_map,
(gdb) p *(link_map *)0x76f2d000
$1 = {l_addr = 1993093120,
l_name = 0x76f30fa8 "/lib/systemd/libsystemd-shared-241.so",
l_ld = 0x76efad48, l_next = 0x76f2d2d8, l_prev = 0x76f30c68,
l_real = 0x76f2d000, l_ns = 0, l_libname = 0x76f2d264, l_info = {
0x0 <repeats 77 times>}, l_phdr = 0x76cc3034, l_entry = 1993319472,
l_phnum = 7, l_ldnum = 52, l_searchlist = {r_list = 0x0, r_nlist = 0},
l_symbolic_searchlist = {r_list = 0x76f2d260, r_nlist = 0},
l_loader = 0x76f30970, l_versions = 0x0, l_nversions = 0, l_nbuckets = 0,
l_gnu_bitmask_idxbits = 0, l_gnu_shift = 0, l_gnu_bitmask = 0x0, {
l_gnu_buckets = 0x0, l_chain = 0x0}, {l_gnu_chain_zero = 0x0,
l_buckets = 0x0}, l_direct_opencount = 0, l_type = lt_library,
l_relocated = 0, l_init_called = 0, l_global = 1, l_reserved = 0,
l_phdr_allocated = 0, l_soname_added = 0, l_faked = 0, l_need_tls_init = 0,
l_auditing = 0, l_audit_any_plt = 0, l_removed = 0, l_contiguous = 1,
l_symbolic_in_local_scope = 0, l_free_initfini = 1, l_rpath_dirs = {
dirs = 0x0, malloced = 0}, l_reloc_result = 0x0, l_versyms = 0x0,
l_origin = 0x76f2d290 "/lib/systemd", l_map_start = 1993093120,
l_map_end = 1995433624, l_text_end = 1995071488, l_scope_mem = {0x76f30ad0,
0x0, 0x0, 0x0}, l_scope_max = 4, l_scope = 0x76f2d1bc, l_local_scope = {
0x76f2d160, 0x0}, l_file_id = {dev = 32, ino = 1403}, l_runpath_dirs = {
dirs = 0x0, malloced = 0}, l_initfini = 0x76f2dac8, l_reldeps = 0x0,
l_reldepsmax = 0, l_used = 1, l_feature_1 = 0, l_flags_1 = 0, l_flags = 0,
l_idx = 0, l_mach = {plt = 0, tlsdesc_table = 0x0}, l_lookup_cache = {
--Type <RET> for more, q to quit, c to continue without paging--
sym = 0x0, type_class = 0, value = 0x0, ret = 0x0},
l_tls_initimage = 0x76eb6788, l_tls_initimage_size = 28,
l_tls_blocksize = 148, l_tls_align = 8, l_tls_firstbyte_offset = 0,
l_tls_offset = 0, l_tls_modid = 1, l_tls_dtor_count = 0,
l_relro_addr = 2045832, l_relro_size = 288888, l_serial = 3,
l_audit = 0x76f2d260}
发现l_info全是0了,这是不正常的。此外lib路径使用了overlay,当时有考虑过是不是overlay的影响,但是该路径的upper层上什么文件都没有,因此没啥思路了。
有没有大佬遇到过类似的问题啊,或者有没有什么想法,非常感谢!!!