Summary: | gpg2 is stuck (spinning) under fakeroot-1.29-alt1 on e2k | ||
---|---|---|---|
Product: | Sisyphus | Reporter: | Ivan Zakharyaschev <imz> |
Component: | fakeroot | Assignee: | placeholder <placeholder> |
Status: | CLOSED FIXED | QA Contact: | qa-sisyphus |
Severity: | normal | ||
Priority: | P5 | CC: | glebfm, ilyakurdyukov, ldv, mike, placeholder |
Version: | unstable | ||
Hardware: | e2k | ||
OS: | Linux |
Description
Ivan Zakharyaschev
2023-04-05 16:31:40 MSK
strace reports the following at this moment. (Here I used --root instead of fakeroot. /cmd contained the same command.) $ hsh-run --root ~/hasher/ -- strace -f -y /bin/sh /cmd &>strace-apt-gpg.0 ... [pid 2005524] read(9, <unfinished ...> [pid 2005520] write(4, "D (genkey(rsa(nbits 4:3072)))\n", 30 <unfinished ...> [pid 2005524] <... read resumed> "D (genkey(rsa(nbits 4:3072)))\n", 1002) = 30 [pid 2005520] <... write resumed> ) = 30 [pid 2005524] read(9, <unfinished ...> [pid 2005520] write(4, "END", 3 <unfinished ...> [pid 2005524] <... read resumed> "END", 1002) = 3 [pid 2005520] <... write resumed> ) = 3 [pid 2005524] read(9, <unfinished ...> [pid 2005520] write(4, "\n", 1 <unfinished ...> [pid 2005524] <... read resumed> "\n", 999) = 1 [pid 2005520] <... write resumed> ) = 1 [pid 2005520] read(4, <unfinished ...> [pid 2005524] access("/dev/random", R_OK) = 0 [pid 2005524] access("/dev/urandom", R_OK) = 0 [pid 2005524] getpid() = 2005523 [pid 2005524] getpid() = 2005523 [pid 2005524] open("/etc/gcrypt/random.conf", O_RDONLY) = -1 ENOENT (No such file or directory) [pid 2005524] getpid() = 2005523 [pid 2005524] open("/etc/gcrypt/random.conf", O_RDONLY) = -1 ENOENT (No such file or directory) [pid 2005524] openat(AT_FDCWD, "/sys/devices/system/cpu/cpu2/cache/index0/level", O_RDONLY) = -1 ENOENT (No such file or directory) [pid 2005524] openat(AT_FDCWD, "/sys/devices/system/cpu/cpu2/cache/index1/level", O_RDONLY) = -1 ENOENT (No such file or directory) [pid 2005524] openat(AT_FDCWD, "/sys/devices/system/cpu/cpu2/cache/index2/level", O_RDONLY) = -1 ENOENT (No such file or directory) [pid 2005524] openat(AT_FDCWD, "/sys/devices/system/cpu/cpu2/cache/index3/level", O_RDONLY) = -1 ENOENT (No such file or directory) [pid 2005524] openat(AT_FDCWD, "/sys/devices/system/cpu/cpu2/cache/index0/level", O_RDONLY) = -1 ENOENT (No such file or directory) [pid 2005524] openat(AT_FDCWD, "/sys/devices/system/cpu/cpu2/cache/index1/level", O_RDONLY) = -1 ENOENT (No such file or 
directory) [pid 2005524] openat(AT_FDCWD, "/sys/devices/system/cpu/cpu2/cache/index2/level", O_RDONLY) = -1 ENOENT (No such file or directory) [pid 2005524] openat(AT_FDCWD, "/sys/devices/system/cpu/cpu2/cache/index3/level", O_RDONLY) = -1 ENOENT (No such file or directory) [pid 2005524] openat(AT_FDCWD, "/sys/devices/system/cpu/cpu2/cache/index0/level", O_RDONLY) = -1 ENOENT (No such file or directory) [pid 2005524] openat(AT_FDCWD, "/sys/devices/system/cpu/cpu2/cache/index1/level", O_RDONLY) = -1 ENOENT (No such file or directory) [pid 2005524] openat(AT_FDCWD, "/sys/devices/system/cpu/cpu2/cache/index2/level", O_RDONLY) = -1 ENOENT (No such file or directory) [pid 2005524] openat(AT_FDCWD, "/sys/devices/system/cpu/cpu2/cache/index3/level", O_RDONLY) = -1 ENOENT (No such file or directory) [pid 2005524] mmap2(NULL, 135168, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x4588f88ea000 [pid 2005524] munmap(0x4588f88ea000, 135168) = 0 [pid 2005524] mprotect(0x4588fc021000, 8192, PROT_READ|PROT_WRITE) = 0 [pid 2005524] open("/dev/random", O_RDONLY) = 10 [pid 2005524] fcntl64(10, F_GETFD) = 0 [pid 2005524] fcntl64(10, F_SETFD, FD_CLOEXEC) = 0 [pid 2005524] restart_syscall(<... resuming interrupted fcntl64 ...>) = -1 EINTR (Interrupted system call) [pid 2005524] restart_syscall(<... resuming interrupted restart_syscall ...>) = -1 EINTR (Interrupted system call) [pid 2005524] restart_syscall(<... resuming interrupted restart_syscall ...>) = -1 EINTR (Interrupted system call) ... strace of the same moment without fakeroot. (Here, for some reason, strace didn't decode string args!..) $ hsh-run ~/hasher/ -- strace -f -y /bin/sh /cmd2 &>strace-apt-gpg.2 ... [pid 2006040] read(9, <unfinished ...> [pid 2006036] write(4, 0x554e18, 30 <unfinished ...> [pid 2006040] <... read resumed> 0x458f0c000ce0, 1002) = 30 [pid 2006036] <... write resumed> ) = 30 [pid 2006040] read(9, <unfinished ...> [pid 2006036] write(4, 0x4578b408e184, 3 <unfinished ...> [pid 2006040] <... 
read resumed> 0x458f0c000ce0, 1002) = 3 [pid 2006036] <... write resumed> ) = 3 [pid 2006040] read(9, <unfinished ...> [pid 2006036] write(4, 0x4578b408e144, 1 <unfinished ...> [pid 2006040] <... read resumed> 0x458f0c000ce3, 999) = 1 [pid 2006036] <... write resumed> ) = 1 [pid 2006036] read(4, <unfinished ...> [pid 2006040] access(0x458f09cd0190, R_OK) = 0 [pid 2006040] access(0x458f09cd01a0, R_OK) = 0 [pid 2006040] getpid() = 2006039 [pid 2006040] getpid() = 2006039 [pid 2006040] open(0x458f09ccfd50, O_RDONLY) = -1 ENOENT (No such file or directory) [pid 2006040] getpid() = 2006039 [pid 2006040] open(0x458f09ccfd50, O_RDONLY) = -1 ENOENT (No such file or directory) [pid 2006040] openat(AT_FDCWD, 0x458f0b2d0f70, O_RDONLY) = -1 ENOENT (No such file or directory) [pid 2006040] openat(AT_FDCWD, 0x458f0b2d0f70, O_RDONLY) = -1 ENOENT (No such file or directory) [pid 2006040] openat(AT_FDCWD, 0x458f0b2d0f70, O_RDONLY) = -1 ENOENT (No such file or directory) [pid 2006040] openat(AT_FDCWD, 0x458f0b2d0f70, O_RDONLY) = -1 ENOENT (No such file or directory) [pid 2006040] openat(AT_FDCWD, 0x458f0b2d0f70, O_RDONLY) = -1 ENOENT (No such file or directory) [pid 2006040] openat(AT_FDCWD, 0x458f0b2d0f70, O_RDONLY) = -1 ENOENT (No such file or directory) [pid 2006040] openat(AT_FDCWD, 0x458f0b2d0f70, O_RDONLY) = -1 ENOENT (No such file or directory) [pid 2006040] openat(AT_FDCWD, 0x458f0b2d0f70, O_RDONLY) = -1 ENOENT (No such file or directory) [pid 2006040] openat(AT_FDCWD, 0x458f0b2d0f70, O_RDONLY) = -1 ENOENT (No such file or directory) [pid 2006040] openat(AT_FDCWD, 0x458f0b2d0f70, O_RDONLY) = -1 ENOENT (No such file or directory) [pid 2006040] openat(AT_FDCWD, 0x458f0b2d0f70, O_RDONLY) = -1 ENOENT (No such file or directory) [pid 2006040] openat(AT_FDCWD, 0x458f0b2d0f70, O_RDONLY) = -1 ENOENT (No such file or directory) [pid 2006040] mmap2(NULL, 135168, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x458f0b2d3000 [pid 2006040] munmap(0x458f0b2d3000, 135168) = 0 
[pid 2006040] mprotect(0x458f0c021000, 8192, PROT_READ|PROT_WRITE) = 0 [pid 2006040] open(0x458f09cd12b0, O_RDONLY) = 10 [pid 2006040] fcntl64(10, F_GETFD) = 0 [pid 2006040] fcntl64(10, F_SETFD, FD_CLOEXEC) = 0 [pid 2006040] getrandom(0x458f0b2d1140, 96, 0) = 96 ... May be related to https://bugzilla.altlinux.org/45737 ?.. (Ответ для Ivan Zakharyaschev на комментарий #2) > May be related to https://bugzilla.altlinux.org/45737 ?.. JFYI, we've observed another regression with fakeroot 1.29 on e2k ("warning: Unable to reset I/O priority" spam from rpm's lib/rpmscript.c that didn't occur with fakeroot 1.28 under the same kernel); glebfm@ noted that might be related to syscall function wrapper that got introduced in this version: http://git.altlinux.org/gears/f/fakeroot.git?p=fakeroot.git;a=commitdiff;h=f091ef785ee9f3484b91c8e918d8241e8d244d83 (Ответ для Ivan Zakharyaschev на комментарий #0) > And it gets stuck, consuming all CPU. All 100% of a single CPU core, strictly speaking. :-) I've had a thought (without any confirmation) that this new syscall wrapper can lead to access to prohibited memory among these universal 6 arguments, like to a result of speculative computation. Previously, I saw something like SIGBUS in such cases, but here I don't see any signals in strace at this moment, but just resuming some bizarre syscall after endless interrupts; the reason of the interrupts is not shown. This might not be the real reason, just an idea. (Ответ для Ivan Zakharyaschev на комментарий #4) > I've had a thought (without any confirmation) that this new syscall wrapper > can lead to access to prohibited memory among these universal 6 arguments, > like to a result of speculative computation. Previously, I saw something > like SIGBUS in such cases, but here I don't see any signals in strace at > this moment, but just resuming some bizarre syscall after endless > interrupts; the reason of the interrupts is not shown. This might not be the > real reason, just an idea. 
"prohibited use of value" would be a more correct description for that hypothetical situation. (In reply to Ivan Zakharyaschev from comment #1) > strace of the same moment without fakeroot. (Here, for some reason, strace > didn't decode string args!..) That's because strace was denied permission to access the tracee's memory. Meanwhile, I've understood the problem: a special calling convention on e2k (the variadic arguments of a variadic function such as syscall are passed in memory rather than in registers). I'm working on a fix. http://ftp.altlinux.org/pub/people/mike/elbrus/docs/elbrus_prog/html/chapter9.html#id34 : 9.5.1. Передача параметров ... Зависимость передачи параметров от интерфейса процедуры При генерации кода компилятор вправе использовать и доверять информации об интерфейсе процедуры в точке ее вызова. Эта информация получается из заданного предописания процедуры. Возможен и анализ по вызову при отсутствии предописания. Все предописания можно разделить на три группы: предописание со спецификацией всех параметров; предописание со спецификацией переменного числа параметров; предописание без спецификации параметров. Передача параметров для вызова со спецификацией всех параметров осуществляется по общей схеме, приведенной выше. Интерфейс обработки списка переменного числа параметров подразумевает нахождение их в памяти. Поэтому при передаче параметров для вызова процедуры с переменным числом параметров, параметры, входящие в список переменного числа (начиная с параметра перед эллипсом), сразу размещаются в соответствующие места локального стека, даже если они могут быть помещены в первые восемь регистров. Если для вызова процедуры нет предописания со спецификацией параметров, необходимо предусмотреть все возможные случаи. Поэтому при формировании списка фактических параметров первые восемь параметров помещаются и на регистры (как в случае процедур с фиксированным числом параметров), и в память (как в случае процедур с переменным числом параметров). 
Таким образом, процедура с переменным числом параметров всегда может предполагать, что переменная часть параметров находится в памяти. А для процедуры с фиксированным числом параметров первые параметры находятся в первых восьми регистрах. * * * The syscall.S assembler implementation on e2k indeed simply loads all arguments from the stack. I'm not sure whether this difference in the prototype can be important for some other platform, but who knows... Judging by glibc's syscall.S implementations, this does not seem to be the case. However, searching the web, one can learn that AArch64/MacOS has the same "problem". (Note that on aarch64 xN registers are 64-bit registers, and wN are their 32-bit parts.) https://cpufun.substack.com/i/32634393/why-bother-us-if-this-all-works : Why bother us if this all works? The reason this is an issue at all is that it doesn’t work this way on AArch64/MacOS. You may have checked the AArch64 compilers in Compiler Explorer and seen code like this, add x0, sp, #4 // =4 mov w1, #1 mov w2, #2 mov w3, #3 bl foo(int*, int, int, int) ... add x0, sp, #4 // =4 mov w1, #4 mov w2, #5 mov w3, #6 bl foo_ellipsis(int*, ...) which shows the same properties as that on x86_64: the arguments are being passed in the same places whether or not this is a variadic function, so that’s all good, right? But… and it’s a big BUT, the calling convention on AArch64/MacOS is not like this. Here the compiler doesn’t load arguments which are matching the ellipsis into registers, but rather puts them onto the stack. Then the va_list code extracts them from there. As a result the test code fails when run natively on the MacOS M1 machines. * * * I'm polishing my fix and preparing a build task with it. I suggest the fix in task 318100. (Ответ для Ivan Zakharyaschev на комментарий #4) > I've had a thought (without any confirmation) that this new syscall wrapper > can lead to access to prohibited memory among these universal 6 arguments, > like to a result of speculative computation. 
Previously, I saw something > like SIGBUS in such cases, but here I don't see any signals in strace at > this moment, but just resuming some bizarre syscall after endless > interrupts; the reason of the interrupts is not shown. This might not be the > real reason, just an idea. No, the way the syscall wrapper was compiled (and optimized) on e2k (see with disassemble in gdb) was almost the same as syscall.S implementation; so, since they write it that way, nothing bad should have been expected (except for the wrong calling convention): # tail -n28 glibc-2.29-alt2.E2K.26.012.1/sysdeps/unix/sysv/linux/e2k/syscall.S #include <sysdep.h> .ignore ld_st_style .text ENTRY (syscall) setwd wsz = 0x9 setbn rsz = 0x3, rbs = 0x5, rcur = 0x0 getsp 0x0, %r7 __SYSCALL_ARG_MEM (%r7, 0x0, %b[0]) __SYSCALL_ARG_MEM (%r7, 0x8, %b[1]) __SYSCALL_ARG_MEM (%r7, 0x10, %b[2]) __SYSCALL_ARG_MEM (%r7, 0x18, %b[3]) __SYSCALL_ARG_MEM (%r7, 0x20, %b[4]) __SYSCALL_ARG_MEM (%r7, 0x28, %b[5]) __SYSCALL_ARG_MEM (%r7, 0x30, %b[6]) sdisp %ctpr1, __SYSCALL_TRAPNUM call %ctpr1, wbs = 0x5 __SYSCALL_OUTPUT ret PSEUDO_END (syscall) # __SYSCALL_ARG_MEM is just a load. glibc-2.29-alt2.E2K.26.012.1/sysdeps/unix/sysv/linux/e2k/e2k64/sysdep.h: #define __SYSCALL_ARG_MEM(src1, src2, dst) ldd src1, src2, dst (Ответ для Ivan Zakharyaschev на комментарий #9) > I suggest the fix in task 318100 Fixed there. From: bugzilla-admin@altlinux.org Subject: [Bug 45757] Unable to close via changelog: no such bug You have tried to close bug 45757 via changelog (see below for the changelog excerpt). Unfortunately, it is not possible: this bug does not exist https://bugzilla.altlinux.org/45757 Sincerely, your Bugzilla. :-/ ?? Amended a bit more in: task #318154: added #100: build tag "1.29-alt3" from /people/imz/packages/fakeroot.git * Thu Apr 6 2023 Ivan Zakharyaschev <imz@altlinux.org> 1.29-alt3 - Fixed a compiler error of older GCCs (for p10) in the wrapper for syscall function. 
- Warn the maintainer if a function definition is missing (i.e., when it is not generated automatically in special cases, such as wrapping a variadic function like syscall). |