■Debian stretchでカーネルのクラッシュダンプ取得(kdump)とcrashによるkernelデバッグを有効にする。
$ lsb_release -a
No LSB modules are available.
Distributor ID: Debian
Description: Debian GNU/Linux 9.3 (stretch)
Release: 9.3
Codename: stretch
$ uname -r -v
4.9.0-5-amd64 #1 SMP Debian 4.9.65-3+deb9u2 (2018-01-04)
$ apt-cache search kdump
crash - gdb ライクな文法によるカーネルデバッグユーティリティ
libatasmart-bin - ATA S.M.A.R.T. reading and parsing library - utilities
kdump-tools - scripts and tools for automating kdump (Linux crash dumps)
$ apt-cache search kexec
kexec-tools - tools to support fast kexec reboots
mkelfimage - utility to create ELF boot images from Linux kernel images
petitboot - ncurses version of petitboot, a kexec based bootloader
petitboot-twin - Twin GUI version of petitboot, a kexec based bootloader
pxe-kexec - Fetch PXE configuration file and netboot using kexec
■kdump/kexecの導入
$ sudo apt-get install -y kdump-tools crash kexec-tools makedumpfile `uname -r`-dbg
$ sudo sed -i -e 's/\(GRUB_CMDLINE_LINUX_DEFAULT="quiet\)"/\1 crashkernel=128M"/' /etc/default/grub
$ sudo update-grub
Generating grub configuration file ...
Found background image: /usr/share/images/desktop-base/desktop-grub.png
Linux イメージを見つけました: /boot/vmlinuz-4.9.0-5-amd64
Found initrd image: /boot/initrd.img-4.9.0-5-amd64
Linux イメージを見つけました: /boot/vmlinuz-4.9.0-4-amd64
Found initrd image: /boot/initrd.img-4.9.0-4-amd64
完了
$ sudo shutdown -r now && exit
$ awk 'gsub(" ","\n",$0){print $0}' /proc/cmdline | grep crash
crashkernel=128M
crashkernel=384M-:128M
$ sudo sed -i -e 's/\(USE_KDUMP=\)0/\11/' /etc/default/kdump-tools
$ sudo sed -i -e 's/^#\(MAKEDUMP_ARGS\)/\1/' /etc/default/kdump-tools
$ sudo sed -i -e 's/^#\KDUMP_KEXEC_ARGS.*/&\nKDUMP_KEXEC_ARGS="--elf64-core-headers"/' /etc/default/kdump-tools
$ awk '/USE_KDUMP|MAKEDUMP_ARGS|KDUMP_KEXEC_ARGS/&&!/^#/' /etc/default/kdump-tools
USE_KDUMP=1
MAKEDUMP_ARGS="-c -d 31"
KDUMP_KEXEC_ARGS="--elf64-core-headers"
$ sudo systemctl restart kdump-tools
$ sudo kdump-config test | sed -e 's/UUID=[a-f0-9\-]* /UUID={UUID} \n /' -e 's/--/\n &/g'
USE_KDUMP: 1
KDUMP_SYSCTL: kernel.panic_on_oops=1
KDUMP_COREDIR: /var/crash
crashkernel addr: 0x2d000000
kdump kernel addr:
kdump kernel:
/var/lib/kdump/vmlinuz: symbolic link to /boot/vmlinuz-4.9.0-5-amd64
kdump initrd:
/var/lib/kdump/initrd.img: symbolic link to /var/lib/kdump/initrd.img-4.9.0-5-amd64
kexec command to be used:
/sbin/kexec -p
--elf64-core-headers
--command-line="BOOT_IMAGE=/boot/vmlinuz-4.9.0-5-amd64 root=UUID={UUID}
ro quiet irqpoll nr_cpus=1 nousb systemd.unit=kdump-tools.service ata_piix.prefer_ms_hyperv=0"
--initrd=/var/lib/kdump/initrd.img /var/lib/kdump/vmlinuz
■Magic SysRq(Magic System Request)を有効にしてカーネルへ直接要求を送れるようにし、
Sysrq triggerでkernel panicを起こしてみる。
パニック後はkdumpによりダンプが /var/crash/<日時>/ 配下に保存されて再起動するので、再起動後にそのディレクトリでcrashを実行する。
$ su root -c 'sync;sync;sync'
$ su root -c 'echo 1 > /proc/sys/kernel/sysrq'
$ su root -c 'echo c > /proc/sysrq-trigger'
$ sudo crash /usr/lib/debug/vmlinux-4.9.0-5-amd64 dump.201801092144
crash 7.1.7
Copyright (C) 2002-2016 Red Hat, Inc.
Copyright (C) 2004, 2005, 2006, 2010 IBM Corporation
Copyright (C) 1999-2006 Hewlett-Packard Co
Copyright (C) 2005, 2006, 2011, 2012 Fujitsu Limited
Copyright (C) 2006, 2007 VA Linux Systems Japan K.K.
Copyright (C) 2005, 2011 NEC Corporation
Copyright (C) 1999, 2002, 2007 Silicon Graphics, Inc.
Copyright (C) 1999, 2000, 2001, 2002 Mission Critical Linux, Inc.
This program is free software, covered by the GNU General Public License,
and you are welcome to change it and/or distribute copies of it under
certain conditions. Enter "help copying" to see the conditions.
This program has absolutely no warranty. Enter "help warranty" for details.
GNU gdb (GDB) 7.6
Copyright (C) 2013 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law. Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-unknown-linux-gnu"...
WARNING: kernel relocated [826MB]: patching 76556 gdb minimal_symbol values
KERNEL: /usr/lib/debug/vmlinux-4.9.0-5-amd64
DUMPFILE: dump.201801092144 [PARTIAL DUMP]
CPUS: 2
DATE: Tue Jan 9 21:44:26 2018
UPTIME: 00:06:45
LOAD AVERAGE: 0.00, 0.02, 0.04
TASKS: 231
NODENAME: vm-gns3
RELEASE: 4.9.0-5-amd64
VERSION: #1 SMP Debian 4.9.65-3+deb9u2 (2018-01-04)
MACHINE: x86_64 (2925 Mhz)
MEMORY: 7.9 GB
PANIC: "sysrq: SysRq : Trigger a crash"
PID: 1383
COMMAND: "bash"
TASK: ffff8a4beac25180 [THREAD_INFO: ffff8a4beac25180]
CPU: 0
STATE: TASK_RUNNING (SYSRQ)
crash>
■稼働時間(UPTIME)、CPU負荷(LOAD AVERAGE)、タスク数(TASKS)の確認
(以下のtopは再起動後の稼働中システムでの値。crashが表示するUPTIME等はパニック発生時点の値なので、比較用の参考。)
$ top -b -n 1 | head -5
top - 21:52:39 up 7 min, 2 users, load average: 0.03, 0.14, 0.09
Tasks: 169 total, 1 running, 168 sleeping, 0 stopped, 0 zombie
%Cpu(s): 4.0 us, 1.1 sy, 0.0 ni, 90.7 id, 4.2 wa, 0.0 hi, 0.0 si, 0.0 st
KiB Mem : 7950316 total, 6749468 free, 491412 used, 709436 buff/cache
KiB Swap: 8287228 total, 8287228 free, 0 used. 7216956 avail Mem
■セカンドカーネルがロードされているかどうか、
メモリ、Swapの利用状況はどうなっているか。
crash> cat /sys/kernel/kexec_crash_loaded
1
crash> sys all
...
crash> kmem -i
PAGES TOTAL PERCENTAGE
TOTAL MEM 1987579 7.6 GB ----
FREE 1856768 7.1 GB 93% of TOTAL MEM
USED 130811 511 MB 6% of TOTAL MEM
SHARED 40216 157.1 MB 2% of TOTAL MEM
BUFFERS 6591 25.7 MB 0% of TOTAL MEM
CACHED 0 0 0% of TOTAL MEM
SLAB 9558 37.3 MB 0% of TOTAL MEM
TOTAL SWAP 2071807 7.9 GB ----
SWAP USED 0 0 0% of TOTAL SWAP
SWAP FREE 2071807 7.9 GB 100% of TOTAL SWAP
COMMIT LIMIT 3065596 11.7 GB ----
COMMITTED 311914 1.2 GB 10% of TOTAL LIMIT
crash> swap
SWAP_INFO_STRUCT TYPE SIZE USED PCT PRI FILENAME
ffff8a4beb951800 PARTITION 8287228k 0k 0% -1 /dev/sda5
■PID 1383のbashからの「sysrq: SysRq : Trigger a crash」でkernel panicになっている。
$ sudo grep -A 2 -B 9 1383 /var/crash/201801092144/dmesg.201801092144 | grep -v "Modules linked in"
[ 720.204677] sysrq: SysRq : Trigger a crash
[ 720.204786] BUG: unable to handle kernel NULL pointer dereference at (null)
[ 720.204789] IP: [<ffffffffb4e1f892>] sysrq_handle_crash+0x12/0x20
[ 720.204832] PGD 800000022a5ec067
[ 720.204833] PUD 22c0da067
[ 720.204835] PMD 0
[ 720.204835]
[ 720.204837] Oops: 0002 [#1] SMP
[ 720.204876] CPU: 0 PID: 1383 Comm: bash Not tainted 4.9.0-5-amd64
[ 720.204877] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 04/05/2016
[ 720.204879] task: ffff8a4beac25180 task.stack: ffff9ce9c2298000
■PID 1383はbash。
バックトレース上は、/proc/sysrq-triggerへの書き込みを処理する「write_sysrq_trigger at ffffffffb4e203eb」を経由してsysrq_handle_crashに到達し、そこでパニックしたと考えられる。
crash> bt -r 1383 | head -5
PID: 1383 TASK: ffff8a4beac25180 CPU: 0 COMMAND: "bash"
ffff9ce9c2298000: 0000000057ac6e9d 0000000000000000
ffff9ce9c2298010: 0000000000000000 0000000000000000
ffff9ce9c2298020: 0000000000000000 0000000000000000
ffff9ce9c2298030: 0000000000000000 0000000000000000
crash> bt -f 1383 | head -5
PID: 1383 TASK: ffff8a4beac25180 CPU: 0 COMMAND: "bash"
ffff9ce9c229bbc8: 0000a9dc17f40fcd ffff8a49c0000000
ffff9ce9c229bbd8: 000000002d001000 ffff8a49ed001000
ffff9ce9c229bbe8: 000000002d000000 8d28220100010800
crash> bt 1383
PID: 1383 TASK: ffff8a4beac25180 CPU: 0 COMMAND: "bash"
[exception RIP: sysrq_handle_crash+18]
RIP: ffffffffb4e1f892 RSP: ffff9ce9c229be90 RFLAGS: 00010282
RAX: 000000000000000f RBX: 0000000000000063 RCX: 0000000000000000
RDX: 0000000000000000 RSI: ffff8a4bf9c10608 RDI: 0000000000000063
RBP: ffffffffb56bf040 R8: 0000000000000001 R9: 0000000000000af4
R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000004
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000002
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
RIP: 00007f9d0934b760 RSP: 00007fff06ff68c8 RFLAGS: 00000246
RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007f9d0934b760
RDX: 0000000000000002 RSI: 0000000001cf4008 RDI: 0000000000000001
RBP: 0000000000000040 R8: 00007f9d0960b760 R9: 00007f9d09c41b40
R10: 0000000000000097 R11: 0000000000000246 R12: 0000000001d05000
R13: 0000000000000004 R14: 00000000004cbea4 R15: 0000000000000619
ORIG_RAX: 0000000000000001 CS: 0033 SS: 002b
crash> ps | tail -3
1040 1039 1 ffff8a4bec00ae00 IN 0.1 20704 5316 bash
1382 1040 0 ffff8a4beabc3000 IN 0.0 56612 3732 su
> 1383 1382 0 ffff8a4beac25180 RU 0.0 10664 3048 bash
crash> bt 1382 | head -1
PID: 1382 TASK: ffff8a4beabc3000 CPU: 0 COMMAND: "su"
■「sysrq-trigger」が原因であることを示すその他の情報
crash> sym -q sysrq | grep crash
ffffffffb4e1f880 (t) sysrq_handle_crash
ffffffffb56bf040 (d) sysrq_crash_op
crash> dis sysrq_handle_crash
0xffffffffb4e1f880 <sysrq_handle_crash>: data32 data32 data32 xchg %ax,%ax [FTRACE NOP]
0xffffffffb4e1f885 <sysrq_handle_crash+5>: movl $0x1,0xa8d3b9(%rip)
0xffffffffb4e1f88f <sysrq_handle_crash+15>: sfence
0xffffffffb4e1f892 <sysrq_handle_crash+18>: movb $0x1,0x0
0xffffffffb4e1f89a <sysrq_handle_crash+26>: retq
0xffffffffb4e1f89b <sysrq_handle_crash+27>: nopl 0x0(%rax,%rax,1)
crash> log | grep sysrq_handle_crash
[ 720.204789] IP: [<ffffffffb4e1f892>] sysrq_handle_crash+0x12/0x20
[ 720.204880] RIP: 0010:[<ffffffffb4e1f892>] [<ffffffffb4e1f892>] sysrq_handle_crash+0x12/0x20
[ 720.205017] RIP [<ffffffffb4e1f892>] sysrq_handle_crash+0x12/0x20
crash> dis sysrq_handle_crash+0x12
0xffffffffb4e1f892 <sysrq_handle_crash+18>: movb $0x1,0x0
crash> dis sysrq_handle_crash+18
0xffffffffb4e1f892 <sysrq_handle_crash+18>: movb $0x1,0x0
crash> files
PID: 1383 TASK: ffff8a4beac25180 CPU: 0 COMMAND: "bash"
ROOT: / CWD: /home/labunix
FD FILE DENTRY INODE TYPE PATH
0 ffff8a4beb03c300 ffff8a4bec4e50c0 ffff8a4bee867000 CHR /dev/pts/0
1 ffff8a4bed760200 ffff8a4bec605000 ffff8a4bedbee2b8 REG /proc/sysrq-trigger
2 ffff8a4beb03c300 ffff8a4bec4e50c0 ffff8a4bee867000 CHR /dev/pts/0
10 ffff8a4beb03c300 ffff8a4bec4e50c0 ffff8a4bee867000 CHR /dev/pts/0
crash> ps -t 1383
PID: 1383 TASK: ffff8a4beac25180 CPU: 0 COMMAND: "bash"
RUN TIME: 00:00:00
START TIME: 720188093927
UTIME: 0
STIME: 1
crash> ps -S
RU: 3
IN: 228
crash> ps -a bash
PID: 1040 TASK: ffff8a4bec00ae00 CPU: 1 COMMAND: "bash"
ps: cannot access user stack address: 7ffd119f0e7c
PID: 1383 TASK: ffff8a4beac25180 CPU: 0 COMMAND: "bash"
ps: cannot access user stack address: 7fff06ff7791
crash> set -p
PID: 1383
COMMAND: "bash"
TASK: ffff8a4beac25180 [THREAD_INFO: ffff8a4beac25180]
CPU: 0
STATE: TASK_RUNNING (SYSRQ)