labunix's blog

labunixのラボUnix

debian stretchでkernelデバッグを有効にする。

■debian stretchでkernelデバッグを有効にする。

$ lsb_release -a
No LSB modules are available.
Distributor ID:	Debian
Description:	Debian GNU/Linux 9.3 (stretch)
Release:	9.3
Codename:	stretch

$ uname -r -v
4.9.0-5-amd64 #1 SMP Debian 4.9.65-3+deb9u2 (2018-01-04)

$ apt-cache search kdump
crash - gdb ライクな文法によるカーネルデバッグユーティリティ
libatasmart-bin - ATA S.M.A.R.T. reading and parsing library - utilities
kdump-tools - scripts and tools for automating kdump (Linux crash dumps)

$ apt-cache search kexec
kexec-tools - tools to support fast kexec reboots
mkelfimage - utility to create ELF boot images from Linux kernel images
petitboot - ncurses version of petitboot, a kexec based bootloader
petitboot-twin - Twin GUI version of petitboot, a kexec based bootloader
pxe-kexec - Fetch PXE configuration file and netboot using kexec

■kdump/kexecの導入

$ sudo apt-get install -y kdump-tools crash kexec-tools makedumpfile `uname -r`-dbg
$ sudo sed -i -e 's/\(GRUB_CMDLINE_LINUX_DEFAULT="quiet\)"/\1 crashkernel=128M"/' /etc/default/grub
$ sudo update-grub
Generating grub configuration file ...
Found background image: /usr/share/images/desktop-base/desktop-grub.png
Linux イメージを見つけました: /boot/vmlinuz-4.9.0-5-amd64
Found initrd image: /boot/initrd.img-4.9.0-5-amd64
Linux イメージを見つけました: /boot/vmlinuz-4.9.0-4-amd64
Found initrd image: /boot/initrd.img-4.9.0-4-amd64
完了
$ sudo shutdown -r now && exit

$ awk 'gsub(" ","\n",$0){print $0}' /proc/cmdline | grep crash
crashkernel=128M
crashkernel=384M-:128M

$ sudo sed -i -e 's/\(USE_KDUMP=\)0/\11/' /etc/default/kdump-tools 
$ sudo sed -i -e 's/^#\(MAKEDUMP_ARGS\)/\1/' /etc/default/kdump-tools 
$ sudo sed -i -e 's/^#\KDUMP_KEXEC_ARGS.*/&\nKDUMP_KEXEC_ARGS="--elf64-core-headers"/' /etc/default/kdump-tools 
$ awk '/USE_KDUMP|MAKEDUMP_ARGS|KDUMP_KEXEC_ARGS/&&!/^#/' /etc/default/kdump-tools 
USE_KDUMP=1
MAKEDUMP_ARGS="-c -d 31"
KDUMP_KEXEC_ARGS="--elf64-core-headers"

$ sudo systemctl restart kdump-tools 

$ sudo kdump-config test | sed -e 's/UUID=[a-f0-9\-]* /UUID={UUID} \n    /' -e 's/--/\n  &/g'
USE_KDUMP:         1
KDUMP_SYSCTL:      kernel.panic_on_oops=1
KDUMP_COREDIR:     /var/crash
crashkernel addr:  0x2d000000
kdump kernel addr: 
kdump kernel:
   /var/lib/kdump/vmlinuz: symbolic link to /boot/vmlinuz-4.9.0-5-amd64
kdump initrd: 
   /var/lib/kdump/initrd.img: symbolic link to /var/lib/kdump/initrd.img-4.9.0-5-amd64
kexec command to be used:
  /sbin/kexec -p 
  --elf64-core-headers 
  --command-line="BOOT_IMAGE=/boot/vmlinuz-4.9.0-5-amd64 root=UUID={UUID} 
    ro quiet irqpoll nr_cpus=1 nousb systemd.unit=kdump-tools.service ata_piix.prefer_ms_hyperv=0" 
  --initrd=/var/lib/kdump/initrd.img /var/lib/kdump/vmlinuz

■Kernelに割り込んで操作できるように、Magic SysRq(Magic System Request)を有効にして、
 /proc/sysrq-triggerへの書き込みでkernel panicを起こしてみる。

$ su root -c 'sync;sync;sync'
$ su root -c 'echo 1 > /proc/sys/kernel/sysrq'
$ su root -c 'echo c > /proc/sysrq-trigger'

$ sudo crash /usr/lib/debug/vmlinux-4.9.0-5-amd64 dump.201801092144 

crash 7.1.7
Copyright (C) 2002-2016  Red Hat, Inc.
Copyright (C) 2004, 2005, 2006, 2010  IBM Corporation
Copyright (C) 1999-2006  Hewlett-Packard Co
Copyright (C) 2005, 2006, 2011, 2012  Fujitsu Limited
Copyright (C) 2006, 2007  VA Linux Systems Japan K.K.
Copyright (C) 2005, 2011  NEC Corporation
Copyright (C) 1999, 2002, 2007  Silicon Graphics, Inc.
Copyright (C) 1999, 2000, 2001, 2002  Mission Critical Linux, Inc.
This program is free software, covered by the GNU General Public License,
and you are welcome to change it and/or distribute copies of it under
certain conditions.  Enter "help copying" to see the conditions.
This program has absolutely no warranty.  Enter "help warranty" for details.
 
GNU gdb (GDB) 7.6
Copyright (C) 2013 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-unknown-linux-gnu"...

WARNING: kernel relocated [826MB]: patching 76556 gdb minimal_symbol values

      KERNEL: /usr/lib/debug/vmlinux-4.9.0-5-amd64                     
    DUMPFILE: dump.201801092144  [PARTIAL DUMP]
        CPUS: 2
        DATE: Tue Jan  9 21:44:26 2018
      UPTIME: 00:06:45
LOAD AVERAGE: 0.00, 0.02, 0.04
       TASKS: 231
    NODENAME: vm-gns3
     RELEASE: 4.9.0-5-amd64
     VERSION: #1 SMP Debian 4.9.65-3+deb9u2 (2018-01-04)
     MACHINE: x86_64  (2925 Mhz)
      MEMORY: 7.9 GB
       PANIC: "sysrq: SysRq : Trigger a crash"
         PID: 1383
     COMMAND: "bash"
        TASK: ffff8a4beac25180  [THREAD_INFO: ffff8a4beac25180]
         CPU: 0
       STATE: TASK_RUNNING (SYSRQ)

crash> 

■稼働時間(UPTIME)、CPU負荷(LOAD AVERAGE)、タスク数(TASKS)の確認(比較用に、再起動後のtopの出力)

$ top -b -n 1 | head -5
top - 21:52:39 up 7 min,  2 users,  load average: 0.03, 0.14, 0.09
Tasks: 169 total,   1 running, 168 sleeping,   0 stopped,   0 zombie
%Cpu(s):  4.0 us,  1.1 sy,  0.0 ni, 90.7 id,  4.2 wa,  0.0 hi,  0.0 si,  0.0 st
KiB Mem :  7950316 total,  6749468 free,   491412 used,   709436 buff/cache
KiB Swap:  8287228 total,  8287228 free,        0 used.  7216956 avail Mem 

■セカンドカーネルがロードされているかどうか、
 メモリ、Swapの利用状況はどうなっているか。

crash> cat /sys/kernel/kexec_crash_loaded
1

crash> sys all
...

crash> kmem -i
                 PAGES        TOTAL      PERCENTAGE
    TOTAL MEM  1987579       7.6 GB         ----
         FREE  1856768       7.1 GB   93% of TOTAL MEM
         USED   130811       511 MB    6% of TOTAL MEM
       SHARED    40216     157.1 MB    2% of TOTAL MEM
      BUFFERS     6591      25.7 MB    0% of TOTAL MEM
       CACHED        0            0    0% of TOTAL MEM
         SLAB     9558      37.3 MB    0% of TOTAL MEM

   TOTAL SWAP  2071807       7.9 GB         ----
    SWAP USED        0            0    0% of TOTAL SWAP
    SWAP FREE  2071807       7.9 GB  100% of TOTAL SWAP

 COMMIT LIMIT  3065596      11.7 GB         ----
    COMMITTED   311914       1.2 GB   10% of TOTAL LIMIT

crash> swap
SWAP_INFO_STRUCT    TYPE       SIZE       USED     PCT  PRI  FILENAME
ffff8a4beb951800  PARTITION  8287228k      0k       0%   -1  /dev/sda5

■PID 1383のbashからの「sysrq: SysRq : Trigger a crash」でkernel panicになっている。

$ sudo grep -A 2 -B 9 1383 /var/crash/201801092144/dmesg.201801092144 | grep -v "Modules linked in"
[  720.204677] sysrq: SysRq : Trigger a crash
[  720.204786] BUG: unable to handle kernel NULL pointer dereference at           (null)
[  720.204789] IP: [<ffffffffb4e1f892>] sysrq_handle_crash+0x12/0x20
[  720.204832] PGD 800000022a5ec067 
[  720.204833] PUD 22c0da067 
[  720.204835] PMD 0 
[  720.204835] 
[  720.204837] Oops: 0002 [#1] SMP
[  720.204876] CPU: 0 PID: 1383 Comm: bash Not tainted 4.9.0-5-amd64 #1 Debian 4.9.65-3+deb9u2
[  720.204877] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 04/05/2016
[  720.204879] task: ffff8a4beac25180 task.stack: ffff9ce9c2298000

■PID 1383はbash。
 バックトレースを見ると、panicのきっかけは「write_sysrq_trigger at ffffffffb4e203eb」あたりかな。

crash> bt -r 1383 | head -5
PID: 1383   TASK: ffff8a4beac25180  CPU: 0   COMMAND: "bash"
ffff9ce9c2298000:  0000000057ac6e9d 0000000000000000 
ffff9ce9c2298010:  0000000000000000 0000000000000000 
ffff9ce9c2298020:  0000000000000000 0000000000000000 
ffff9ce9c2298030:  0000000000000000 0000000000000000 

crash> bt -f 1383 | head -5
PID: 1383   TASK: ffff8a4beac25180  CPU: 0   COMMAND: "bash"
 #0 [ffff9ce9c229bbc0] machine_kexec at ffffffffb4a515c8
    ffff9ce9c229bbc8: 0000a9dc17f40fcd ffff8a49c0000000 
    ffff9ce9c229bbd8: 000000002d001000 ffff8a49ed001000 
    ffff9ce9c229bbe8: 000000002d000000 8d28220100010800 

crash> bt 1383
PID: 1383   TASK: ffff8a4beac25180  CPU: 0   COMMAND: "bash"
 #0 [ffff9ce9c229bbc0] machine_kexec at ffffffffb4a515c8
 #1 [ffff9ce9c229bc18] __crash_kexec at ffffffffb4b02f89
 #2 [ffff9ce9c229bcd8] crash_kexec at ffffffffb4b030a8
 #3 [ffff9ce9c229bcf0] oops_end at ffffffffb4a288a3
 #4 [ffff9ce9c229bd10] no_context at ffffffffb4a5e511
 #5 [ffff9ce9c229bd70] __do_page_fault at ffffffffb4a5efe3
 #6 [ffff9ce9c229bde0] page_fault at ffffffffb5008b58
    [exception RIP: sysrq_handle_crash+18]
    RIP: ffffffffb4e1f892  RSP: ffff9ce9c229be90  RFLAGS: 00010282
    RAX: 000000000000000f  RBX: 0000000000000063  RCX: 0000000000000000
    RDX: 0000000000000000  RSI: ffff8a4bf9c10608  RDI: 0000000000000063
    RBP: ffffffffb56bf040   R8: 0000000000000001   R9: 0000000000000af4
    R10: 0000000000000001  R11: 0000000000000001  R12: 0000000000000004
    R13: 0000000000000000  R14: 0000000000000000  R15: 0000000000000002
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
 #7 [ffff9ce9c229be90] __handle_sysrq at ffffffffb4e1ffb1
 #8 [ffff9ce9c229beb8] write_sysrq_trigger at ffffffffb4e203eb
 #9 [ffff9ce9c229bec8] proc_reg_write at ffffffffb4c713bd
#10 [ffff9ce9c229bee0] vfs_write at ffffffffb4c02910
#11 [ffff9ce9c229bf10] sys_write at ffffffffb4c03d52
#12 [ffff9ce9c229bf50] system_call_fast_compare_end at ffffffffb500761e
    RIP: 00007f9d0934b760  RSP: 00007fff06ff68c8  RFLAGS: 00000246
    RAX: ffffffffffffffda  RBX: 0000000000000004  RCX: 00007f9d0934b760
    RDX: 0000000000000002  RSI: 0000000001cf4008  RDI: 0000000000000001
    RBP: 0000000000000040   R8: 00007f9d0960b760   R9: 00007f9d09c41b40
    R10: 0000000000000097  R11: 0000000000000246  R12: 0000000001d05000
    R13: 0000000000000004  R14: 00000000004cbea4  R15: 0000000000000619
    ORIG_RAX: 0000000000000001  CS: 0033  SS: 002b

crash> ps | tail -3
   1040   1039   1  ffff8a4bec00ae00  IN   0.1   20704   5316  bash
   1382   1040   0  ffff8a4beabc3000  IN   0.0   56612   3732  su
>  1383   1382   0  ffff8a4beac25180  RU   0.0   10664   3048  bash

crash> bt 1382 | head -1
PID: 1382   TASK: ffff8a4beabc3000  CPU: 0   COMMAND: "su"

■「sysrq-trigger」が原因であることを示すその他の情報

crash> dis sysrq | grep crash
  ffffffffb4e1f880 (t) sysrq_handle_crash
  ffffffffb56bf040 (d) sysrq_crash_op

crash> dis sysrq_handle_crash
0xffffffffb4e1f880 <sysrq_handle_crash>:        data32 data32 data32 xchg %ax,%ax [FTRACE NOP]
0xffffffffb4e1f885 <sysrq_handle_crash+5>:      movl   $0x1,0xa8d3b9(%rip)        # 0xffffffffb58acc48
0xffffffffb4e1f88f <sysrq_handle_crash+15>:     sfence 
0xffffffffb4e1f892 <sysrq_handle_crash+18>:     movb   $0x1,0x0
0xffffffffb4e1f89a <sysrq_handle_crash+26>:     retq   
0xffffffffb4e1f89b <sysrq_handle_crash+27>:     nopl   0x0(%rax,%rax,1)

crash> log | grep sysrq_handle_crash
[  720.204789] IP: [<ffffffffb4e1f892>] sysrq_handle_crash+0x12/0x20
[  720.204880] RIP: 0010:[<ffffffffb4e1f892>]  [<ffffffffb4e1f892>] sysrq_handle_crash+0x12/0x20
[  720.205017] RIP  [<ffffffffb4e1f892>] sysrq_handle_crash+0x12/0x20

crash> dis sysrq_handle_crash+0x12
0xffffffffb4e1f892 <sysrq_handle_crash+18>:     movb   $0x1,0x0

crash> dis sysrq_handle_crash+18
0xffffffffb4e1f892 <sysrq_handle_crash+18>:     movb   $0x1,0x0

crash> files
PID: 1383   TASK: ffff8a4beac25180  CPU: 0   COMMAND: "bash"
ROOT: /    CWD: /home/labunix
 FD       FILE            DENTRY           INODE       TYPE PATH
  0 ffff8a4beb03c300 ffff8a4bec4e50c0 ffff8a4bee867000 CHR  /dev/pts/0
  1 ffff8a4bed760200 ffff8a4bec605000 ffff8a4bedbee2b8 REG  /proc/sysrq-trigger
  2 ffff8a4beb03c300 ffff8a4bec4e50c0 ffff8a4bee867000 CHR  /dev/pts/0
 10 ffff8a4beb03c300 ffff8a4bec4e50c0 ffff8a4bee867000 CHR  /dev/pts/0

crash> ps -t 1383
PID: 1383   TASK: ffff8a4beac25180  CPU: 0   COMMAND: "bash"
    RUN TIME: 00:00:00
  START TIME: 720188093927
       UTIME: 0
       STIME: 1

crash> ps -S
  RU: 3
  IN: 228

crash>  ps -a bash
PID: 1040   TASK: ffff8a4bec00ae00  CPU: 1   COMMAND: "bash"
ps: cannot access user stack address: 7ffd119f0e7c

PID: 1383   TASK: ffff8a4beac25180  CPU: 0   COMMAND: "bash"
ps: cannot access user stack address: 7fff06ff7791

crash>  set -p
    PID: 1383
COMMAND: "bash"
   TASK: ffff8a4beac25180  [THREAD_INFO: ffff8a4beac25180]
    CPU: 0
  STATE: TASK_RUNNING (SYSRQ)