Bug I encountered in Solaris 10 that may be present in FreeBSD's ZFS

Code:
release: 5.10 (64-bit)
version: Generic_150401-11
machine: i86pc
node name: ****
system type: i86pc
hostid: 2aa69f5e
dump_conflags: 0x10000 (DUMP_KERNEL) on /dev/zvol/dsk/rpool/dump(2G)
time of crash: Sun Jun 1 06:30:24 UTC 2014
age of system: 26 days 6 hours 28 minutes 50 seconds
panic CPU: 1 (16 CPUs, 71.9G memory)
panic string: BAD TRAP: type=e (#pf Page fault) rp=fffffe8001bb3c20 addr=28 occurred in module "zfs" due to a NULL pointer dereference


panic on CPU 1
panic string: BAD TRAP: type=e (#pf Page fault) rp=fffffe8001bb3c20 addr=28 occurred in module "zfs" due to a NULL pointer dereference
==== panic user (LWP_SYS) thread: 0xffffffffa9565840 PID: 553 on CPU: 1 ====
cmd: /usr/lib/nfs/nfsd
t_procp: 0xffffffffc52dc4b0
p_as: 0xffffffffa92425b0 size: 2723840 RSS: 1814528
a_hat: 0xffffffffa946dbc0
cpuset: 1
p_zone: 0xfffffffffbcd4ee0 (global)
t_stk: 0xfffffe8001bb4f10 sp: 0xfffffe8001bb39e0 t_stkbase: 0xfffffe8001bb0000
t_pri: 60 (FX) pctcpu: 0.785410
t_transience: 0 t_wkld_flags: 0
t_lwp: 0xffffffffacd92740 t_tid: 97
lwp_regs: 0xfffffe8001bb4f10
lwp_ap: 0xfffffe8001bb4ed0
t_mstate: LMS_SYSTEM ms_prev: LMS_KFAULT
ms_state_start: 36 days 14 hours 53 minutes 23.935007313 seconds later
ms_start: 26 days 6 hours 10 minutes 10.397237971 seconds earlier
t_cpupart: 0xfffffffffbc51f80(0) last CPU: 1
idle: 1 ticks (0.01s)
start: Tue May 6 00:06:53 2014
age: 2269411 seconds (26 days 6 hours 23 minutes 31 seconds)
t_state: TS_ONPROC
t_flag: 0x1880 (T_DONTPEND|T_PANIC|T_LWPREUSE)
t_proc_flag: 0x100 (TP_MSACCT)
t_schedflag: 3 (TS_LOAD|TS_DONT_SWAP)
p_flag: 0x42000000 (SMSACCT|SMSFORK)

pc: unix:vpanic_common+0x165: addq $0xf0,%rsp

unix:vpanic_common+0x165()
unix:0xfffffffffb8672d2()
int unix:die+0xda((uint_t), (struct regs *), (caddr_t), (processorid_t))
void unix:trap+0x5e6((struct regs *), (caddr_t), (processorid_t))
unix:_cmntrap+0x140()
-- panic trap data type: 0xe (Page fault)
addr 0x28 rp 0xfffffe8001bb3c20
trapno 0xe (Page fault)
err 0 (page not present,read,supervisor)
%rfl 0x10213 (carry|auxilliary carry|interrupt enable|resume)
savfp 0xfffffe8001bb3d20
savpc zfs:arc_hash_replace+0x24: movq 0x28(%rax),%rax

%rbp 0xfffffe8001bb3d20 %rsp 0xfffffe8001bb3d10
%rip zfs:arc_hash_replace+0x24: movq 0x28(%rax),%rax

0%rdi 0xfffffea57adc4758 1%rsi 0xfffffea57d48e420 2%rdx 0x28
3%rcx 0 4%r8 0x43dffd 5%r9 0

%rax 0 %rbx 0xfffffea57d48e420
%r10 0xfffffffffb83edb4 %r11 1 %r12 0xfffffea57d48e420
%r13 0xfffffffffbcfe440 %r14 0 %r15 0xffffffff94b2bf40

%cs 0x28 (KCS_SEL) %ds 0x43 (UDS_SEL)
%es 0x43 (UDS_SEL) %ss 0x30 (KDS_SEL)
%fs 0 (KFS_SEL) %gs 0x1c3 (LWPGS_SEL)
fsbase 0
gsbase 0xffffffffa786c800
<trap>void zfs:arc_hash_replace+0x24((arc_buf_t *), (arc_hdr_t *))
void zfs:arc_kill_buf+0xd1((arc_buf_t *), (boolean_t))
int zfs:arc_evict_buf+0x139((arc_evict_list_t *), (arc_elink_t *), (uint64_t *))
void zfs:arc_evict_bytes+0x87((arc_state_t *), (int64_t))
void *zfs:arc_get_data_block+0xa2((uint64_t), (boolean_t))
int zfs:arc_read+0x3ec((zio_t *), (spa_t *), (const blkptr_t *), (uint64_t), (arc_done_func_t *), (void *), (int), (enum zio_flag), (arc_options_t), (const zbookmark_t *))
int zfs:dsl_read+0x2a((zio_t *), (spa_t *), (const blkptr_t *), (uint64_t), (arc_done_func_t *), (void *), (int), (enum zio_flag), (arc_options_t), (const zbookmark_t *))
void zfs:dbuf_prefetch+0x156((dnode_t *), (uint64_t))
uint64_t zfs:dmu_zfetch_fetch+0x40((dnode_t *), (uint64_t), (uint64_t))
void zfs:dmu_zfetch_dofetch+0x7c((zfetch_t *), (zstream_t *))
int zfs:dmu_zfetch_find+0x369((zfetch_t *), (zstream_t *), (int))
void zfs:dmu_zfetch+0xc8((zfetch_t *), (uint64_t), (uint64_t), (int))
int zfs:dbuf_read+0x259((dmu_buf_impl_t *), (zio_t *), (uint32_t))
int zfs:dmu_tx_check_ioerr+0x6e((zio_t *), (dnode_t *), (int), (uint64_t))
void zfs:dmu_tx_count_write+0x4d7((dmu_tx_hold_t *), (uint64_t), (uint64_t))
void zfs:dmu_tx_hold_write+0x4a((dmu_tx_t *), (uint64_t), (uint64_t), (int))
int zfs:zfs_write+0x3f4((vnode_t *), (uio_t *), (int), (cred_t *), (caller_context_t *))
int zfs:zfs_shim_write+0xc((vnode_t *), (uio_t *), (int), (cred_t *))
int genunix:fop_write+0x31((vnode_t *), (uio_t *), (int), (cred_t *), (struct caller_context *))
void nfssrv:rfs3_write+0x459((WRITE3args *), (WRITE3res *), (struct exportinfo *), (struct svc_req *), (cred_t *))
void nfssrv:common_dispatch+0x5b8((struct svc_req *), (SVCXPRT *), (rpcvers_t), (rpcvers_t), (char *), (struct rpc_disptable *))
void nfssrv:rfs_dispatch+0x21((struct svc_req *), (SVCXPRT *))
void rpcmod:svc_getreq+0x209((SVCXPRT *), (mblk_t *))
int rpcmod:svc_run+0x124((SVCPOOL *))
int rpcmod:svc_do_run+0x88((int))
int nfs:nfssys+0x16a((enum nfssys_op), (void *))
unix:_sys_sysenter_post_swapgs+0x14b()
-- switch to user thread's user stack --


!! This is the source for the panicking routine:


/*
 * Replace a hash table entry with a new entry. This is most often
 * used to swap a ghost entry in when we evict data from the arc,
 * but also is used in sync-to-convergence.
 */
static void
arc_hash_replace(arc_buf_t *old, arc_hdr_t *new)
{
	arc_hdr_t **hdrp;
	arc_hdr_t *hdr = &old->b_id;
	kmutex_t *hash_lock = HASH_LOCK(old);

	ASSERT(MUTEX_HELD(hash_lock));
	ASSERT(BUF_HASHED(old));
	ASSERT(!BUF_IS_HOLE(old));
	ASSERT(hdr->h_hash_idx == new->h_hash_idx);

	hdrp = &arc_hash_table.ht_table[hdr->h_hash_idx];
	while (*hdrp != hdr)
		hdrp = &(*hdrp)->h_next;		<<<<<
	*hdrp = new;
	new->h_next = hdr->h_next;
	hdr->h_next = NULL;
	ARC_FLAG_CLEAR(old, HASHED);
	(void) refcount_remove(&old->b_active, &arc_hash_table);
	ARC_FLAG_SET(new, HASHED);
	if (!BUF_GHOST(new)) {
		arc_buf_t *buf = (arc_buf_t *)new;
		arc_hold(buf, &arc_hash_table);
	}
}
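
Note the unchecked chain walk: while (*hdrp != hdr) assumes the old
header is always findable on its hash chain. If the chain is corrupt,
or the header was already removed, the walk runs off the NULL
terminator and dereferences it. Below is a minimal standalone sketch
of the same failure mode; the struct node type and function names are
mine, for illustration only, not the actual ZFS structures:

#include <stdio.h>

/* Illustrative stand-in for an arc_hdr_t hash chain. */
struct node {
	int		key;
	struct node	*next;
};

/*
 * Same shape as the loop in arc_hash_replace(): walk the chain until
 * the slot pointing at `target` is found.  If `target` is not on the
 * chain, *npp eventually becomes NULL and the next iteration
 * dereferences it -- the same class of NULL pointer dereference seen
 * in the panic above.
 */
static struct node **
find_slot_unchecked(struct node **head, struct node *target)
{
	struct node **npp = head;

	while (*npp != target)
		npp = &(*npp)->next;	/* faults here if target is absent */
	return (npp);
}

/* A defensive variant that stops at the NULL terminator instead. */
static struct node **
find_slot_checked(struct node **head, struct node *target)
{
	struct node **npp = head;

	while (*npp != NULL && *npp != target)
		npp = &(*npp)->next;
	return (*npp == target ? npp : NULL);
}

int
main(void)
{
	struct node b = { 2, NULL };
	struct node a = { 1, &b };
	struct node *head = &a;
	struct node not_on_chain = { 3, NULL };

	if (find_slot_checked(&head, &not_on_chain) == NULL)
		printf("target not on chain; unchecked walk would fault\n");
	/* find_slot_unchecked(&head, &not_on_chain) would crash here. */
	return (0);
}

In a debug kernel, an explicit check or ASSERT on the terminator would
turn this into an assertion failure at the point of corruption instead
of a bad trap deep in eviction.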


> rdi -f arc_hash_replace
zfs:arc_hash_replace+0: pushq %rbp
zfs:arc_hash_replace+1: movq %rsp,%rbp
zfs:arc_hash_replace+4: pushq %rbx
zfs:arc_hash_replace+5: movq %rsi,%rbx
zfs:arc_hash_replace+8: subq $0x8,%rsp
zfs:arc_hash_replace+0xc: movl 0x24(%rdi),%edx
zfs:arc_hash_replace+0xf: movq +0x4418682(%rip),%rax <zfs(data):arc_hash_table+8>
zfs:arc_hash_replace+0x16: leaq (%rax,%rdx,8),%rdx
zfs:arc_hash_replace+0x1a: movq (%rdx),%rax
zfs:arc_hash_replace+0x1d: jmp +0x9 <zfs:arc_hash_replace+0x28> (2f)
zfs:arc_hash_replace+0x1f: nop

zfs:arc_hash_replace+0x20:1: leaq 0x28(%rax),%rdx
zfs:arc_hash_replace+0x24: movq 0x28(%rax),%rax <<<<< panic here

zfs:arc_hash_replace+0x28:2: cmpq %rdi,%rax
zfs:arc_hash_replace+0x2b: jne -0xd <zfs:arc_hash_replace+0x20> (1b)
zfs:arc_hash_replace+0x2d: movq %rbx,(%rdx)
.....


> sdump 0xfffffea57d48e420 arc_hdr_t
arc_hdr_t {
uint64_t h_spa = 0x3e3aad6c7fe44621
dva_t h_dva = {
uint64_t [2] dva_word = [ 0x200000100 0x3d5b5e80 ]
}
uint64_t h_birth = 0x45a6ea
arc_flags_t h_flags = 0x92
uint32_t h_hash_idx = 0x1620bd
arc_hdr_t *h_next = NULL
}

!! The system panicked because the hash chain walk ran off the end of
!! the list: the old header was never found on its chain, so the loop
!! eventually loaded a NULL h_next into %rax, and the next iteration
!! dereferenced it at offset 0x28 (h_next), producing the bad trap.
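
The fault address itself confirms the picture: with h_spa (8 bytes),
h_dva (16), h_birth (8), h_flags (4) and h_hash_idx (4) ahead of it,
h_next sits at byte offset 40 = 0x28 in arc_hdr_t, exactly the faulting
address once the base pointer is NULL. A quick standalone check of that
arithmetic (the struct is re-declared from the sdump output above; the
dva_t and arc_flags_t typedefs are my guesses, for illustration only):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Re-declared from the sdump output; typedefs are assumptions. */
typedef struct dva { uint64_t dva_word[2]; } dva_t;
typedef uint32_t arc_flags_t;

typedef struct arc_hdr {
	uint64_t	h_spa;
	dva_t		h_dva;
	uint64_t	h_birth;
	arc_flags_t	h_flags;
	uint32_t	h_hash_idx;
	struct arc_hdr	*h_next;
} arc_hdr_t;

int
main(void)
{
	/* Prints 0x28 on LP64 -- matching the fault address above. */
	printf("offsetof(arc_hdr_t, h_next) = 0x%zx\n",
	    offsetof(arc_hdr_t, h_next));
	return (0);
}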

Bug 18247089 [https://bug.oraclecorp.com/pls/bug/webbug_edit.edit_info_top?rptno=18247089] - bad trap panic in arc_hash_remove()
 
That is the debug output and source code reference for the bug I encountered. It may or may not be relevant to the ZFS code that FreeBSD forked some time ago; I just thought I would raise awareness of the potential issue.
 
There are very few developers on here. These boards are mainly geared towards users.

I would suggest contacting the freebsd-fs@ mailing list. But do elaborate on what exactly it is, when it happens, and why, or you'll be greeted by a deafening silence.
 
Correct. The kernel core dump that was used in the debug session came from Solaris 10 with a pretty recent patch cluster applied.
 
SirDice,

Thank you for pointing out the mailing list. I will email them a reference to this thread. If it is met with silence, then it may not be relevant.

Regards.
 
This matches one or more known bugs in closed-source Solaris ZFS. The arc_hash code does not exist in the open-source implementations of ZFS. Contact Oracle for a solution.

-- richard
 