Bug I encountered in Solaris 10 that may be present in FreeBSD's ZFS

Code:
release: 5.10 (64-bit)
version: Generic_150401-11
machine: i86pc
node name: ****
system type: i86pc
hostid: 2aa69f5e
dump_conflags: 0x10000 (DUMP_KERNEL) on /dev/zvol/dsk/rpool/dump(2G)
time of crash: Sun Jun 1 06:30:24 UTC 2014
age of system: 26 days 6 hours 28 minutes 50 seconds
panic CPU: 1 (16 CPUs, 71.9G memory)
panic string: BAD TRAP: type=e (#pf Page fault) rp=fffffe8001bb3c20 addr=28 occurred in module "zfs" due to a NULL pointer dereference


panic on CPU 1
panic string: BAD TRAP: type=e (#pf Page fault) rp=fffffe8001bb3c20 addr=28 occurred in module "zfs" due to a NULL pointer dereference
==== panic user (LWP_SYS) thread: 0xffffffffa9565840 PID: 553 on CPU: 1 ====
cmd: /usr/lib/nfs/nfsd
t_procp: 0xffffffffc52dc4b0
p_as: 0xffffffffa92425b0 size: 2723840 RSS: 1814528
a_hat: 0xffffffffa946dbc0
cpuset: 1
p_zone: 0xfffffffffbcd4ee0 (global)
t_stk: 0xfffffe8001bb4f10 sp: 0xfffffe8001bb39e0 t_stkbase: 0xfffffe8001bb0000
t_pri: 60 (FX) pctcpu: 0.785410
t_transience: 0 t_wkld_flags: 0
t_lwp: 0xffffffffacd92740 t_tid: 97
lwp_regs: 0xfffffe8001bb4f10
lwp_ap: 0xfffffe8001bb4ed0
t_mstate: LMS_SYSTEM ms_prev: LMS_KFAULT
ms_state_start: 36 days 14 hours 53 minutes 23.935007313 seconds later
ms_start: 26 days 6 hours 10 minutes 10.397237971 seconds earlier
t_cpupart: 0xfffffffffbc51f80(0) last CPU: 1
idle: 1 ticks (0.01s)
start: Tue May 6 00:06:53 2014
age: 2269411 seconds (26 days 6 hours 23 minutes 31 seconds)
t_state: TS_ONPROC
t_flag: 0x1880 (T_DONTPEND|T_PANIC|T_LWPREUSE)
t_proc_flag: 0x100 (TP_MSACCT)
t_schedflag: 3 (TS_LOAD|TS_DONT_SWAP)
p_flag: 0x42000000 (SMSACCT|SMSFORK)

pc: unix:vpanic_common+0x165: addq $0xf0,%rsp

unix:vpanic_common+0x165()
unix:0xfffffffffb8672d2()
int unix:die+0xda((uint_t), (struct regs *), (caddr_t), (processorid_t))
void unix:trap+0x5e6((struct regs *), (caddr_t), (processorid_t))
unix:_cmntrap+0x140()
-- panic trap data type: 0xe (Page fault)
addr 0x28 rp 0xfffffe8001bb3c20
trapno 0xe (Page fault)
err 0 (page not present,read,supervisor)
%rfl 0x10213 (carry|auxilliary carry|interrupt enable|resume)
savfp 0xfffffe8001bb3d20
savpc zfs:arc_hash_replace+0x24: movq 0x28(%rax),%rax

%rbp 0xfffffe8001bb3d20 %rsp 0xfffffe8001bb3d10
%rip zfs:arc_hash_replace+0x24: movq 0x28(%rax),%rax

0%rdi 0xfffffea57adc4758 1%rsi 0xfffffea57d48e420 2%rdx 0x28
3%rcx 0 4%r8 0x43dffd 5%r9 0

%rax 0 %rbx 0xfffffea57d48e420
%r10 0xfffffffffb83edb4 %r11 1 %r12 0xfffffea57d48e420
%r13 0xfffffffffbcfe440 %r14 0 %r15 0xffffffff94b2bf40

%cs 0x28 (KCS_SEL) %ds 0x43 (UDS_SEL)
%es 0x43 (UDS_SEL) %ss 0x30 (KDS_SEL)
%fs 0 (KFS_SEL) %gs 0x1c3 (LWPGS_SEL)
fsbase 0
gsbase 0xffffffffa786c800
<trap>void zfs:arc_hash_replace+0x24((arc_buf_t *), (arc_hdr_t *))
void zfs:arc_kill_buf+0xd1((arc_buf_t *), (boolean_t))
int zfs:arc_evict_buf+0x139((arc_evict_list_t *), (arc_elink_t *), (uint64_t *))
void zfs:arc_evict_bytes+0x87((arc_state_t *), (int64_t))
void *zfs:arc_get_data_block+0xa2((uint64_t), (boolean_t))
int zfs:arc_read+0x3ec((zio_t *), (spa_t *), (const blkptr_t *), (uint64_t), (arc_done_func_t *), (void *), (int), (enum zio_flag), (arc_options_t), (const zbookmark_t *))
int zfs:dsl_read+0x2a((zio_t *), (spa_t *), (const blkptr_t *), (uint64_t), (arc_done_func_t *), (void *), (int), (enum zio_flag), (arc_options_t), (const zbookmark_t *))
void zfs:dbuf_prefetch+0x156((dnode_t *), (uint64_t))
uint64_t zfs:dmu_zfetch_fetch+0x40((dnode_t *), (uint64_t), (uint64_t))
void zfs:dmu_zfetch_dofetch+0x7c((zfetch_t *), (zstream_t *))
int zfs:dmu_zfetch_find+0x369((zfetch_t *), (zstream_t *), (int))
void zfs:dmu_zfetch+0xc8((zfetch_t *), (uint64_t), (uint64_t), (int))
int zfs:dbuf_read+0x259((dmu_buf_impl_t *), (zio_t *), (uint32_t))
int zfs:dmu_tx_check_ioerr+0x6e((zio_t *), (dnode_t *), (int), (uint64_t))
void zfs:dmu_tx_count_write+0x4d7((dmu_tx_hold_t *), (uint64_t), (uint64_t))
void zfs:dmu_tx_hold_write+0x4a((dmu_tx_t *), (uint64_t), (uint64_t), (int))
int zfs:zfs_write+0x3f4((vnode_t *), (uio_t *), (int), (cred_t *), (caller_context_t *))
int zfs:zfs_shim_write+0xc((vnode_t *), (uio_t *), (int), (cred_t *))
int genunix:fop_write+0x31((vnode_t *), (uio_t *), (int), (cred_t *), (struct caller_context *))
void nfssrv:rfs3_write+0x459((WRITE3args *), (WRITE3res *), (struct exportinfo *), (struct svc_req *), (cred_t *))
void nfssrv:common_dispatch+0x5b8((struct svc_req *), (SVCXPRT *), (rpcvers_t), (rpcvers_t), (char *), (struct rpc_disptable *))
void nfssrv:rfs_dispatch+0x21((struct svc_req *), (SVCXPRT *))
void rpcmod:svc_getreq+0x209((SVCXPRT *), (mblk_t *))
int rpcmod:svc_run+0x124((SVCPOOL *))
int rpcmod:svc_do_run+0x88((int))
int nfs:nfssys+0x16a((enum nfssys_op), (void *))
unix:_sys_sysenter_post_swapgs+0x14b()
-- switch to user thread's user stack --


!! This is the source for the panicking routine:


/*
 * Replace a hash table entry with a new entry. This is most often
 * used to swap a ghost entry in when we evict data from the arc,
 * but also is used in sync-to-convergence.
 */
static void
arc_hash_replace(arc_buf_t *old, arc_hdr_t *new)
{
	arc_hdr_t **hdrp;
	arc_hdr_t *hdr = &old->b_id;
	kmutex_t *hash_lock = HASH_LOCK(old);

	ASSERT(MUTEX_HELD(hash_lock));
	ASSERT(BUF_HASHED(old));
	ASSERT(!BUF_IS_HOLE(old));
	ASSERT(hdr->h_hash_idx == new->h_hash_idx);

	hdrp = &arc_hash_table.ht_table[hdr->h_hash_idx];
	while (*hdrp != hdr)
		hdrp = &(*hdrp)->h_next;		<<<<<
	*hdrp = new;
	new->h_next = hdr->h_next;
	hdr->h_next = NULL;
	ARC_FLAG_CLEAR(old, HASHED);
	(void) refcount_remove(&old->b_active, &arc_hash_table);
	ARC_FLAG_SET(new, HASHED);
	if (!BUF_GHOST(new)) {
		arc_buf_t *buf = (arc_buf_t *)new;
		arc_hold(buf, &arc_hash_table);
	}
}
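
Note the unchecked chain walk: while (*hdrp != hdr) assumes the old
header is always findable on its hash chain. If the chain is corrupt,
or the header was already removed, the walk runs off the NULL
terminator and dereferences it. Below is a minimal standalone sketch
of the same failure mode; the struct node type and function names are
mine, for illustration only, not the actual ZFS structures:

#include <stdio.h>

/* Illustrative stand-in for an arc_hdr_t hash chain. */
struct node {
	int		key;
	struct node	*next;
};

/*
 * Same shape as the loop in arc_hash_replace(): walk the chain until
 * the slot pointing at `target` is found.  If `target` is not on the
 * chain, *npp eventually becomes NULL and the next iteration
 * dereferences it -- the same class of NULL pointer dereference seen
 * in the panic above.
 */
static struct node **
find_slot_unchecked(struct node **head, struct node *target)
{
	struct node **npp = head;

	while (*npp != target)
		npp = &(*npp)->next;	/* faults here if target is absent */
	return (npp);
}

/* A defensive variant that stops at the NULL terminator instead. */
static struct node **
find_slot_checked(struct node **head, struct node *target)
{
	struct node **npp = head;

	while (*npp != NULL && *npp != target)
		npp = &(*npp)->next;
	return (*npp == target ? npp : NULL);
}

int
main(void)
{
	struct node b = { 2, NULL };
	struct node a = { 1, &b };
	struct node *head = &a;
	struct node not_on_chain = { 3, NULL };

	if (find_slot_checked(&head, &not_on_chain) == NULL)
		printf("target not on chain; unchecked walk would fault\n");
	/* find_slot_unchecked(&head, &not_on_chain) would crash here. */
	return (0);
}

In a debug kernel, an explicit check or ASSERT on the terminator would
turn this into an assertion failure at the point of corruption instead
of a bad trap deep in eviction.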


> rdi -f arc_hash_replace
zfs:arc_hash_replace+0: pushq %rbp
zfs:arc_hash_replace+1: movq %rsp,%rbp
zfs:arc_hash_replace+4: pushq %rbx
zfs:arc_hash_replace+5: movq %rsi,%rbx
zfs:arc_hash_replace+8: subq $0x8,%rsp
zfs:arc_hash_replace+0xc: movl 0x24(%rdi),%edx
zfs:arc_hash_replace+0xf: movq +0x4418682(%rip),%rax <zfs(data):arc_hash_table+8>
zfs:arc_hash_replace+0x16: leaq (%rax,%rdx,8),%rdx
zfs:arc_hash_replace+0x1a: movq (%rdx),%rax
zfs:arc_hash_replace+0x1d: jmp +0x9 <zfs:arc_hash_replace+0x28> (2f)
zfs:arc_hash_replace+0x1f: nop

zfs:arc_hash_replace+0x20:1: leaq 0x28(%rax),%rdx
zfs:arc_hash_replace+0x24: movq 0x28(%rax),%rax <<<<< panic here

zfs:arc_hash_replace+0x28:2: cmpq %rdi,%rax
zfs:arc_hash_replace+0x2b: jne -0xd <zfs:arc_hash_replace+0x20> (1b)
zfs:arc_hash_replace+0x2d: movq %rbx,(%rdx)
.....


> sdump 0xfffffea57d48e420 arc_hdr_t
arc_hdr_t {
uint64_t h_spa = 0x3e3aad6c7fe44621
dva_t h_dva = {
uint64_t [2] dva_word = [ 0x200000100 0x3d5b5e80 ]
}
uint64_t h_birth = 0x45a6ea
arc_flags_t h_flags = 0x92
uint32_t h_hash_idx = 0x1620bd
arc_hdr_t *h_next = NULL
}

!! The system panicked because the hash chain walk ran off the end of
!! the list: the old header was never found on its chain, so the loop
!! eventually loaded a NULL h_next into %rax, and the next iteration
!! dereferenced it at offset 0x28 (h_next), producing the bad trap.
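
The fault address itself confirms the picture: with h_spa (8 bytes),
h_dva (16), h_birth (8), h_flags (4) and h_hash_idx (4) ahead of it,
h_next sits at byte offset 40 = 0x28 in arc_hdr_t, exactly the faulting
address once the base pointer is NULL. A quick standalone check of that
arithmetic (the struct is re-declared from the sdump output above; the
dva_t and arc_flags_t typedefs are my guesses, for illustration only):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Re-declared from the sdump output; typedefs are assumptions. */
typedef struct dva { uint64_t dva_word[2]; } dva_t;
typedef uint32_t arc_flags_t;

typedef struct arc_hdr {
	uint64_t	h_spa;
	dva_t		h_dva;
	uint64_t	h_birth;
	arc_flags_t	h_flags;
	uint32_t	h_hash_idx;
	struct arc_hdr	*h_next;
} arc_hdr_t;

int
main(void)
{
	/* Prints 0x28 on LP64 -- matching the fault address above. */
	printf("offsetof(arc_hdr_t, h_next) = 0x%zx\n",
	    offsetof(arc_hdr_t, h_next));
	return (0);
}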

Bug 18247089 [https://bug.oraclecorp.com/pls/bug/webbug_edit.edit_info_top?rptno=18247089] - bad trap panic in arc_hash_remove()
 
That is the debug output and source code reference for the bug I encountered. It may or may not be relevant to the ZFS code that FreeBSD forked some time ago; I just thought I would raise awareness of the potential issue.
 
There are very few developers on here. These boards are mainly geared towards users.

I would suggest contacting the freebsd-fs@ mailing list. But do elaborate on what exactly it is, when it happens, and why, or you'll be greeted by a deafening silence.
 
Correct. The kernel core dump that was used in the debug session came from Solaris 10 with a pretty recent patch cluster applied.
 
SirDice,

Thank you for pointing out the mailing list. I will email them a reference to this thread. If it is met with silence, then it may not be relevant.

Regards.
 
This matches one or more known bugs in closed-source Solaris ZFS. The arc_hash code does not exist in the open-source implementations of ZFS. Contact Oracle for a solution.

-- richard
 