Fellow FreeBSD Fans,
I may simply be hitting bug 229745, yet I figured I may as well ask on here.
Does anybody know what to try? Thank you all very much.
This is FreeBSD 13.1-RELEASE amd64.
Problem
After copying ~12 to ~20 GB of data between two datasets in the same pool, the storage locks up. A power cycle is necessary to recover.
This is easy to reproduce; it happened five times out of five attempts.
When the storage locks up, these kernel messages appear on the console for all SATA ports (only two are shown here):
Stuff I've tried (all to no avail; suggestions found in bug 229745):
Details
I may simply be hitting bug 229745, yet I figured I may as well ask on here.
Does anybody know what to try? Thank you all very much.
This is FreeBSD 13.1-RELEASE amd64.
Problem
After copying ~12 to ~20 GB of data between two datasets in the same pool, the storage locks up. A power cycle is necessary to recover.
This is easy to reproduce; it happened five times out of five attempts.
When the storage locks up, these kernel messages appear on the console for all SATA ports (only two are shown here):
Code:
ahcich0: Timeout on slot 3 port 0
ahcich0: is 00000008 cs 00000000 ss 00000000 rs 00000008 tfd 40 serr 00000000 cmd 10008317
(ada0:ahcich0:0:0:0): WRITE_FPDMA_QUEUED. ACB: 61 08 30 4a f8 40 03 00 00 00 00 00
(ada0:ahcich0:0:0:0): CAM status: Command timeout
(ada0:ahcich0:0:0:0): Retrying command, 3 more tries remain
ahcich2: Timeout on slot 23 port 0
ahcich2: is 00000000 cs 03e00000 ss 03e00000 rs 03e00000 tfd 40 serr 00000000 cmd 10009417
(ada2:ahcich2:0:0:0): WRITE_FPDMA_QUEUED. ACB: 60 00 a0 70 ac 40 1b 00 00 01 00 00
(ada2:ahcich2:0:0:0): CAM status: Command timeout
(ada2:ahcich2:0:0:0): Retrying command, 3 more tries remain
Stuff I've tried (all to no avail; suggestions found in bug 229745):
- Adding
vfs.zfs.cache_flush_disable="1"
to /boot/loader.conf, shutting down FreeBSD and power cycling the computer.
- Disabling NCQ with
camcontrol negotiate ada0 -T disable; camcontrol reset ada0
(and so on, for all four SSDs).
- Downgrading the SATA revision by adding
hint.ahcich.0.sata_rev=2
to /boot/device.hints (for all ports) and rebooting.
- Running
camcontrol tags ada0 -N 25
(for each port); its original value was 32.
- Running
camcontrol tags ada0 -N 3
(for each port).
- Attempting to disable SATA hot-plug in the BIOS (unfortunately, this option was not available in the BIOS).
Make | Supermicro |
Model | 5017A-EF |
BIOS | 1.1 (newest) |
SATA chip | Marvell 88SE9230 |
SSD type (OS pool) | Intel SSDSC2BB080G4 |
SSD type (data pool) | Intel SSDSC2BB480G4 |
Details
Code:
root@bmb:~ # camcontrol devlist
<INTEL SSDSC2BB080G4 D2010370> at scbus0 target 0 lun 0 (pass0,ada0)
<INTEL SSDSC2BB080G4 D2010370> at scbus1 target 0 lun 0 (pass1,ada1)
<INTEL SSDSC2BB480G4 D2010370> at scbus2 target 0 lun 0 (pass2,ada2)
<INTEL SSDSC2BB480G4 D2010370> at scbus3 target 0 lun 0 (pass3,ada3)
<Marvell Console 1.01> at scbus7 target 0 lun 0 (pass4)
root@bmb:~ # diskinfo -v ada0
ada0
512 # sectorsize
80026361856 # mediasize in bytes (75G)
156301488 # mediasize in sectors
4096 # stripesize
0 # stripeoffset
155061 # Cylinders according to firmware.
16 # Heads according to firmware.
63 # Sectors according to firmware.
INTEL SSDSC2BB080G4 # Disk descr.
BTWL428203CZ080KGN # Disk ident.
ahcich0 # Attachment
Yes # TRIM/UNMAP support
0 # Rotation rate in RPM
Not_Zoned # Zone Mode
root@bmb:~ # diskinfo -v ada1
ada1
512 # sectorsize
80026361856 # mediasize in bytes (75G)
156301488 # mediasize in sectors
4096 # stripesize
0 # stripeoffset
155061 # Cylinders according to firmware.
16 # Heads according to firmware.
63 # Sectors according to firmware.
INTEL SSDSC2BB080G4 # Disk descr.
BTWL428504CZ080KGN # Disk ident.
ahcich1 # Attachment
Yes # TRIM/UNMAP support
0 # Rotation rate in RPM
Not_Zoned # Zone Mode
root@bmb:~ # diskinfo -v ada2
ada2
512 # sectorsize
480103981056 # mediasize in bytes (447G)
937703088 # mediasize in sectors
4096 # stripesize
0 # stripeoffset
930261 # Cylinders according to firmware.
16 # Heads according to firmware.
63 # Sectors according to firmware.
INTEL SSDSC2BB480G4 # Disk descr.
PHWL445100D3480QGN # Disk ident.
ahcich2 # Attachment
Yes # TRIM/UNMAP support
0 # Rotation rate in RPM
Not_Zoned # Zone Mode
root@bmb:~ # diskinfo -v ada3
ada3
512 # sectorsize
480103981056 # mediasize in bytes (447G)
937703088 # mediasize in sectors
4096 # stripesize
0 # stripeoffset
930261 # Cylinders according to firmware.
16 # Heads according to firmware.
63 # Sectors according to firmware.
INTEL SSDSC2BB480G4 # Disk descr.
PHWL445301TK480QGN # Disk ident.
ahcich3 # Attachment
Yes # TRIM/UNMAP support
0 # Rotation rate in RPM
Not_Zoned # Zone Mode
root@bmb:~ # zpool status
pool: v
state: ONLINE
scan: scrub repaired 0B in 00:00:06 with 0 errors on Fri May 20 20:00:17 2022
config:
NAME STATE READ WRITE CKSUM
v ONLINE 0 0 0
mirror-0 ONLINE 0 0 0
gpt/TOP_428504CZ ONLINE 0 0 0
gpt/BOT_428203CZ ONLINE 0 0 0
errors: No known data errors
pool: x
state: ONLINE
scan: scrub repaired 0B in 00:03:41 with 0 errors on Fri May 20 20:04:07 2022
config:
NAME STATE READ WRITE CKSUM
x ONLINE 0 0 0
mirror-0 ONLINE 0 0 0
gpt/TOP_445301TK ONLINE 0 0 0
gpt/BOT_445100D3 ONLINE 0 0 0
errors: No known data errors
root@bmb:~ # zpool list
NAME SIZE ALLOC FREE CKPOINT EXPANDSZ FRAG CAP DEDUP HEALTH ALTROOT
v 66G 1.26G 64.7G - - 0% 1% 1.00x ONLINE -
x 444G 55.1G 389G - - 0% 12% 1.00x ONLINE -
root@bmb:~ # dmesg
Copyright (c) 1992-2021 The FreeBSD Project.
Copyright (c) 1979, 1980, 1983, 1986, 1988, 1989, 1991, 1992, 1993, 1994
The Regents of the University of California. All rights reserved.
FreeBSD is a registered trademark of The FreeBSD Foundation.
FreeBSD 13.1-RELEASE releng/13.1-n250148-fc952ac2212 GENERIC amd64
FreeBSD clang version 13.0.0 (git@github.com:llvm/llvm-project.git llvmorg-13.0.0-0-gd7b669b3a303)
VT(efifb): resolution 1024x768
CPU: Intel(R) Atom(TM) CPU S1260 @ 2.00GHz (1995.05-MHz K8-class CPU)
Origin="GenuineIntel" Id=0x30669 Family=0x6 Model=0x36 Stepping=9
Features=0xbfebfbff<FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CLFLUSH,DTS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE>
Features2=0x40e3bd<SSE3,DTES64,MON,DS_CPL,VMX,EST,TM2,SSSE3,CX16,xTPR,PDCM,MOVBE>
AMD Features=0x20100800<SYSCALL,NX,LM>
AMD Features2=0x1<LAHF>
VT-x: (disabled in BIOS) HLT,PAUSE
TSC: P-state invariant, performance statistics
real memory = 8589934592 (8192 MB)
avail memory = 8253661184 (7871 MB)
Event timer "LAPIC" quality 600
ACPI APIC Table: <SUPERM SMCI--MB>
FreeBSD/SMP: Multiprocessor System Detected: 4 CPUs
FreeBSD/SMP: 1 package(s) x 2 core(s) x 2 hardware threads
random: unblocking device.
Firmware Warning (ACPI): 32/64X length mismatch in FADT/Pm1aControlBlock: 16/32 (20201113/tbfadt-748)
Firmware Warning (ACPI): Optional FADT field Pm2ControlBlock has valid Address but zero Length: 0x00000000000009F4/0x0 (20201113/tbfadt-796)
Firmware Warning (ACPI): Invalid length for FADT/Pm1aControlBlock: 32, using default 16 (20201113/tbfadt-850)
Firmware Warning (ACPI): Invalid length for FADT/Pm2ControlBlock: 0, using default 8 (20201113/tbfadt-850)
ioapic0 <Version 2.0> irqs 0-23
Launching APs: 2 3 1
random: entropy device external interface
kbd0 at kbdmux0
efirtc0: <EFI Realtime Clock>
efirtc0: registered as a time-of-day clock, resolution 1.000000s
smbios0: <System Management BIOS> at iomem 0xf04c0-0xf04de
smbios0: Version: 2.7, BCD Revision: 2.7
aesni0: No AES or SHA support.
acpi0: <SUPERM SMCI--MB>
cpu0: <ACPI CPU> on acpi0
attimer0: <AT timer> port 0x40-0x43,0x50-0x53 irq 0 on acpi0
Timecounter "i8254" frequency 1193182 Hz quality 0
Event timer "i8254" frequency 1193182 Hz quality 100
atrtc0: <AT realtime clock> port 0x70-0x77 irq 8 on acpi0
atrtc0: registered as a time-of-day clock, resolution 1.000000s
Event timer "RTC" frequency 32768 Hz quality 0
hpet0: <High Precision Event Timer> iomem 0xfed00000-0xfed003ff on acpi0
Timecounter "HPET" frequency 14318180 Hz quality 950
Event timer "HPET" frequency 14318180 Hz quality 450
Event timer "HPET1" frequency 14318180 Hz quality 440
Event timer "HPET2" frequency 14318180 Hz quality 440
Timecounter "ACPI-fast" frequency 3579545 Hz quality 900
acpi_timer0: <24-bit timer at 3.579545MHz> port 0x908-0x90b on acpi0
apei0: <ACPI Platform Error Interface> on acpi0
pcib0: <ACPI Host-PCI bridge> port 0xcf8-0xcff on acpi0
pci0: <ACPI PCI bus> on pcib0
pcib1: <ACPI PCI-PCI bridge> mem 0xfe460000-0xfe47ffff irq 21 at device 1.0 on pci0
pci1: <ACPI PCI bus> on pcib1
ahci0: <Marvell 88SE9230 AHCI SATA controller> port 0xe050-0xe057,0xe040-0xe043,0xe030-0xe037,0xe020-0xe023,0xe000-0xe01f mem 0xfe310000-0xfe3107ff irq 21 at device 0.0 on pci1
ahci0: AHCI v1.20 with 8 6Gbps ports, Port Multiplier not supported
ahci0: quirks=0x1000900<NOBSYRES,ALTSIG,IOMMU_BUSWIDE>
ahcich0: <AHCI channel> at channel 0 on ahci0
ahcich1: <AHCI channel> at channel 1 on ahci0
ahcich2: <AHCI channel> at channel 2 on ahci0
ahcich3: <AHCI channel> at channel 3 on ahci0
ahcich4: <AHCI channel> at channel 4 on ahci0
ahcich5: <AHCI channel> at channel 5 on ahci0
ahcich6: <AHCI channel> at channel 6 on ahci0
ahcich7: <AHCI channel> at channel 7 on ahci0
pcib2: <ACPI PCI-PCI bridge> mem 0xfe440000-0xfe45ffff irq 21 at device 2.0 on pci0
pci2: <ACPI PCI bus> on pcib2
xhci0: <XHCI (generic) USB 3.0 controller> mem 0xfe200000-0xfe201fff irq 21 at device 0.0 on pci2
xhci0: 64 bytes context size, 32-bit DMA
usbus0 on xhci0
usbus0: 5.0Gbps Super Speed USB v3.0
pcib3: <ACPI PCI-PCI bridge> mem 0xfe420000-0xfe43ffff irq 21 at device 3.0 on pci0
pci3: <ACPI PCI bus> on pcib3
pcib4: <ACPI PCI-PCI bridge> at device 0.0 on pci3
pci4: <ACPI PCI bus> on pcib4
vgapci0: <VGA-compatible display> mem 0xfc000000-0xfcffffff,0xfe000000-0xfe003fff,0xfd800000-0xfdffffff irq 22 at device 3.0 on pci4
vgapci0: Boot video device
pcib5: <ACPI PCI-PCI bridge> mem 0xfe400000-0xfe41ffff irq 21 at device 4.0 on pci0
pci5: <ACPI PCI bus> on pcib5
igb0: <Intel(R) I350 (Copper)> port 0xd020-0xd03f mem 0xfe120000-0xfe13ffff,0xfe144000-0xfe147fff irq 21 at device 0.0 on pci5
igb0: EEPROM V1.61-0 eTrack 0x8000090e
igb0: Using 1024 TX descriptors and 1024 RX descriptors
igb0: Using 2 RX queues 2 TX queues
igb0: Using MSI-X interrupts with 3 vectors
igb0: Ethernet address: 00:25:90:e4:f4:c8
igb0: netmap queues/slots: TX 2/1024, RX 2/1024
igb1: <Intel(R) I350 (Copper)> port 0xd000-0xd01f mem 0xfe100000-0xfe11ffff,0xfe140000-0xfe143fff irq 22 at device 0.1 on pci5
igb1: EEPROM V1.61-0 eTrack 0x8000090e
igb1: Using 1024 TX descriptors and 1024 RX descriptors
igb1: Using 2 RX queues 2 TX queues
igb1: Using MSI-X interrupts with 3 vectors
igb1: Ethernet address: 00:25:90:e4:f4:c9
igb1: netmap queues/slots: TX 2/1024, RX 2/1024
pci0: <base peripheral, IOMMU> at device 14.0 (no driver attached)
pci0: <simple comms, UART> at device 20.0 (no driver attached)
isab0: <PCI-ISA bridge> at device 31.0 on pci0
isa0: <ISA bus> on isab0
acpi_button0: <Power Button> on acpi0
acpi_tz0: <Thermal Zone> on acpi0
acpi_tz1: <Thermal Zone> on acpi0
uart0: <16550 or compatible> port 0x3f8-0x3ff irq 4 flags 0x10 on acpi0
uart0: console (115200,n,8,1)
uart1: <16550 or compatible> port 0x2f8-0x2ff irq 3 on acpi0
orm0: <ISA Option ROMs> at iomem 0xc0000-0xc7fff,0xcb800-0xcc7ff,0xcc800-0xcd7ff pnpid ORM0000 on isa0
est0: <Enhanced SpeedStep Frequency Control> on cpu0
Timecounter "TSC" frequency 1994999624 Hz quality 1000
Timecounters tick every 1.000 msec
ZFS filesystem version: 5
ZFS storage pool version: features support (5000)
Trying to mount root from zfs:v/ROOT/default []...
ugen0.1: <(0x1912) XHCI root HUB> at usbus0
uhub0 on usbus0
uhub0: <(0x1912) XHCI root HUB, class 9/0, rev 3.00/1.00, addr 1> on usbus0
uhub0: 8 ports with 8 removable, self powered
Root mount waiting for: CAM usbus0
ugen0.2: <Realtek 802.11n NIC> at usbus0
ugen0.3: <LiteOn Lenovo Traditional USB Keyboard> at usbus0
ukbd0 on uhub0
ukbd0: <LiteOn Lenovo Traditional USB Keyboard, class 0/0, rev 2.00/1.14, addr 2> on usbus0
kbd1 at ukbd0
ada0 at ahcich0 bus 0 scbus0 target 0 lun 0
ada0: <INTEL SSDSC2BB080G4 D2010370> ACS-2 ATA SATA 3.x device
ada0: Serial Number BTWL428203CZ080KGN
ada0: 600.000MB/s transfers (SATA 3.x, UDMA6, PIO 512bytes)
ada0: Command Queueing enabled
ada0: 76319MB (156301488 512 byte sectors)
ada1 at ahcich1 bus 0 scbus1 target 0 lun 0
ada1: <INTEL SSDSC2BB080G4 D2010370> ACS-2 ATA SATA 3.x device
ada1: Serial Number BTWL428504CZ080KGN
ada1: 600.000MB/s transfers (SATA 3.x, UDMA6, PIO 512bytes)
ada1: Command Queueing enabled
ada1: 76319MB (156301488 512 byte sectors)
ada2 at ahcich2 bus 0 scbus2 target 0 lun 0
ada2: <INTEL SSDSC2BB480G4 D2010370> ACS-2 ATA SATA 3.x device
ada2: Serial Number PHWL445100D3480QGN
ada2: 600.000MB/s transfers (SATA 3.x, UDMA6, PIO 512bytes)
ada2: Command Queueing enabled
ada2: 457862MB (937703088 512 byte sectors)
ada3 at ahcich3 bus 0 scbus3 target 0 lun 0
ada3: <INTEL SSDSC2BB480G4 D2010370> ACS-2 ATA SATA 3.x device
ada3: Serial Number PHWL445301TK480QGN
ada3: 600.000MB/s transfers (SATA 3.x, UDMA6, PIO 512bytes)
ada3: Command Queueing enabled
ada3: 457862MB (937703088 512 byte sectors)
pass4 at ahcich7 bus 0 scbus7 target 0 lun 0
pass4: <Marvell Console 1.01> Removable Processor SCSI device
pass4: Serial Number HKDP221516WL
pass4: 150.000MB/s transfers (SATA 1.x, UDMA4, ATAPI 12bytes, PIO 8192bytes)
ugen0.4: <Winbond Electronics Corp Hermon USB hidmouse Device> at usbus0
ukbd1 on uhub0
ukbd1: <Winbond Electronics Corp Hermon USB hidmouse Device, class 0/0, rev 1.10/0.01, addr 3> on usbus0
kbd2 at ukbd1
Dual Console: Video Primary, Serial Secondary
igb0: changing name to 'lan0'
igb1: changing name to 'comcast'
lo0: link state changed to UP
lan0: set address: WARNING: network mask should be specified; using historical default
lan0: link state changed to UP
comcast: link state changed to UP
rtwn0 on uhub0
rtwn0: <Realtek 802.11n NIC, class 0/0, rev 2.00/0.00, addr 1> on usbus0
rtwn0: MAC/BB RTL8188EU, RF 6052 1T1R
uhid0 on uhub0
uhid0: <LiteOn Lenovo Traditional USB Keyboard, class 0/0, rev 2.00/1.14, addr 2> on usbus0
ums0 on uhub0
ums0: <Winbond Electronics Corp Hermon USB hidmouse Device, class 0/0, rev 1.10/0.01, addr 3> on usbus0
ums0: 3 buttons and [Z] coordinates ID=0
ipfw2 (+ipv6) initialized, divert loadable, nat loadable, default to deny, logging disabled
Security policy loaded: MAC/ntpd (mac_ntpd)
root@bmb:~ # pciconf -lv
hostb0@pci0:0:0:0: class=0x060000 rev=0x02 hdr=0x00 vendor=0x8086 device=0x0c75 subvendor=0x8086 subdevice=0x0000
vendor = 'Intel Corporation'
device = 'Atom Processor S1200 Internal'
class = bridge
subclass = HOST-PCI
pcib1@pci0:0:1:0: class=0x060400 rev=0x02 hdr=0x01 vendor=0x8086 device=0x0c46 subvendor=0x8086 subdevice=0x0000
vendor = 'Intel Corporation'
device = 'Atom Processor S1200 PCI Express Root Port 1'
class = bridge
subclass = PCI-PCI
pcib2@pci0:0:2:0: class=0x060400 rev=0x02 hdr=0x01 vendor=0x8086 device=0x0c47 subvendor=0x8086 subdevice=0x0000
vendor = 'Intel Corporation'
device = 'Atom Processor S1200 PCI Express Root Port 2'
class = bridge
subclass = PCI-PCI
pcib3@pci0:0:3:0: class=0x060400 rev=0x02 hdr=0x01 vendor=0x8086 device=0x0c48 subvendor=0x8086 subdevice=0x0000
vendor = 'Intel Corporation'
device = 'Atom Processor S1200 PCI Express Root Port 3'
class = bridge
subclass = PCI-PCI
pcib5@pci0:0:4:0: class=0x060400 rev=0x02 hdr=0x01 vendor=0x8086 device=0x0c49 subvendor=0x8086 subdevice=0x0000
vendor = 'Intel Corporation'
device = 'Atom Processor S1200 PCI Express Root Port 4'
class = bridge
subclass = PCI-PCI
none0@pci0:0:14:0: class=0x080600 rev=0x02 hdr=0x00 vendor=0x8086 device=0x0c54 subvendor=0x8086 subdevice=0x0000
vendor = 'Intel Corporation'
device = 'Atom Processor S1200 Internal'
class = base peripheral
subclass = IOMMU
none1@pci0:0:19:0: class=0x088000 rev=0x02 hdr=0x00 vendor=0x8086 device=0x0c59 subvendor=0x8086 subdevice=0x0000
vendor = 'Intel Corporation'
device = 'Atom Processor S1200 SMBus 2.0 Controller 0'
class = base peripheral
none2@pci0:0:19:1: class=0x088000 rev=0x02 hdr=0x00 vendor=0x8086 device=0x0c5a subvendor=0x8086 subdevice=0x0000
vendor = 'Intel Corporation'
device = 'Atom Processor S1200 SMBus 2.0 Controller 1'
class = base peripheral
none3@pci0:0:20:0: class=0x070002 rev=0x02 hdr=0x00 vendor=0x8086 device=0x0c5f subvendor=0x15d9 subdevice=0x0651
vendor = 'Intel Corporation'
device = 'Atom Processor S1200 UART'
class = simple comms
subclass = UART
isab0@pci0:0:31:0: class=0x060100 rev=0x02 hdr=0x00 vendor=0x8086 device=0x0c60 subvendor=0x15d9 subdevice=0x0651
vendor = 'Intel Corporation'
device = 'Atom Processor S1200 Integrated Legacy Bus'
class = bridge
subclass = PCI-ISA
ahci0@pci0:1:0:0: class=0x010601 rev=0x10 hdr=0x00 vendor=0x1b4b device=0x9230 subvendor=0x1b4b subdevice=0x9230
vendor = 'Marvell Technology Group Ltd.'
device = '88SE9230 PCIe 2.0 x2 4-port SATA 6 Gb/s RAID Controller'
class = mass storage
subclass = SATA
xhci0@pci0:2:0:0: class=0x0c0330 rev=0x03 hdr=0x00 vendor=0x1912 device=0x0014 subvendor=0x0000 subdevice=0x0000
vendor = 'Renesas Technology Corp.'
device = 'uPD720201 USB 3.0 Host Controller'
class = serial bus
subclass = USB
pcib4@pci0:3:0:0: class=0x060400 rev=0x01 hdr=0x01 vendor=0x10e3 device=0x8113 subvendor=0x15d9 subdevice=0x0651
vendor = 'Tundra Semiconductor Corp.'
class = bridge
subclass = PCI-PCI
vgapci0@pci0:4:3:0: class=0x030000 rev=0x0a hdr=0x00 vendor=0x102b device=0x0532 subvendor=0x0000 subdevice=0x0000
vendor = 'Matrox Electronics Systems Ltd.'
device = 'MGA G200eW WPCM450'
class = display
subclass = VGA
igb0@pci0:5:0:0: class=0x020000 rev=0x01 hdr=0x00 vendor=0x8086 device=0x1521 subvendor=0x15d9 subdevice=0x1521
vendor = 'Intel Corporation'
device = 'I350 Gigabit Network Connection'
class = network
subclass = ethernet
igb1@pci0:5:0:1: class=0x020000 rev=0x01 hdr=0x00 vendor=0x8086 device=0x1521 subvendor=0x15d9 subdevice=0x1521
vendor = 'Intel Corporation'
device = 'I350 Gigabit Network Connection'
class = network
subclass = ethernet