Hello,
I have a heavily loaded web server. It usually handles ~5000 conn/s and 100 Mbit/s of traffic. About once every two weeks the server stops responding to network connections.
In dmesg I see:
OS: FreeBSD 12.2-STABLE 41cf333f9b2a(stable/12)-dirty: Sat Jan 2 01:49:01 EET 2021
/boot/loader.conf
/etc/sysctl.conf
How can I find out where my mbufs are leaking? Is there something wrong with my settings?
Thank you!
I have a heavily loaded web server. It usually handles ~5000 conn/s and 100 Mbit/s of traffic. About once every two weeks the server stops responding to network connections.
In dmesg I see:
Code:
[zone: mbuf] kern.ipc.nmbufs limit reached
netstat -m
shows me that ~19 GB of RAM is being used by the network stack:
Code:
78365279/16/78365295 mbufs in use (current/cache/total)
43996/18104/62100/12244576 mbuf clusters in use (current/cache/total/max)
2429/16 mbuf+clusters out of packet secondary zone in use (current/cache)
12584/2975/15559/6122288 4k (page size) jumbo clusters in use (current/cache/total/max)
0/0/0/1814011 9k jumbo clusters in use (current/cache/total/max)
0/0/0/1020381 16k jumbo clusters in use (current/cache/total/max)
19729647K/48112K/19777759K bytes allocated to network (current/cache/total)
42375955/0/0 requests for mbufs denied (mbufs/clusters/mbuf+clusters)
404032/0/33003 requests for mbufs delayed (mbufs/clusters/mbuf+clusters)
0/0/0 requests for jumbo clusters delayed (4k/9k/16k)
0/0/0 requests for jumbo clusters denied (4k/9k/16k)
1899771 sendfile syscalls
399 sendfile syscalls completed without I/O request
1004979 requests for I/O initiated by sendfile
7900311 pages read by sendfile as part of a request
399 pages were valid at time of a sendfile request
0 pages were valid and substituted to bogus page
0 pages were requested for read ahead by applications
0 pages were read ahead by sendfile
0 times sendfile encountered an already busy page
0 requests for sfbufs denied
0 requests for sfbufs delayed
vmstat -z | grep -E '^ITEM|mbuf':
Code:
ITEM SIZE LIMIT USED FREE REQ FAIL SLEEP
mbuf_packet: 256, 78365295, 2360, 12,1353222657, 0,39166
mbuf: 256, 78365295,78362923, 0,27304412405,45796642,434120
mbuf_cluster: 2048, 12244576, 44213, 17887,1529376894, 0, 0
mbuf_jumbo_page: 4096, 6122288, 12388, 3306,4741583680, 0, 0
mbuf_jumbo_9k: 9216, 1814011, 0, 0, 0, 0, 0
mbuf_jumbo_16k: 16384, 1020381, 0, 0, 0, 0, 0
OS: FreeBSD 12.2-STABLE 41cf333f9b2a(stable/12)-dirty: Sat Jan 2 01:49:01 EET 2021
/boot/loader.conf
Code:
kern.geom.label.disk_ident.enable="0"
kern.geom.label.gptid.enable="0"
opensolaris_load="YES"
zfs_load="YES"
accf_http_load="YES"
accf_data_load="YES"
autoboot_delay="7"
ioat_load="YES"
cc_htcp_load="YES"
imcsmb_load="YES"
aesni_load="YES"
fuse_load="YES"
tcp_rack_load="YES"
# zfs
vfs.zfs.vdev.cache.size=0
vfs.zfs.arc_max=32G
#vfs.zfs.arc_max=64G
# syncache tuning
net.inet.tcp.syncache.hashsize=1024
net.inet.tcp.syncache.bucketlimit=100
net.inet.tcp.syncache.cachelimit=1048576
# hostcache tuning
net.inet.tcp.hostcache.hashsize=4096
net.inet.tcp.hostcache.bucketlimit=100
net.inet.tcp.hostcache.cachelimit=65536
net.link.ifqmaxlen=2048
kern.ipc.shmseg=10240
kern.ipc.shmmni=10240
net.inet.tcp.tcbhashsize=65536
hw.intr_storm_threshold=32000
kern.msgbufsize=262144
kern.ipc.nmbclusters=0
net.inet.tcp.soreceive_stream=1
# disable ARC compression
#vfs.zfs.compressed_arc_enabled=0
cpu_microcode_load="YES"
cpu_microcode_name="/boot/firmware/intel-ucode.bin"
boot_multicons="YES"
boot_serial="YES"
comconsole_speed="115200"
console="comconsole"
comconsole_port="0x2f8"
/etc/sysctl.conf
Code:
vfs.usermount=1
security.bsd.see_other_uids=1
security.bsd.see_other_gids=1
security.bsd.see_jail_proc=0
security.bsd.unprivileged_read_msgbuf=0
security.bsd.unprivileged_proc_debug=0
vfs.zfs.min_auto_ashift=12
net.inet.ip.redirect=0
net.inet.icmp.drop_redirect=1
net.inet.icmp.log_redirect=0
vfs.zfs.prefetch_disable=1
kern.ipc.somaxconn=65535
net.inet.tcp.maxtcptw=102400
# maximum number of interrupts per second on any interrupt level
# (vmstat -i for total rate). If you still see Interrupt Storm detected messages,
# increase the limit to a higher number and look for the culprit. (default 1000)
hw.intr_storm_threshold=12000
kern.ipc.maxsockbuf=33554432
kern.maxvnodes=8000000
net.inet.tcp.cc.algorithm=htcp
net.inet.tcp.cc.htcp.adaptive_backoff=1
net.inet.tcp.cc.htcp.rtt_scaling=1
net.inet.icmp.icmplim=5000
net.inet.tcp.tso=1
vfs.zfs.txg.timeout=2
vfs.zfs.trim.txg_delay=3
# for NVMe
vfs.zfs.delay_min_dirty_percent=95
vfs.zfs.dirty_data_max=12884901888
vfs.zfs.top_maxinflight=128
vfs.zfs.vdev.aggregation_limit=524288
vfs.zfs.vdev.scrub_max_active=3
vm.lowmem_period=0
net.inet.tcp.fast_finwait2_recycle=1
net.inet.tcp.delayed_ack=1
net.inet.tcp.delacktime=100
net.inet.tcp.blackhole=0
net.inet.udp.blackhole=1
kern.ipc.maxsockbuf=2097152
net.inet.udp.maxdgram=57344
net.inet.ip.intr_queue_maxlen=5000
kern.ipc.shmmax=2147483648
kern.ipc.maxsockbuf=83886080
net.route.netisr_maxqlen=4096
net.inet.tcp.maxtcptw=3149624
net.inet.tcp.nolocaltimewait=1
net.inet.ip.portrange.first=1024
net.inet.ip.portrange.last=65535
net.inet.ip.portrange.randomized=0
net.inet.tcp.msl=15000
net.inet.tcp.path_mtu_discovery=1
net.inet.tcp.drop_synfin=1
net.inet.ip.process_options=0
kern.corefile="/var/tmp/%U.%N.core"
kern.ipc.shm_use_phys=1
net.inet.tcp.rfc3390=1
kern.ipc.shm_allow_removed=1
net.inet.tcp.sendspace=65536
net.inet.tcp.sendbuf_inc=32768
net.inet.tcp.sendbuf_max=16777216
net.inet.tcp.sendbuf_auto=1
net.inet.tcp.recvspace=32768
net.inet.tcp.recvbuf_max=16777216
net.inet.tcp.recvbuf_inc=8192
net.inet.tcp.recvbuf_auto=1
# for localhost
net.inet.raw.maxdgram=16384
net.inet.raw.recvspace=16384
net.local.stream.sendspace=163840 # lo0 mtu 16384 x 10
net.local.stream.recvspace=163840 # lo0 mtu 16384 x 10
net.local.dgram.maxdgram=65535
net.inet.tcp.fastopen.server_enable=1
vfs.timestamp_precision=0
vfs.read_max=128
kern.sync_on_panic=1
net.inet.tcp.hostcache.expire=1200
net.inet.tcp.keepinit=5000
net.inet.tcp.ecn.enable=1 # explicit congestion notification (ecn) warning: some ISP routers abuse ECN (default 0)
net.inet.tcp.mssdflt=1460
net.inet.tcp.cc.abe=1
net.inet.tcp.minmss=536
net.inet.ip.maxfragpackets=1024
net.inet.ip.maxfragsperpacket=16
net.inet.tcp.abc_l_var=44
net.inet.tcp.initcwnd_segments=44
net.inet.tcp.delacktime=20
net.inet.tcp.rfc6675_pipe=1
vm.swap_idle_enabled=1
# tune for postgres
vfs.zfs.metaslab.lba_weighting_enabled=0
kern.ipc.shmall=2097152
kern.ipc.shmmax=17179877376
# nfsv4
vfs.nfs.enable_uidtostring=1
vfs.nfsd.enable_stringtouid=1
vfs.nfsd.issue_delegations=1
vfs.nfsd.enable_locallocks=1
#vfs.nfsd.async=1
vfs.nfs.nfs_directio_enable=1
# dump cores
#kern.sugid_coredump=1
dev.ixl.0.iflib.rx_budget=65535
dev.ixl.1.iflib.rx_budget=65535
net.inet.tcp.functions_default=rack
net.inet.tcp.syncookies=0
Thank you!
Last edited: