Something hangs with my FreeBSD box

Hi there, I've got a big problem. First of all, here is what we have:
  1. 10.0-RELEASE FreeBSD AMD 64
  2. system running on Xen HVM (xen-detect: "Running in HVM context on Xen v3.4."; 4 cores, 4 GB RAM)
  3. on base system running 2 jails on aliases for lo0 - lo1:
    - one with nginx-1.4.7_1,2 plus php53-5.3.28_1 with php-fpm
    - second with mysql55-server-5.5.37
  4. from the external IP, connections are forwarded to jails by pf
  5. the web application running there is used by 30 people at a time
Every day around 11 PM web requests hang (browser freezes - I have to kill the browser) and only a reboot of the whole box solves my problem.

What I have checked so far:
  • Restarting the jails helps, but only for a short time
  • MySQL processlist shows nothing
  • system logs show nothing suspicious
  • system load is small: cores up to 1%, memory around 2 GB free
  • gstat shows load up to 5%
So I tried tuning all the daemons (nginx, mysql, php-fpm) - no effect.

Configuration main:

Code:
# XEN HVM kernel 

cpu HAMMER
ident XENHVM

makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols
makeoptions WITH_CTF=1 # Run ctfconvert(1) for DTrace support

options SCHED_ULE # ULE scheduler
options PREEMPTION # Enable kernel thread preemption
options INET # InterNETworking
#options INET6 # IPv6 communications protocols
options TCP_OFFLOAD # TCP offload
options SCTP # Stream Control Transmission Protocol
options FFS # Berkeley Fast Filesystem
options SOFTUPDATES # Enable FFS soft updates support
options UFS_ACL # Support for access control lists
options UFS_DIRHASH # Improve performance on big directories
options UFS_GJOURNAL # Enable gjournal-based UFS journaling
options QUOTA # Enable disk quotas for UFS
options MD_ROOT # MD is a potential root device
options PROCFS # Process filesystem (requires PSEUDOFS)
options PSEUDOFS # Pseudo-filesystem framework
options GEOM_PART_GPT # GUID Partition Tables.
options GEOM_RAID # Soft RAID functionality.
options GEOM_LABEL # Provides labelization
options COMPAT_FREEBSD32 # Compatible with i386 binaries
options COMPAT_FREEBSD4 # Compatible with FreeBSD4
options COMPAT_FREEBSD5 # Compatible with FreeBSD5
options COMPAT_FREEBSD6 # Compatible with FreeBSD6
options COMPAT_FREEBSD7 # Compatible with FreeBSD7
options SCSI_DELAY=500 # Delay (in ms) before probing SCSI
options KTRACE # ktrace(1) support
options STACK # stack(9) support
options SYSVSHM # SYSV-style shared memory
options SYSVMSG # SYSV-style message queues
options SYSVSEM # SYSV-style semaphores
options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions
options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed.
options KBD_INSTALL_CDEV # install a CDEV entry in /dev
options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4)
options AUDIT # Security event auditing
options CAPABILITY_MODE # Capsicum capability mode
options CAPABILITIES # Capsicum capabilities
options PROCDESC # Support for process descriptors
options MAC # TrustedBSD MAC Framework
options KDTRACE_FRAME # Ensure frames are compiled in
options KDTRACE_HOOKS # Kernel DTrace hooks
options DDB_CTF # Kernel ELF linker loads CTF data
options INCLUDE_CONFIG_FILE # Include this file in kernel

# Debugging support. Always need this:
options KDB # Enable kernel debugger support.
options KDB_TRACE # Print a stack trace for a panic.

# Make an SMP-capable kernel by default
options SMP # Symmetric MultiProcessor Kernel

# CPU frequency control
device cpufreq

# Bus support.
device acpi
device pci

# ATA controllers
device ahci # AHCI-compatible SATA controllers
device ata # Legacy ATA/SATA controllers
options ATA_STATIC_ID # Static device numbering

# ATA/SCSI peripherals
device          scbus           # SCSI bus (required for ATA/SCSI)
device          da              # Direct Access (disks)
device          pass            # Passthrough device (direct ATA/SCSI access)
device          ses             # Enclosure Services (SES and SAF-TE)

# atkbdc0 controls both the keyboard and the PS/2 mouse
device atkbdc # AT keyboard controller
device atkbd # AT keyboard
#device psm # PS/2 mouse

device kbdmux # keyboard multiplexer

# syscons
device agp
device vga
device sc
device splash
options VESA
options SC_PIXEL_MODE

# PCI Ethernet NICs that use the common MII bus controller code.
device miibus # MII bus support

# Pseudo devices.
device loop # Network loopback
device random # Entropy device
device padlock_rng # VIA Padlock RNG
device rdrand_rng # Intel Bull Mountain RNG
device ether # Ethernet support
device vlan # 802.1Q VLAN support
device md # Memory "disks"
device firmware # firmware assist module

# The `bpf' device enables the Berkeley Packet Filter.
device bpf # Berkeley packet filter

# MMC/SD
device mmc # MMC/SD bus
device mmcsd # MMC/SD memory card
device sdhci # Generic PCI SD Host Controller

# VirtIO support + Xen HVM Guest Optimizations
device virtio # Generic VirtIO bus (required)
device virtio_pci # VirtIO PCI device
device vtnet # VirtIO Ethernet device
device virtio_blk # VirtIO Block device
device virtio_scsi # VirtIO SCSI device
device virtio_balloon # VirtIO Memory Balloon device
device hyperv # HyperV drivers 
options XENHVM # Xen HVM kernel infrastructure
device xenpci # Xen HVM Hypervisor services driver
device vmx # VMware VMXNET3 Ethernet

#addons
options PANIC_REBOOT_WAIT_TIME=5
options NULLFS

device pf
device pflog
device pfsync
options ACCEPT_FILTER_DATA
options ACCEPT_FILTER_DNS
options ACCEPT_FILTER_HTTP

pf.conf
Code:
ext_if = "xn0"
int_if = "lo1"

#scrub in on $ext_if all random-id
scrub in all

nat-anchor "ftp-proxy/*"
nat pass on $ext_if from 10.10.10.0/24 to any -> { $ext_if }

rdr-anchor "ftp-proxy/*"
#rdr pass on $int_if proto tcp from any to any port ftp -> 127.0.0.1 port 8021
rdr pass on $ext_if proto tcp from any to { $ext_if } port 80 -> 10.10.10.10 port 80
rdr pass on $ext_if proto tcp from any to { $ext_if } port 3306 -> 10.10.10.20 
rdr pass on $ext_if proto tcp from any to { $ext_if } port 3308 -> 10.10.10.10 port 3306
rdr pass on $ext_if proto tcp from any to { $ext_if } port 3307 -> 10.10.10.30 port 3306
#rdr pass on $ext_if proto tcp from any to { $ext_if } port { 25 587 110 } -> 10.10.10.30 
#rdr pass on $ext_if proto tcp from any to { $ext_if } port { 21 20 } -> 10.10.10.40 

set skip on { lo0 lo1 }
#antispoof for $ext_if inet

anchor "ftp-proxy/*"
pass in all
pass out keep state

sysctl.conf
Code:
security.bsd.see_other_uids=0
net.inet.tcp.blackhole=2
net.inet.udp.blackhole=1
net.inet.sctp.blackhole=0

net.inet.ip.portrange.first=1024

security.jail.sysvipc_allowed=1
security.jail.allow_raw_sockets=1

kern.maxfiles=4096
kern.maxfilesperproc=4096
kern.ipc.somaxconn=4096

vfs.read_max: 64

net.inet.ip.portrange.randomized=0

jail.conf
Code:
exec.start = "/bin/sh /etc/rc";
exec.stop = "/bin/sh /etc/rc.shutdown";
allow.noset_hostname;
www {
        jid = 1;
        path = "/v/jls/www";
        mount.devfs;
        devfs_ruleset = 4;
        host.hostname = "www.lan";
        ip4.addr = 10.10.10.10;
        interface = lo1;
        allow.raw_sockets;
        allow.sysvipc;
        mount.fstab = "/etc/fstab.www";
}

db2 {
        jid = 3;
        path=/v/jls/db2;
        mount.devfs;
        devfs_ruleset = 4;
        host.hostname=db2.lan;
        ip4.addr=10.10.10.30;
        interface=lo1;
        allow.raw_sockets;
        allow.sysvipc;
}

Configuration db2:

/usr/local/etc/my.cnf
Code:
[client]
port=3306
default-character-set=latin2
socket=/tmp/mysql.sock

[mysqld]
port=3306
socket=/tmp/mysql.sock
key_buffer_size=200M
max_allowed_packet=16M
#federated
event_scheduler = 'ON'
character-set-server=latin2
collation-server=latin2_general_ci
init-connect='set names latin2'
innodb_file_per_table
max_connections = 256

slow_query_log = 1
long_query_time = 2
#log_queries_not_using_indexes = 1
slow_query_log_file = /var/db/mysql/slowquery.log
log_slow_queries = /var/db/mysql/slowquery.log
#general_log = 1
general_log_file = /var/db/mysql/general.log

[mysql]
default-character-set=latin2

[mysqlshow]
default-character-set=latin2

[mysqlimport]
default-character-set=latin2

[mysqlcheck]
default-character-set=latin2

[mysql_upgrade]
default-character-set=latin2

Configuration www:

php-fpm
Code:
[global]
pid = run/php-fpm.pid
error_log = /var/log/php-fpm.log
syslog.facility = daemon
syslog.ident = php-fpm
emergency_restart_threshold = 10
emergency_restart_interval = 1m
process_control_timeout = 10s
 
rlimit_files = 2048
 
events.mechanism = kqueue
[www]
user = www
group = www
listen = 10.10.10.10:9000
listen.backlog = -1
listen.owner = www
listen.group = www
listen.mode = 0666
 
pm = static
pm.max_children = 32
pm.min_spare_servers = 20
pm.max_spare_servers = 50
pm.start_servers = 2
request_terminate_timeout = 120s

nginx
Code:
worker_processes  4;
worker_rlimit_nofile    100000;
error_log /var/log/nginx.error.log error;

events {
        worker_connections  4096;
        multi_accept on;
}

http {
        log_format main    '$remote_addr - $remote_user [$time_local] $status '
                                                                                        '"$request" $body_bytes_sent "$http_referer" '
                                                                                        '"$http_user_agent" "http_x_forwarded_for" ';
        #access_log      /var/log/nginx.acces.log main;
        access_log off;

        sendfile on;
        tcp_nopush on;
        tcp_nodelay on;

        keepalive_timeout 128;
        client_body_timeout 10;
        client_header_timeout 10;
        send_timeout 10;

        keepalive_requests 200;
        reset_timedout_connection on;
        types_hash_max_size 2048;
        server_tokens off;

        open_file_cache          max=5000  inactive=20s;
        open_file_cache_valid    30s;
        open_file_cache_min_uses 2;
        open_file_cache_errors   on;

        gzip on;
        gzip_min_length 10240;
        gzip_disable "msie6";
        gzip_comp_level 3;
        gzip_types text/css application/x-javascript text/xml application/xml application/xml+rss text/javascript application/javascript text/x-js;
        gzip_buffers 16 8k;

        include                 mime.types;
        default_type            application/octet-stream;
        include                 (web server definition here);
}

netstat -m
Code:
68/1267/1335 mbufs in use (current/cache/total)
2/504/506/254336 mbuf clusters in use (current/cache/total/max)
2/504 mbuf+clusters out of packet secondary zone in use (current/cache)
64/45/109/127168 4k (page size) jumbo clusters in use (current/cache/total/max)
0/0/0/37679 9k jumbo clusters in use (current/cache/total/max)
0/0/0/21194 16k jumbo clusters in use (current/cache/total/max)
277K/1504K/1781K bytes allocated to network (current/cache/total)
66/491/6 requests for mbufs denied (mbufs/clusters/mbuf+clusters)
0/0/0 requests for mbufs delayed (mbufs/clusters/mbuf+clusters)
0/0/0 requests for jumbo clusters delayed (4k/9k/16k)
20/0/0 requests for jumbo clusters denied (4k/9k/16k)
0 requests for sfbufs denied
0 requests for sfbufs delayed
107 requests for I/O initiated by sendfile

netstat -ss
Code:
tcp:
        151577 packets sent
                88066 data packets (137448300 bytes)
                130 data packets (196299 bytes) retransmitted
                3 data packets unnecessarily retransmitted
                43563 ack-only packets (3967 delayed)
                81 window update packets
                19737 control packets
        172155 packets received
                114485 acks (for 137451220 bytes)
                11444 duplicate acks
                96194 packets (42667141 bytes) received in-sequence
                1425 completely duplicate packets (5357 bytes)
                396 out-of-order packets (120603 bytes)
                2277 window update packets
                70 packets received after close
        5261 connection requests
        9713 connection accepts
        3 ignored RSTs in the windows
        14971 connections established (including accepts)
        14887 connections closed (including 9 drops)
                7115 connections updated cached RTT on close
                7115 connections updated cached RTT variance on close
                172 connections updated cached ssthresh on close
        99281 segments updated rtt (of 93436 attempts)
        56 retransmit timeouts
        19673 correct ACK header predictions
        26236 correct data packet header predictions
        9716 syncache entries added
                9 retransmitted
                1 dupsyn
                9713 completed
                3 stale
        9716 cookies sent
        9 hostcache entries added
        18 SACK recovery episodes
        50 segment rexmits in SACK recovery episodes
        73000 byte rexmits in SACK recovery episodes
        242 SACK options (SACK blocks) received
udp:
        18196 datagrams received
        4 dropped due to no socket
        1021 broadcast/multicast datagrams undelivered
        17171 delivered
        17174 datagrams output
sctp:
        Packet drop statistics:
        Timeouts:
ip:
        191160 total packets received
        190351 packets for this host
        803 packets not forwardable
        168836 packets sent from this host
icmp:
        ICMP address mask responses are disabled
igmp:
pfsync:
            1 clear all request sent
            22130 state inserts sent
            16959 compressed state updates sent
            21809 compressed state deletes sent
            6438 end of frame marks sent
arp:
        2 ARP requests sent
        125 ARP replies sent
        61354 ARP requests received
        278 ARP replies received
        61632 ARP packets received

What can I do? What else can I check to find what is blocking my box? The shell works fine without any problems.

Kind regards
Mark
 
Well, the first thing that comes to mind is a cron(8) job, maybe even one of the periodic(8) daily jobs since it happens at the same time every day. Have you checked into those sources as a potential cause?
 
trh411 said:
Well, the first thing that comes to mind is a cron(8) job, maybe even one of the periodic(8) daily jobs since it happens at the same time every day. Have you checked into those sources as a potential cause?

For main and jails:
Code:
cat /etc/periodic.conf 
daily_status_security_neggrpperm_enable="NO"

Code:
cat /etc/crontab 
# /etc/crontab - root's crontab for FreeBSD
#
# $FreeBSD: release/10.0.0/etc/crontab 194170 2009-06-14 06:37:19Z brian $
#
SHELL=/bin/sh
PATH=/etc:/bin:/sbin:/usr/bin:/usr/sbin
#
#minute hour    mday    month   wday    who     command
#
*/5     *       *       *       *       root    /usr/libexec/atrun
#
# Save some entropy so that /dev/random can re-seed on boot.
*/11    *       *       *       *       operator /usr/libexec/save-entropy
#
# Rotate log files every hour, if necessary.
0       *       *       *       *       root    newsyslog
#
# Perform daily/weekly/monthly maintenance.
#1      3       *       *       *       root    periodic daily
#15     4       *       *       6       root    periodic weekly
#30     5       1       *       *       root    periodic monthly
#
# Adjust the time zone if the CMOS clock keeps local time, as opposed to
# UTC time.  See adjkerntz(8) for details.
1,31    0-5     *       *       *       root    adjkerntz -a
 
What kind of tools can I use to check what's wrong, step by step? Do you have any suggestions about something I may have missed?
 
Back
Top