FreeBSD 8.1 ZFS too slow

I installed FreeBSD 8.1 on my box, which is mainly used as a web server, with ZFS as the file system.
The server has 12 disks and 8 GB of memory; ZFS is laid out like this:
First vdev: 2 disks as a mirror.
Second vdev: 5 disks as raidz (the website currently lives on this vdev).
Third vdev: 5 disks as raidz.

Right after the server starts it is very fast, but after running for one or two days it becomes slow.
According to gstat, the disk I/O is very busy!
Is there any other ZFS configuration I should look at?

uname -a output
Code:
FreeBSD xxx 8.1-RELEASE FreeBSD 8.1-RELEASE #0: Mon Dec 20 20:50:20 CST 2010     root@xxx:/usr/obj/usr/src/sys/xxxCORE  amd64

Code:
VS001# vmstat 3
 procs      memory      page                    disks     faults         cpu
 r b w     avm    fre   flt  re  pi  po    fr  sr mf0 mf1   in   sy   cs us sy id
20 0 0   6649M   537M  5117   0   1   0 15894   0   0   0 1468 10667 10731 14  6 80
 1 0 0   6651M   503M 21235   4   8   2 35916   0  20   9 3434 25852 17194 49 12 40
 1 0 0   6649M   548M  2454   0   0   0 23378   0  65  49 1987 12911 13029 15  8 77
12 0 0   6717M   494M 12075   0   0   0 24995   0  68  50 2103 23922 12222 27 12 61
10 0 0   6630M   539M  7714  10  20   0 24126   0  13  14 2044 15665 12895 22  8 70
 1 0 0   6616M   572M  8806   2   4   0 26826   0  46  55 2700 26212 15909 35 12 52
28 0 0   6612M   561M  5977   3   7   0 16128   0   9  18 1784 15241 11645 22  8 71
 0 0 0   6588M   590M  6643   3   6   0 19419   0  63  65 2055 11867 14618 19  6 75
 0 0 0   6697M   516M  9518   2   4   0 15064   0  29  35 2220 13942 16277 23  8 69
38 0 0   6635M   545M  6181   0   0   0 17509   0  35  31 2399 16277 15336 25  7 68
Code:
VS001# iostat -xn 12 3
                        extended device statistics  
device     r/s   w/s    kr/s    kw/s wait svc_t  %b  
mfid0      3.3  19.9   165.9   494.9    0   2.0   1 
mfid1      3.3  20.0   167.0   494.9    0   2.0   1 
mfid2    134.9  44.7  4268.3   225.0    3  10.5  74 
mfid3    128.4  44.7  4242.4   225.0    8  11.0  75 
mfid4    128.2  44.6  4335.7   224.9    4  13.0  76 
mfid5    134.3  44.7  4341.6   225.0    8  10.7  74 
mfid6    130.1  44.7  4279.8   225.0    2  10.8  73 
mfid7      0.1   9.3     4.6   807.2    0   3.7   1 
mfid8      0.1   9.3     4.8   807.2    0   3.0   1 
mfid9      0.1   9.3     4.7   807.2    0   3.6   1 
mfid10     0.1   9.2     4.7   807.2    0   8.2   1 
mfid11     0.1   9.2     4.7   807.2    0   5.8   1

gstat output
Code:
dT: 1.004s  w: 1.000s
 L(q)  ops/s    r/s   kBps   ms/r    w/s   kBps   ms/w   %busy Name
    0     28     28   2380    2.7      0      0    0.0    7.4| mfid0
    0      0      0      0    0.0      0      0    0.0    0.0| mfid0p1
    0      0      0      0    0.0      0      0    0.0    0.0| mfid0p2
    0     28     28   2380    2.7      0      0    0.0    7.5| mfid0p3
    0     24     24   1162    1.7      0      0    0.0    2.8| mfid1
    8    252    252   5704   17.6      0      0    0.0   99.4| mfid2
    8    236    236   5394   34.0      0      0    0.0   99.9| mfid3
    4    244    244   5393   17.9      0      0    0.0   99.6| mfid4
    8    252    252   5876   16.3      0      0    0.0   98.7| mfid5
    4    249    249   6017   23.0      0      0    0.0  100.0| mfid6
    0      0      0      0    0.0      0      0    0.0    0.0| mfid7
    0      0      0      0    0.0      0      0    0.0    0.0| mfid8
    0      0      0      0    0.0      0      0    0.0    0.0| mfid9
    0      0      0      0    0.0      0      0    0.0    0.0| mfid10
    0      0      0      0    0.0      0      0    0.0    0.0| mfid11
    0      0      0      0    0.0      0      0    0.0    0.0| gptid/df7e8b26-0c6e-11e0-82ab-842b2b53bb80
    0     28     28   2380    2.7      0      0    0.0    7.5| gpt/disk0
    0      0      0      0    0.0      0      0    0.0    0.0| mfid1p1
    0      0      0      0    0.0      0      0    0.0    0.0| mfid1p2
    0     24     24   1162    1.7      0      0    0.0    2.8| mfid1p3
    8    252    252   5704   17.7      0      0    0.0   99.4| mfid2p1
    8    236    236   5394   34.1      0      0    0.0   99.9| mfid3p1
    4    244    244   5393   18.0      0      0    0.0   99.6| mfid4p1
    8    252    252   5876   16.4      0      0    0.0   98.7| mfid5p1
    4    249    249   6017   23.1      0      0    0.0  100.0| mfid6p1
    0      0      0      0    0.0      0      0    0.0    0.0| mfid7p1
    0      0      0      0    0.0      0      0    0.0    0.0| mfid8p1
    0      0      0      0    0.0      0      0    0.0    0.0| mfid9p1
    0      0      0      0    0.0      0      0    0.0    0.0| mfid10p1
    0      0      0      0    0.0      0      0    0.0    0.0| mfid11p1
    0      0      0      0    0.0      0      0    0.0    0.0| gptid/36dc456a-0c6f-11e0-82ab-842b2b53bb80
    0      0      0      0    0.0      0      0    0.0    0.0| mirror/swap
    0     24     24   1162    1.7      0      0    0.0    2.9| gpt/disk1
    8    252    252   5704   17.7      0      0    0.0   99.4| gpt/disk2
    8    236    236   5394   34.1      0      0    0.0   99.9| gpt/disk3
    4    244    244   5393   18.0      0      0    0.0   99.6| gpt/disk4
    8    252    252   5876   16.4      0      0    0.0   98.7| gpt/disk5
    4    249    249   6017   23.1      0      0    0.0  100.0| gpt/disk6
    0      0      0      0    0.0      0      0    0.0    0.0| gpt/disk7
    0      0      0      0    0.0      0      0    0.0    0.0| gpt/disk8
    0      0      0      0    0.0      0      0    0.0    0.0| gpt/disk9
    0      0      0      0    0.0      0      0    0.0    0.0| gpt/disk10
    0      0      0      0    0.0      0      0    0.0    0.0| gpt/disk11
#more /boot/loader.conf
Code:
zfs_load="YES"
vfs.root.mountfrom="zfs:zroot"
geom_mirror_load="YES"
vm.kmem_size="2048M"
vm.kmem_size_max="3072M"
vfs.zfs.arc_min="1024M"
vfs.zfs.arc_max="1536M"
#vfs.zfs.vdev.cache.size="5M"
vfs.zfs.vdev.min_pending="4"
vfs.zfs.vdev.max_pending="8"
vfs.zfs.prefetch_disable="1"
vfs.zfs.txg.timeout="5"
vfs.zfs.txg.synctime="1"
vfs.zfs.txg.write_limit_override="524288000"
kern.maxfiles="65536"
kern.maxfilesperproc="65536"
ZFS-related sysctl values:

Code:
vfs.zfs.l2c_only_size: 0
vfs.zfs.mfu_ghost_data_lsize: 22435840
vfs.zfs.mfu_ghost_metadata_lsize: 516310016
vfs.zfs.mfu_ghost_size: 538745856
vfs.zfs.mfu_data_lsize: 2424320
vfs.zfs.mfu_metadata_lsize: 1797120
vfs.zfs.mfu_size: 221218304
vfs.zfs.mru_ghost_data_lsize: 89661952
vfs.zfs.mru_ghost_metadata_lsize: 528548864
vfs.zfs.mru_ghost_size: 618210816
vfs.zfs.mru_data_lsize: 524288
vfs.zfs.mru_metadata_lsize: 16384
vfs.zfs.mru_size: 507841536
vfs.zfs.anon_data_lsize: 0
vfs.zfs.anon_metadata_lsize: 0
vfs.zfs.anon_size: 5612032
vfs.zfs.l2arc_norw: 1
vfs.zfs.l2arc_feed_again: 1
vfs.zfs.l2arc_noprefetch: 0
vfs.zfs.l2arc_feed_min_ms: 200
vfs.zfs.l2arc_feed_secs: 1
vfs.zfs.l2arc_headroom: 2
vfs.zfs.l2arc_write_boost: 8388608
vfs.zfs.l2arc_write_max: 8388608
vfs.zfs.arc_meta_limit: 402653184
vfs.zfs.arc_meta_used: 1543003976
vfs.zfs.mdcomp_disable: 0
vfs.zfs.arc_min: 1073741824
vfs.zfs.arc_max: 1610612736
vfs.zfs.zfetch.array_rd_sz: 1048576
vfs.zfs.zfetch.block_cap: 256
vfs.zfs.zfetch.min_sec_reap: 2
vfs.zfs.zfetch.max_streams: 8
vfs.zfs.prefetch_disable: 1
vfs.zfs.check_hostid: 1
vfs.zfs.recover: 0
vfs.zfs.txg.write_limit_override: 524288000
vfs.zfs.txg.synctime: 1
vfs.zfs.txg.timeout: 5
vfs.zfs.scrub_limit: 10
vfs.zfs.vdev.cache.bshift: 16
vfs.zfs.vdev.cache.size: 10485760
vfs.zfs.vdev.cache.max: 16384
vfs.zfs.vdev.aggregation_limit: 131072
vfs.zfs.vdev.ramp_rate: 2
vfs.zfs.vdev.time_shift: 6
vfs.zfs.vdev.min_pending: 4
vfs.zfs.vdev.max_pending: 8
vfs.zfs.cache_flush_disable: 0
vfs.zfs.zil_disable: 0
vfs.zfs.zio.use_uma: 0
vfs.zfs.version.zpl: 3
vfs.zfs.version.vdev_boot: 1
vfs.zfs.version.spa: 14
vfs.zfs.version.dmu_backup_stream: 1
vfs.zfs.version.dmu_backup_header: 2
vfs.zfs.version.acl: 1
vfs.zfs.debug: 0
vfs.zfs.super_owner: 0
 
Code:
VS001# zpool iostat -v
                  capacity     operations    bandwidth
pool            used  avail   read  write   read  write
-------------  -----  -----  -----  -----  -----  -----
backup          660G  3.89T      0      0      4     52
  raidz1        660G  3.89T      0      0      4     52
    gpt/disk7      -      -      0      0    100     15
    gpt/disk8      -      -      0      0     92     15
    gpt/disk9      -      -      0      0     87     15
    gpt/disk10      -      -      0      0     92     15
    gpt/disk11      -      -      0      0     96     16
-------------  -----  -----  -----  -----  -----  -----
wwwroot         911G  3.64T     60    196   740K  1010K
  raidz1        911G  3.64T     60    196   740K  1010K
    gpt/disk2      -      -     40     56  1.51M   260K
    gpt/disk3      -      -     40     56  1.50M   260K
    gpt/disk4      -      -     40     56  1.49M   260K
    gpt/disk5      -      -     40     57  1.50M   260K
    gpt/disk6      -      -     40     57  1.49M   260K
-------------  -----  -----  -----  -----  -----  -----
zroot          28.9G   891G     26    115   578K  1.37M
  mirror       28.9G   891G     26    115   578K  1.37M
    gpt/disk0      -      -      3     32   294K  1.37M
    gpt/disk1      -      -      3     32   296K  1.37M
-------------  -----  -----  -----  -----  -----  -----

I read this thread:
http://forums.freebsd.org/showthread.php?p=63019

and gave it a try, decreasing kern.maxvnodes; the value is now 100000.
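
For anyone else trying the same thing, this is roughly how I applied the change (a sketch only; 100000 is just the value I picked for now):
Code:
# check the current limit and how many vnodes are actually in use
sysctl kern.maxvnodes
sysctl vfs.numvnodes
# lower the limit at runtime
sysctl kern.maxvnodes=100000
# keep it across reboots
echo 'kern.maxvnodes=100000' >> /etc/sysctl.conf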
 
Hi,

did you check whether your disks use the Advanced Format 4K block size? If so, that could explain low performance in general, but it wouldn't explain the change in performance over time.

Otherwise it simply looks like the disks in your raidz vdev are 100% busy doing about 250 read IOs per second. That would imply the read workload of your server/application is too much for a 5-disk raidz vdev. Options would include using multiple mirrored vdevs (striped mirrors), adding more raidz vdevs to stripe across, giving the ARC more system memory, or adding an L2ARC.
The issue looks like one of hardware performance rather than anything to do with the OS or the file system. ZFS isn't going to do extra read IOs for fun; it's doing them because something is issuing read requests for those files/data.
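
For example (device names are made up, and a new pool layout obviously means migrating the data onto it), the mirror and L2ARC options would look roughly like this:
Code:
# a pool striped over three mirrored pairs ("RAID10" style)
zpool create fastpool mirror gpt/diskA gpt/diskB mirror gpt/diskC gpt/diskD mirror gpt/diskE gpt/diskF
# or add an SSD as an L2ARC read cache to the existing pool
zpool add wwwroot cache gpt/ssd0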

thanks Andy.

PS you can make the gstat output a bit clearer by adding a filter such as:
Code:
gstat -f mfid.p1
 
gpart show <disk>

Are your partitions properly aligned? Proper alignment usually means a 2048-sector offset, assuming 512-byte sectors, which all current 4K-sector HDDs emulate.
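
If not, the ZFS partition would have to be recreated with an aligned start, roughly like this (a sketch only; it destroys the partition contents, so each disk would have to be resilvered back into the pool afterwards):
Code:
# example for one data disk: start the partition at sector 2048
gpart delete -i 1 mfid2
gpart add -b 2048 -t freebsd-zfs -l disk2 mfid2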
 
AndyUKG said:
Hi,

did you check whether your disks use the Advanced Format 4K block size? If so, that could explain low performance in general, but it wouldn't explain the change in performance over time.

Otherwise it simply looks like the disks in your raidz vdev are 100% busy doing about 250 read IOs per second. That would imply the read workload of your server/application is too much for a 5-disk raidz vdev. Options would include using multiple mirrored vdevs (striped mirrors), adding more raidz vdevs to stripe across, giving the ARC more system memory, or adding an L2ARC.
The issue looks like one of hardware performance rather than anything to do with the OS or the file system. ZFS isn't going to do extra read IOs for fun; it's doing them because something is issuing read requests for those files/data.

thanks Andy.

PS you can make the gstat output a bit clearer by adding a filter such as:
Code:
gstat -f mfid.p1

Code:
VS001# gstat -f mfid.p1

dT: 1.002s  w: 1.000s  filter: mfid.p1
 L(q)  ops/s    r/s   kBps   ms/r    w/s   kBps   ms/w   %busy Name
    0      0      0      0    0.0      0      0    0.0    0.0| mfid0p1
    0      0      0      0    0.0      0      0    0.0    0.0| mfid1p1
    0     63     63   2946   26.7      0      0    0.0   90.6| mfid2p1
    0     62     62   2824   22.8      0      0    0.0   78.0| mfid3p1
    0     64     64   2949   22.3      0      0    0.0   84.9| mfid4p1
    0     66     66   3012   19.4      0      0    0.0   77.4| mfid5p1
    2     68     68   3266   16.3      0      0    0.0   68.9| mfid6p1
    0      0      0      0    0.0      0      0    0.0    0.0| mfid7p1
    0      0      0      0    0.0      0      0    0.0    0.0| mfid8p1
    0      0      0      0    0.0      0      0    0.0    0.0| mfid9p1
And how do I check whether a disk uses the Advanced Format 4K block size?
 
sub_mesa said:
gpart show <disk>

Are your partitions properly aligned? Proper alignment usually means a 2048-sector offset, assuming 512-byte sectors, which all current 4K-sector HDDs emulate.


Code:
VS001# gpart show mfid0
=>        34  1952448445  mfid0  GPT  (931G)
          34         128      1  freebsd-boot  (64K)
         162     8388608      2  freebsd-swap  (4.0G)
     8388770  1944059709      3  freebsd-zfs  (927G)

VS001# gpart show mfid1
=>        34  1952448445  mfid1  GPT  (931G)
          34         128      1  freebsd-boot  (64K)
         162     8388608      2  freebsd-swap  (4.0G)
     8388770  1944059709      3  freebsd-zfs  (927G)

VS001# gpart show mfid2
=>        34  1952448445  mfid2  GPT  (931G)
          34  1952448445      1  freebsd-zfs  (931G)
... and so on, up to mfid11.

I doubt the problem is caused by memory, though the longer the server runs, the less free memory is left, and it even starts using swap.
 
jackie said:
I doubt the problem is caused by memory, though the longer the server runs, the less free memory is left, and it even starts using swap.

Your logic for doubting that this problem is related to memory is less than convincing. In fact, what you have described are fairly typical symptoms of a system that is running low on memory and starting to page to swap, perhaps due to a memory leak somewhere. I suggest tracking down whatever is consuming so much memory and fixing that.
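
Something along these lines will usually show where the memory is going (just a sketch, adjust as needed):
Code:
# processes sorted by resident memory size
top -b -o res | head -n 20
# how much swap is actually in use
swapinfo -k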
 
jackie said:
And how do I check whether a disk uses the Advanced Format 4K block size?

Check the disk manufacturer's specifications for the disk model you have (i.e. download the spec sheet from their web site). If you don't have the model number, you should be able to find it with "camcontrol identify", though I'm not 100% sure this will work on mfid devices...
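
Something like this may get you the drive model, though behind an mfi(4) RAID controller the physical drives might only be visible through mfiutil (the device names here are guesses):
Code:
# if the disks are visible to CAM
camcontrol devlist
camcontrol identify ada0
# physical drives behind an mfi(4) controller
mfiutil show drives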


thanks Andy.
 
I agree with the opinions of the other guys.

If most of the 911 GB on wwwroot is user data (millions of tiny files) that is randomly accessed via the web and MySQL servers (maybe mail too), then your maximum ARC size is too small. You can download arc_summary and use it to see how well the ZFS caches perform with your settings.
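
Roughly like this (arc_summary is a third-party Perl script, and the numbers below are only an example; how much RAM you can give the ARC depends on what else runs on the box):
Code:
# after downloading the script, check the ARC hit/miss ratios
perl arc_summary.pl | less
# if the ARC is clearly too small, raise the limits in /boot/loader.conf, e.g.
# vfs.zfs.arc_max="4096M"
# vm.kmem_size="6144M"
# vm.kmem_size_max="6144M"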

Another point is the ZFS setup. Why don't you use the disks from wwwroot and backup in one pool? One pool with 2x raidz1 of 5 disks each, for example. I mean, if you need safe backups, you have to save the data to another server/medium anyway. And if the backups are not that important, there is no need for an extra pool.
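
Just to sketch the layout I mean (this is not an in-place conversion: the data has to be copied elsewhere and the existing pools destroyed before the disks can be reused; "tank" is only a placeholder name):
Code:
# one pool striped across two 5-disk raidz1 vdevs
zpool create tank \
    raidz gpt/disk2 gpt/disk3 gpt/disk4 gpt/disk5 gpt/disk6 \
    raidz gpt/disk7 gpt/disk8 gpt/disk9 gpt/disk10 gpt/disk11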

Be careful about simply adding an SSD as a read cache (L2ARC), because it forces ZFS to consume more RAM to keep track of the cached blocks.

---

OT: if you are using PHP, check the error-logging settings in php.ini. Maybe error logging is enabled and written to the pool, so some sloppy user scripts can spam the logs with errors and warnings.
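
For example, something like this shows whether PHP error logging is on and whether the logs are growing (the php.ini path is the usual ports location, and the log paths are only examples; yours may differ):
Code:
# is error logging enabled, and where does it write?
grep -E '^(log_errors|error_log)' /usr/local/etc/php.ini
# are the web server / PHP logs growing quickly?
ls -lh /var/log/httpd-error.log /var/log/php*.log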
 
Would one pool with 10 disks still perform well? The backup pool is mainly used to store files from another server.
 
jackie said:
Would one pool with 10 disks still perform well? The backup pool is mainly used to store files from another server.
One pool with 2 vdevs of 5 disks each will perform much better than 1 vdev of 10 disks.
Also, when you use too many disks in a single vdev, resilver times become very long!
 
A ZFS pool with multiple mirrors offers the best random read performance, followed by raidz2 (RAID6) and then raidz1 (RAID5), which was the slowest in my tests.

I tested this for myself some months ago on FreeBSD 8.0 amd64. The disks were always in one big pool.


Code:
Q6600 2.4GHz, 4 cores, 8GB RAM, 8x 500GB SATA 7200rpm on a 3ware 9550 with 112MB cache (+ 128GB SSD on onboard AHCI SATA, used as ZFS cache device)

8x 2GB	iozone -R -l 8 -u 8 -r 128k -s 2048m -+u -F /vpool/0 /vpool/1 /vpool/2 /vpool/3 /vpool/4 /vpool/5 /vpool/6 /vpool/7
8x 16GB	iozone -R -l 8 -u 8 -r 128k -s 16384m -+u -F /vpool/0 /vpool/1 /vpool/2 /vpool/3 /vpool/4 /vpool/5 /vpool/6 /vpool/7

Raid type + tsize	Record size	Initial write	Rewrite	Read	Re-read	Reverse read	Stride read	Random read	Mixed workload	Random write	Pwrite	Pread
ZFS 4xmirror 16GB	4k	144555	104575	382731	462835	486994	27597	2304	1883	1596	150238	441895
ZFS 4xmirror 16GB	8k	150118	101692	411235	458278	479743	28707	4639	3738	3160	157564	432214
ZFS 4xmirror 16GB	16k	152849	105622	367779	455611	477287	15183	9079	7374	6227	152715	444353
ZFS 4xmirror 16GB	32k	155844	102256	435850	458116	475463	25335	18318	14680	12214	149054	417539
ZFS 4xmirror 16GB	64k	172385	117065	428523	456680	475559	112921	34850	29272	24939	176967	459244
ZFS 4xmirror 16GB	128k	185493	182575	457494	452382	478371	237971	67530	185980	177541	183541	467138
ZFS 4xmirror 128GB	128k	181605	180101	465140	465687	461793	210729	58641	173489	170506	178522	466522
ZFS 4xmirror 128GB+SSD	128k	179417	173220	443387	427688	429643	194406	56594	172343	173297	177863	451006

ZFS 2x raidz1 16GB	4k	201372	129066	400362	441077	411856	27416	1688	1406	1278	201227	431656
ZFS 2x raidz1 16GB	8k	197808	124648	410564	445269	453686	27733	3367	2823	2547	213189	440282
ZFS 2x raidz1 16GB	16k	204520	126539	396743	451942	451812	8592	6690	5560	5061	216175	432636
ZFS 2x raidz1 16GB	32k	203774	115964	405759	455969	468883	16520	11748	10344	9641	199992	413443
ZFS 2x raidz1 16GB	64k	229274	136662	357300	416861	419838	54953	25397	22539	19677	224096	446347
ZFS 2x raidz1 16GB	128k	251605	241508	455531	453434	457922	119443	48417	242068	243598	256369	442286
ZFS 2x raidz1 128GB	128k	246989	241543	459242	458483	455429	110994	38390	186402	239073	245531	460195
ZFS 2x raidz1+SSD 128GB	128k	244157	234328	425171	388694	385990	95838	48306	178327	238108	245559	427545

ZFS 2x raidz2 16GB	4k	138122	89330	349742	383184	386809	22712	2060	1629	1377	144496	368246
ZFS 2x raidz2 16GB	8k	138626	84824	334442	368534	369128	19834	3611	2990	2720	141550	347584
ZFS 2x raidz2 16GB	16k	149676	86681	364003	381677	379787	7797	8203	6392	5498	143029	327476
ZFS 2x raidz2 16GB	32k	149548	85900	360459	385511	387083	19177	16455	12984	10782	147979	375047
ZFS 2x raidz2 16GB	64k	167727	100181	340070	381868	371783	71109	31066	26135	21971	168722	387090
ZFS 2x raidz2 16GB	128k	184320	175441	377009	379471	387589	147872	60618	166081	163162	176361	372442
ZFS 2x raidz2 128GB	128k	175074	170698	375560	375379	376987	115377	45108	132307	154887	172543	381577
ZFS 2x raidz2+SSD 128GB	128k	175509	168228	354856	350179	341216	93915	47693	161779	159817	173295	373874


Command line used: iozone -R -l 1 -u 1 -r 128k -s 16384m -+u -F /vpool/0
						 						
ZFS 1x SSD	128k	161471	158005	244004	239522	240699	132534	117591	117154	151737	107356	240417
UFS 1x SSD	128k	170970	167753	233725	235838	212217	140302	136591	122581	4748	94023	236074

[attached screenshot: bildschirmfoto20110119u.png]
 