bhyve Windows Server slow IO

I have Windows Server 2016 installed in a bhyve VM on FreeBSD 12.0 and overall it runs well, except I/O operations are slow. For example, simply extracting a 12KB zip archive can take close to 10 seconds. I did an iostat on its device (/dev/vmm/winserver2016) and here's the output I got when doing the extract:

Code:
       tty            cpu
 tin  tout us ni sy in id
   0    27  0  0 31  0 69
   0    27  0  0 24  0 76
   0    27  0  0 33  0 67
   0    27  0  0 33  0 67
   0    27  0  0 44  0 56
   0    27  0  0 51  0 49
   0    27  0  0 50  0 50

The VM has 2 cores and 3GB of RAM on an i5-4570 (the host has 8GB total), everything runs on an SSD, and the VM was installed in a ZFS dataset with vm-bhyve. Any insights would be great. It's not used for production, at least
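
For what it's worth, the iostat output above only shows the tty/cpu columns; to see what the backing disk is actually doing during the extract, something like the following on the host should help (a rough sketch; ada0 and zroot are assumptions for the SSD and pool names):

Code:
# extended per-device stats, 1-second samples, for the SSD backing the VM
iostat -x -w 1 ada0

# or watch the pool itself while the guest extracts the archive
zpool iostat -v zroot 1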
 
For me, using virtio-blk instead of ahci-hd for the virtual disk made a huge difference. But there's a gotcha: if you don't run -CURRENT, you have to apply a patch, otherwise bhyve crashes quickly when a Windows guest uses a virtio-blk disk with the Red Hat virtio Windows drivers:

Code:
--- head/usr.sbin/bhyve/virtio.c    2019/05/18 17:30:03    347959
+++ head/usr.sbin/bhyve/virtio.c    2019/05/18 19:32:38    347960
@@ -3,6 +3,7 @@
  *
  * Copyright (c) 2013  Chris Torek <torek @ torek net>
  * All rights reserved.
+ * Copyright (c) 2019 Joyent, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -32,6 +33,8 @@
 #include <sys/param.h>
 #include <sys/uio.h>
 
+#include <machine/atomic.h>
+
 #include <stdio.h>
 #include <stdint.h>
 #include <pthread.h>
@@ -422,6 +425,12 @@
     vue = &vuh->vu_ring[uidx++ & mask];
     vue->vu_idx = idx;
     vue->vu_tlen = iolen;
+
+    /*
+     * Ensure the used descriptor is visible before updating the index.
+     * This is necessary on ISAs with memory ordering less strict than x86.
+     */
+    atomic_thread_fence_rel();
     vuh->vu_idx = uidx;
 }
 
@@ -459,6 +468,13 @@
     vs = vq->vq_vs;
     old_idx = vq->vq_save_used;
     vq->vq_save_used = new_idx = vq->vq_used->vu_idx;
+
+    /*
+     * Use full memory barrier between vu_idx store from preceding
+     * vq_relchain() call and the loads from VQ_USED_EVENT_IDX() or
+     * va_flags below.
+     */
+    atomic_thread_fence_seq_cst();
     if (used_all_avail &&
         (vs->vs_negotiated_caps & VIRTIO_F_NOTIFY_ON_EMPTY))
         intr = 1;
--- head/usr.sbin/bhyve/block_if.c    2019/05/02 19:59:37    347032
+++ head/usr.sbin/bhyve/block_if.c    2019/05/02 22:46:37    347033
@@ -65,7 +65,7 @@
 #define BLOCKIF_SIG    0xb109b109
 
 #define BLOCKIF_NUMTHR    8
-#define BLOCKIF_MAXREQ    (64 + BLOCKIF_NUMTHR)
+#define BLOCKIF_MAXREQ    (BLOCKIF_RING_MAX + BLOCKIF_NUMTHR)
 
 enum blockop {
     BOP_READ,
--- head/usr.sbin/bhyve/block_if.h    2019/05/02 19:59:37    347032
+++ head/usr.sbin/bhyve/block_if.h    2019/05/02 22:46:37    347033
@@ -41,7 +41,13 @@
 #include <sys/uio.h>
 #include <sys/unistd.h>
 
-#define BLOCKIF_IOV_MAX        33    /* not practical to be IOV_MAX */
+/*
+ * BLOCKIF_IOV_MAX is the maximum number of scatter/gather entries in
+ * a single request.  BLOCKIF_RING_MAX is the maxmimum number of
+ * pending requests that can be queued.
+ */
+#define    BLOCKIF_IOV_MAX        128    /* not practical to be IOV_MAX */
+#define    BLOCKIF_RING_MAX    128
 
 struct blockif_req {
     int        br_iovcnt;
--- head/usr.sbin/bhyve/pci_virtio_block.c    2019/05/02 19:59:37    347032
+++ head/usr.sbin/bhyve/pci_virtio_block.c    2019/05/02 22:46:37    347033
@@ -3,6 +3,7 @@
  *
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
+ * Copyright (c) 2019 Joyent, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -55,7 +56,9 @@
 #include "virtio.h"
 #include "block_if.h"
 
-#define VTBLK_RINGSZ    64
+#define VTBLK_RINGSZ    128
+
+_Static_assert(VTBLK_RINGSZ <= BLOCKIF_RING_MAX, "Each ring entry must be able to queue a request");
 
 #define VTBLK_S_OK    0
 #define VTBLK_S_IOERR    1
@@ -351,7 +354,15 @@
     /* setup virtio block config space */
     sc->vbsc_cfg.vbc_capacity = size / DEV_BSIZE; /* 512-byte units */
     sc->vbsc_cfg.vbc_size_max = 0;    /* not negotiated */
-    sc->vbsc_cfg.vbc_seg_max = BLOCKIF_IOV_MAX;
+
+    /*
+     * If Linux is presented with a seg_max greater than the virtio queue
+     * size, it can stumble into situations where it violates its own
+     * invariants and panics.  For safety, we keep seg_max clamped, paying
+     * heed to the two extra descriptors needed for the header and status
+     * of a request.
+     */
+    sc->vbsc_cfg.vbc_seg_max = MIN(VTBLK_RINGSZ - 2, BLOCKIF_IOV_MAX);
     sc->vbsc_cfg.vbc_geometry.cylinders = 0;    /* no geometry */
     sc->vbsc_cfg.vbc_geometry.heads = 0;
     sc->vbsc_cfg.vbc_geometry.sectors = 0;

I'm using this patch with 12.0-RELEASE and it's working fine :)
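
For anyone wanting to try the same switch, this is roughly what the disk part of a vm-bhyve guest config looks like with virtio-blk (a sketch only; the guest name and the other settings are assumptions, and the Red Hat virtio drivers must already be installed in the Windows guest before changing the type):

Code:
# $vm_dir/winserver2016/winserver2016.conf (hypothetical guest)
loader="uefi"
cpu=2
memory=3G
network0_type="virtio-net"
network0_switch="public"
disk0_type="virtio-blk"     # previously "ahci-hd"
disk0_name="disk0.img"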
 
Darn, compiling base with a patch, hehe. Not a big deal for me to do (probably worth it for myself), but I was hoping I could tell my friends bhyve was ready for virtualizing Windows

I'll have to give this patch a spin soon, hopefully it'll get ported back to 12.0
 
It is – it has been working perfectly here for 3 years: Windows 7 / 10 / 2019 on ZFS. I never noticed any I/O issue.

Wish I could say that. Networking is great, install was great, but a 1MB zip extraction takes a minute. That's not working perfectly
 
I'll have to give this patch a spin soon, hopefully it'll get ported back to 12.0
That's unlikely, as it neither fixes a security hole nor a "bug" (bhyve is documented to be incompatible with Windows guests using virtio-blk).
But, as the patch is pretty small and doesn't change any other behavior (as far as I can tell), I could imagine it might be included in an upcoming 12.1.
Wish I could say that. Networking is great, install was great, but a 1MB zip extraction takes a minute. That's not working perfectly
I didn't do any exact measurements, but for me, a Windows guest using ahci-hd was usable, but "felt" a bit slow. Switching to virtio-blk did speed things up.
 
That's unlikely, as it neither fixes a security hole nor a "bug" (bhyve is documented to be incompatible with Windows guests using virtio-blk).
But, as the patch is pretty small and doesn't change any other behavior (as far as I can tell), I could imagine it might be included in an upcoming 12.1.

I didn't do any exact measurements, but for me, a Windows guest using ahci-hd was usable, but "felt" a bit slow. Switching to virtio-blk did speed things up.

Even if it showed up in 12.1 that would be neat. I can't say my Windows VM "feels" slow, it IS slow. Extracting a 1MB .zip takes a minute. That isn't "feeling" slow. I wish it did better at this because I have quite a few people who turned down the idea of testing FreeBSD simply because bhyve can't virtualize Windows Server well compared to KVM. bhyve is still pretty young compared to other hypervisors though, so here's to it catching up for Windows guests!
 
That isn't "feeling" slow. I wish it did better at this because I have quite a few people who turned down the idea of testing FreeBSD simply because bhyve can't virtualize Windows Server well compared to KVM. bhyve is still pretty young compared to other hypervisors though, so here's to it catching up for Windows guests!
Alas, that's how it is. I have to use VMware Player: though it's not a hypervisor, it nevertheless gives me a faster Windows. Or maybe it's the virtual networking driver that slows things down, I don't know. When I connect to my Windows machine on my real office network using xfreerdp as aragats suggests here, the connected Windows is lightning fast in the RDP window. Connected to bhyve, it reminds me of Win95 times when computers used to hang every now and then. I just can't believe that a virtual NATed network can be way slower than the real one.
 
When I connect to my Windows machine on my real office network using xfreerdp...Windows is lightning fast... Connected to bhyve, it reminds me of Win95 times when computers used to hang every now and then.
Are you connecting to the Windows servers using VNC? Yes, that's pretty slow and aside from the installation or recovery, I'd advise against it. Connecting to Windows bhyve VMs via RDP works fine for me.
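
For reference, connecting to a bhyve Windows guest over RDP with FreeRDP looks something like this (the guest address, user name and window size are made up):

Code:
# net/freerdp; guest IP, user and resolution are hypothetical
xfreerdp /v:192.168.8.10 /u:Administrator /size:1600x900 +clipboard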
 
Are you connecting to the Windows servers using VNC? Yes, that's pretty slow and aside from the installation or recovery, I'd advise against it. Connecting to Windows bhyve VMs via RDP works fine for me.
No, man. Using RDP. I'm comparing an RDP connection to bhyve vs. RDP to a Win 10 machine located on the office network. Fair comparison.
 
OK, my problem was partly solved by adjusting the bhyve -c value. It turns out that when you pass -c with a value greater than 1, Win 10 understands it as more than one CPU (socket)... which it doesn't support, does it. So the right thing to use is bhyve -c sockets=1,cores=2,threads=2. That will give Windows a pretty standard 1 CPU, 2 cores, 4 threads with HT enabled. Just for experiment's sake I made a separate Win 10 Pro installation using virtio-blk, but I can't say whether it outperforms the older one with ahci-hd. Now that I'm using this optimized CPU setting (plus 6GB RAM given to it) together with NIC passthrough, it really is quick enough.
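
To illustrate, a full invocation along those lines might look roughly like this (purely a sketch: slot numbers, paths and the guest name are assumptions, and the UEFI firmware comes from sysutils/bhyve-firmware):

Code:
bhyve -c sockets=1,cores=2,threads=2 -m 6G -H -w \
  -s 0,hostbridge \
  -s 4,virtio-blk,/dev/zvol/zroot/win10/disk0 \
  -s 5,virtio-net,tap0 \
  -s 29,fbuf,tcp=0.0.0.0:5900,w=1024,h=768 \
  -s 30,xhci,tablet \
  -s 31,lpc -l bootrom,/usr/local/share/uefi-firmware/BHYVE_UEFI.fd \
  win10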
 
If you do find big gains with virtio-blk over ahci, let us know. Although none of our Windows servers are very heavy on disk IO, I'm thinking of converting them if it's significantly faster.

Does anyone know if the patch Zirias mentioned made it into 12.1, or does it still need to be patched in manually? Edit: Looks like it was added in May.
 
1. apply this commit https://reviews.freebsd.org/rS358848
2. use nvme as the disk backend, or pass through an NVMe disk.
3. pass through a NIC instead of virtio-net
4. apply this commit https://reviews.freebsd.org/rS349184
5. apply this commit https://reviews.freebsd.org/rS348779
Thanks for this. Unfortunately #2 isn't possible for me right now, and I suspect #3 won't be practical for most people unless they're running very few VMs or have a ton of physical NICs to pass through.

The patches are interesting. #4 seems to have been fixed quite some time ago and should have been in 12.1, but it slipped through the cracks and never got MFC'd, so unless manually patched in we won't see it until 12.2. I'll have to look at the source to tell if #5 made it into 12.1, but I'm guessing not? #1 is very fresh and looks quite promising.

Seeing as work is a ghost town right now, and as a result the guests (and the bhyve server itself) aren't being used, I've got some flexibility to play around with experimental patches. So I think I might just take these for a bit of a test drive while I still can. 😁
 
Thanks for this. Unfortunately #2 isn't possible for me right now, and I suspect #3 won't be practical for most people unless they're running very few VMs or have a ton of physical NICs to pass through.

The patches are interesting. #4 seems to have been fixed quite some time ago and should have been in 12.1, but it slipped through the cracks and never got MFC'd, so unless manually patched in we won't see it until 12.2. I'll have to look at the source to tell if #5 made it into 12.1, but I'm guessing not? #1 is very fresh and looks quite promising.

Seeing as work is a ghost town right now, and as a result the guests (and the bhyve server itself) aren't being used, I've got some flexibility to play around with experimental patches. So I think I might just take these for a bit of a test drive while I still can. 😁

#2: just enable nvme as the virtual disk controller instead of virtio-scsi/ahci. Since the nvme controller has less overhead, it's really faster than the ahci backend, no matter what real disks you have.

#4 is a bug fix; it matters if you use passthrough, otherwise you can ignore it.
#5 is good for Intel Core i5/i7, since Xeon CPUs already have good support.

You can just fetch the commit diff and patch bhyve only, then rebuild and reinstall just bhyve. No need to upgrade the whole OS.
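
That workflow might look roughly like this (a sketch only; the diff filename is made up, and the diff itself has to be downloaded from the review site by hand since the exact export link varies):

Code:
# apply the downloaded diff against the source tree
cd /usr/src
patch < ~/rS358848.diff      # adjust -p0/-p1 to match how the diff was generated

# rebuild and reinstall only the bhyve userland utility
cd /usr/src/usr.sbin/bhyve
make clean all
make install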

And for FreeBSD/Linux guests, SR-IOV is good to go. The Chelsio T520-BT/CR is pretty stable for creating multiple VFs and passing them through to guests.
No such luck for Windows guests, though. I am writing to Chelsio support; hopefully they can fix the VF driver for bhyve/Windows guests in the future.
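
For anyone curious what the VF-creation side looks like on the host, it's driven by iovctl(8) and a per-PF config file; the snippet below is only an illustration (the device name cxl0 and the VF count are assumptions and depend on the card/driver):

Code:
# /etc/iov/cxl0.conf -- hypothetical PF device and VF count
PF {
        device : "cxl0";
        num_vfs : 4;
}

DEFAULT {
        passthrough : true;
}

# create the VFs, then pass them to guests like any other ppt device
iovctl -C -f /etc/iov/cxl0.conf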
 
And for FreeBSD/Linux guests, SR-IOV is good to go. The Chelsio T520-BT/CR is pretty stable for creating multiple VFs and passing them through to guests.
I read about this on the bhyve dev mailing list. It also mentions that the Intel I350-T4 is no good.
Anyway, since you're mentioning this and I couldn't find anything clear enough on the web, let me ask you this: is SR-IOV support also required from the CPU and motherboard, and not only from the NIC?
Because the Intel site implied so by saying about certain CPUs that they have the feature disabled. Then again, the next question is whether the motherboard chipset (and BIOS, of course) supports all the CPU features or not. But most other sites only mention whether or not this or that NIC supports the feature.
So how does one find out whether his hardware supports SR-IOV? For example, I have an Intel Xeon E5-2690 and a motherboard supporting all its features, but there's no mention of SR-IOV there. I also have an Intel I350-T4, which kind of supports it, though on the bhyve-dev list they mentioned it wasn't worth the time to try to implement it in the igb driver responsible for that NIC.
Then, of course, it has 4 ports, each of which can be passed through as a separate PCIe device... still, I'm interested.
 
So how does one find out whether his hardware supports SR-IOV? For example, I have an Intel Xeon E5-2690 and a motherboard
I can't find an Intel page to quote, but I know all LGA2011 CPUs support SR-IOV on C6xx chipsets.
When it comes to NICs, it is found in Intel, Mellanox and Chelsio 10G cards.
There was a mailing list discussion about 1G Intel NICs but I don't know if that was ever implemented.
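
One quick way to check whether a particular device advertises SR-IOV is to look at its PCI extended capabilities (the selector igb0 below is just an example NIC):

Code:
# list PCI capabilities for the device; SR-IOV shows up as an extended capability
pciconf -lc igb0
# look for a line along the lines of:  ecap 0010[...] = SR-IOV ...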
 
You don't need the patch from #4. It is only needed for E3 Xeons, not E5 Xeons like you have.
As for the #1 and #5 patches, I don't think they are absolutely needed either.
I wholeheartedly agree with #2 and #3 and use both. I don't pass through NVMe but host my VMs on them.
I pass through all NICs and let my upstream OPNsense box hand out DHCP IPs to each NIC interface.
Bridges and all that jazz are not ideal in my opinion.
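
For reference, passing a whole NIC to a guest boils down to reserving it for ppt at boot and handing it to bhyve with wired guest memory; the bus/slot/function below is purely an example:

Code:
# /boot/loader.conf -- reserve the NIC for passthrough (find the numbers with pciconf -lv)
pptdevs="3/0/0"

# bhyve invocation fragment: -S wires guest memory, passthru hands the device over
bhyve -S ... -s 7,passthru,3/0/0 ... guestname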
 
As for the #1 and #5 patches, I don't think they are absolutely needed either.
I applied patch #1 and my Windows VM gained a lot of speed. Some things (like logging in with a Samba AD account) take a fraction of the time they took without this patch. Highly recommended!
I wholeheartedly agree with #2 and #3 and use both.
Sure, if you want to dedicate some hardware to the VM. I have good results with virtio-blk (backed by a ZFS vdev) and virtio-net though.
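
If it helps anyone, a zvol-backed virtio-blk disk can be set up along these lines (pool, dataset and size are assumptions):

Code:
# create a sparse 60G zvol exposed as a plain block device
zfs create -s -V 60G -o volmode=dev zroot/vm/winserver/disk0

# then point the guest at it, e.g.
#   -s 4,virtio-blk,/dev/zvol/zroot/vm/winserver/disk0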
 
Do I need to use -CURRENT for these patches to apply? They failed with 12.1.
That's because you get them in a braindead format from the linked site, with only one "hunk" consisting of the whole file. They work on 12.1, but I had to apply them manually, using vim ...

Find attached a patch combining #1 and #4 for 12.1 for your convenience ;)
 

Attachments

  • bhyve-rS349184-rS358848-combined.diff.txt
    12.1 KB
Oh, thank you kindly!!!😁😁
EDIT: looks like it does work faster with these patches (the other ones I already added)! Thank you again; I only had to point it at /usr/src.

The only thing I didn't understand is bendany's point #2, using nvme instead of ahci/virtio-blk, because bhyve refuses to accept that as an argument. For me it's fast enough even without it, but it would be interesting to try.
 
If you're using sysutils/vm-bhyve (which I highly recommend), it's rather straightforward. Details on how to configure it are on the bottom of the page here.
No, I'm not. At this stage of testing it suits my needs better to use a startup shell script to manually start/stop/destroy a given VM.
Now, the bhyve man page says this about the nvme type of emulated device:
Code:
NVMe devices:

                 devpath     Accepted device paths are:
                             /dev/blockdev or /path/to/image or
                             ram=size_in_MiB.

                 maxq        Max number of queues.

                 qsz         Max elements in each queue.

                 ioslots     Max number of concurrent I/O requests.

                 sectsz      Sector size (defaults to blockif sector size).

                 ser         Serial number with maximum 20 characters.
But I simply used nvme,/path/to/image, which obviously wasn't enough, or something. So I wonder if sysutils/vm-bhyve uses some defaults there for maxq, qsz, ioslots, sectsz, ser.
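
In case it helps, a raw bhyve device string for an nvme disk would look something like the line below (slot number, zvol path and serial are invented; whether a plain nvme,/path/to/image is accepted depends on the bhyve version in use):

Code:
# fragment of a bhyve command line -- hypothetical slot, path and serial
-s 4,nvme,/dev/zvol/zroot/win10/disk0,ser=WIN10DISK0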
 