tun: windows: don't spin unless we really need it

2019-08-18 11:49:37 +02:00 · 2019-08-18 11:49:37 +02:00 · 01786286c1
commit 01786286c1
parent b16dba47a7
1 changed files with 42 additions and 10 deletions
--- a/tun/tun_windows.go
+++ b/tun/tun_windows.go
@ -19,11 +19,14 @@ import (
 )

 const (
-	packetAlignment    uint32 = 4        // Number of bytes packets are aligned to in rings
+	packetAlignment            = 4        // Number of bytes packets are aligned to in rings
 	packetSizeMax              = 0xffff   // Maximum packet size
 	packetCapacity             = 0x800000 // Ring capacity, 8MiB
 	packetTrailingSize         = uint32(unsafe.Sizeof(packetHeader{})) + ((packetSizeMax + (packetAlignment - 1)) &^ (packetAlignment - 1)) - packetAlignment
 	ioctlRegisterRings         = (51820 << 16) | (0x970 << 2) | 0 /*METHOD_BUFFERED*/ | (0x3 /*FILE_READ_DATA | FILE_WRITE_DATA*/ << 14)
+	rateMeasurementGranularity = uint64((time.Second / 2) / time.Nanosecond)
+	spinloopRateThreshold      = 800000000 / 8                                   // 800mbps
+	spinloopDuration           = uint64(time.Millisecond / 80 / time.Nanosecond) // ~1gbit/s
 )

 type packetHeader struct {
@ -50,6 +53,13 @@ type ringDescriptor struct {
 	}
 }

+type rateJuggler struct {
+	current       uint64
+	nextByteCount uint64
+	nextStartTime int64
+	changing      int32
+}
+
 type NativeTun struct {
 	wt        *wintun.Wintun
 	handle    windows.Handle
@ -58,8 +68,15 @@ type NativeTun struct {
 	events    chan Event
 	errors    chan error
 	forcedMTU int
+	rate      rateJuggler
 }

+//go:linkname procyield runtime.procyield
+func procyield(cycles uint32)
+
+//go:linkname nanotime runtime.nanotime
+func nanotime() int64
+
 func packetAlign(size uint32) uint32 {
 	return (size + (packetAlignment - 1)) &^ (packetAlignment - 1)
 }
@ -184,9 +201,6 @@ func (tun *NativeTun) ForceMTU(mtu int) {
 	tun.forcedMTU = mtu
 }

-//go:linkname procyield runtime.procyield
-func procyield(cycles uint32)
-
 // Note: Read() and Write() assume the caller comes only from a single thread; there's no locking.

 func (tun *NativeTun) Read(buff []byte, offset int) (int, error) {
@ -205,7 +219,8 @@ retry:
 		return 0, os.ErrClosed
 	}

-	start := time.Now()
+	start := nanotime()
+	shouldSpin := atomic.LoadUint64(&tun.rate.current) >= spinloopRateThreshold && uint64(start-atomic.LoadInt64(&tun.rate.nextStartTime)) <= rateMeasurementGranularity*2
 	var buffTail uint32
 	for {
 		buffTail = atomic.LoadUint32(&tun.rings.send.ring.tail)
@ -215,7 +230,7 @@ retry:
 		if tun.close {
 			return 0, os.ErrClosed
 		}
-		if time.Since(start) >= time.Millisecond/80 /* ~1gbit/s */ {
+		if !shouldSpin || uint64(nanotime()-start) >= spinloopDuration {
 			windows.WaitForSingleObject(tun.rings.send.tailMoved, windows.INFINITE)
 			goto retry
 		}
@ -243,6 +258,7 @@ retry:
 	copy(buff[offset:], packet.data[:packet.size])
 	buffHead = tun.rings.send.ring.wrap(buffHead + alignedPacketSize)
 	atomic.StoreUint32(&tun.rings.send.ring.head, buffHead)
+	tun.rate.update(uint64(packet.size))
 	return int(packet.size), nil
 }

@ -256,6 +272,7 @@ func (tun *NativeTun) Write(buff []byte, offset int) (int, error) {
 	}

 	packetSize := uint32(len(buff) - offset)
+	tun.rate.update(uint64(packetSize))
 	alignedPacketSize := packetAlign(uint32(unsafe.Sizeof(packetHeader{})) + packetSize)

 	buffHead := atomic.LoadUint32(&tun.rings.receive.ring.head)
@ -292,3 +309,18 @@ func (tun *NativeTun) LUID() uint64 {
 func (rb *ring) wrap(value uint32) uint32 {
 	return value & (packetCapacity - 1)
 }
+
+func (rate *rateJuggler) update(packetLen uint64) {
+	now := nanotime()
+	total := atomic.AddUint64(&rate.nextByteCount, packetLen)
+	period := uint64(now - atomic.LoadInt64(&rate.nextStartTime))
+	if period >= rateMeasurementGranularity {
+		if !atomic.CompareAndSwapInt32(&rate.changing, 0, 1) {
+			return
+		}
+		atomic.StoreInt64(&rate.nextStartTime, now)
+		atomic.StoreUint64(&rate.current, total*uint64(time.Second/time.Nanosecond)/period)
+		atomic.StoreUint64(&rate.nextByteCount, 0)
+		atomic.StoreInt32(&rate.changing, 0)
+	}
+}