device: get rid of nonce routine

This moves to a simple queue with no routine processing it, to reduce
scheduler pressure.

This splits latency in half!

benchmark                  old ns/op     new ns/op     delta
BenchmarkThroughput-16     2394          2364          -1.25%
BenchmarkLatency-16        259652        120810        -53.47%

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
This commit is contained in:
Jason A. Donenfeld 2021-01-27 18:13:53 +01:00
parent a11dec5dc1
commit 1b092ce584
8 changed files with 75 additions and 170 deletions

View file

@ -16,10 +16,6 @@ import (
"golang.zx2c4.com/wireguard/conn" "golang.zx2c4.com/wireguard/conn"
) )
const (
PeerRoutineNumber = 2
)
type Peer struct { type Peer struct {
isRunning AtomicBool isRunning AtomicBool
sync.RWMutex // Mostly protects endpoint, but is generally taken whenever we modify peer sync.RWMutex // Mostly protects endpoint, but is generally taken whenever we modify peer
@ -54,17 +50,11 @@ type Peer struct {
sentLastMinuteHandshake AtomicBool sentLastMinuteHandshake AtomicBool
} }
signals struct {
newKeypairArrived chan struct{}
flushNonceQueue chan struct{}
}
queue struct { queue struct {
sync.RWMutex sync.RWMutex
nonce chan *QueueOutboundElement // nonce / pre-handshake queue staged chan *QueueOutboundElement // staged packets before a handshake is available
outbound chan *QueueOutboundElement // sequential ordering of work outbound chan *QueueOutboundElement // sequential ordering of work
inbound chan *QueueInboundElement // sequential ordering of work inbound chan *QueueInboundElement // sequential ordering of work
packetInNonceQueueIsAwaitingKey AtomicBool
} }
routines struct { routines struct {
@ -197,25 +187,20 @@ func (peer *Peer) Start() {
peer.routines.stopping.Wait() peer.routines.stopping.Wait()
peer.routines.stop = make(chan struct{}) peer.routines.stop = make(chan struct{})
peer.routines.stopping.Add(PeerRoutineNumber) peer.routines.stopping.Add(1)
// prepare queues // prepare queues
peer.queue.Lock() peer.queue.Lock()
peer.queue.nonce = make(chan *QueueOutboundElement, QueueOutboundSize) peer.queue.staged = make(chan *QueueOutboundElement, QueueStagedSize)
peer.queue.outbound = make(chan *QueueOutboundElement, QueueOutboundSize) peer.queue.outbound = make(chan *QueueOutboundElement, QueueOutboundSize)
peer.queue.inbound = make(chan *QueueInboundElement, QueueInboundSize) peer.queue.inbound = make(chan *QueueInboundElement, QueueInboundSize)
peer.queue.Unlock() peer.queue.Unlock()
peer.timersInit() peer.timersInit()
peer.handshake.lastSentHandshake = time.Now().Add(-(RekeyTimeout + time.Second)) peer.handshake.lastSentHandshake = time.Now().Add(-(RekeyTimeout + time.Second))
peer.signals.newKeypairArrived = make(chan struct{}, 1)
peer.signals.flushNonceQueue = make(chan struct{}, 1)
// wait for routines to start // wait for routines to start
// RoutineNonce writes to the encryption queue; keep it alive until we are done.
device.queue.encryption.wg.Add(1)
go peer.RoutineNonce()
go peer.RoutineSequentialSender() go peer.RoutineSequentialSender()
go peer.RoutineSequentialReceiver() go peer.RoutineSequentialReceiver()
@ -245,7 +230,7 @@ func (peer *Peer) ZeroAndFlushAll() {
handshake.Clear() handshake.Clear()
handshake.mutex.Unlock() handshake.mutex.Unlock()
peer.FlushNonceQueue() peer.FlushStagedPackets()
} }
func (peer *Peer) ExpireCurrentKeypairs() { func (peer *Peer) ExpireCurrentKeypairs() {
@ -291,8 +276,8 @@ func (peer *Peer) Stop() {
// close queues // close queues
peer.queue.Lock() peer.queue.Lock()
close(peer.queue.nonce)
close(peer.queue.inbound) close(peer.queue.inbound)
close(peer.queue.outbound)
peer.queue.Unlock() peer.queue.Unlock()
peer.ZeroAndFlushAll() peer.ZeroAndFlushAll()

View file

@ -8,6 +8,7 @@ package device
/* Reduce memory consumption for Android */ /* Reduce memory consumption for Android */
const ( const (
QueueStagedSize = 128
QueueOutboundSize = 1024 QueueOutboundSize = 1024
QueueInboundSize = 1024 QueueInboundSize = 1024
QueueHandshakeSize = 1024 QueueHandshakeSize = 1024

View file

@ -8,6 +8,7 @@
package device package device
const ( const (
QueueStagedSize = 128
QueueOutboundSize = 1024 QueueOutboundSize = 1024
QueueInboundSize = 1024 QueueInboundSize = 1024
QueueHandshakeSize = 1024 QueueHandshakeSize = 1024

View file

@ -10,6 +10,7 @@ package device
/* Fit within memory limits for iOS's Network Extension API, which has stricter requirements */ /* Fit within memory limits for iOS's Network Extension API, which has stricter requirements */
const ( const (
QueueStagedSize = 128
QueueOutboundSize = 1024 QueueOutboundSize = 1024
QueueInboundSize = 1024 QueueInboundSize = 1024
QueueHandshakeSize = 1024 QueueHandshakeSize = 1024

View file

@ -427,10 +427,6 @@ func (device *Device) RoutineHandshake() {
peer.timersSessionDerived() peer.timersSessionDerived()
peer.timersHandshakeComplete() peer.timersHandshakeComplete()
peer.SendKeepalive() peer.SendKeepalive()
select {
case peer.signals.newKeypairArrived <- struct{}{}:
default:
}
} }
} }
} }
@ -485,10 +481,7 @@ func (peer *Peer) RoutineSequentialReceiver() {
// check if using new keypair // check if using new keypair
if peer.ReceivedWithKeypair(elem.keypair) { if peer.ReceivedWithKeypair(elem.keypair) {
peer.timersHandshakeComplete() peer.timersHandshakeComplete()
select { peer.SendStagedPackets()
case peer.signals.newKeypairArrived <- struct{}{}:
default:
}
} }
peer.keepKeyFreshReceiving() peer.keepKeyFreshReceiving()

View file

@ -71,41 +71,26 @@ func (elem *QueueOutboundElement) clearPointers() {
elem.peer = nil elem.peer = nil
} }
func addToNonceQueue(queue chan *QueueOutboundElement, elem *QueueOutboundElement, device *Device) {
for {
select {
case queue <- elem:
return
default:
select {
case old := <-queue:
device.PutMessageBuffer(old.buffer)
device.PutOutboundElement(old)
default:
}
}
}
}
/* Queues a keepalive if no packets are queued for peer /* Queues a keepalive if no packets are queued for peer
*/ */
func (peer *Peer) SendKeepalive() bool { func (peer *Peer) SendKeepalive() {
var elem *QueueOutboundElement
peer.queue.RLock() peer.queue.RLock()
defer peer.queue.RUnlock() if len(peer.queue.staged) != 0 || !peer.isRunning.Get() {
if len(peer.queue.nonce) != 0 || peer.queue.packetInNonceQueueIsAwaitingKey.Get() || !peer.isRunning.Get() { goto out
return false
} }
elem := peer.device.NewOutboundElement() elem = peer.device.NewOutboundElement()
elem.packet = nil elem.packet = nil
select { select {
case peer.queue.nonce <- elem: case peer.queue.staged <- elem:
peer.device.log.Verbosef("%v - Sending keepalive packet", peer) peer.device.log.Verbosef("%v - Sending keepalive packet", peer)
return true
default: default:
peer.device.PutMessageBuffer(elem.buffer) peer.device.PutMessageBuffer(elem.buffer)
peer.device.PutOutboundElement(elem) peer.device.PutOutboundElement(elem)
return false
} }
out:
peer.queue.RUnlock()
peer.SendStagedPackets()
} }
func (peer *Peer) SendHandshakeInitiation(isRetry bool) error { func (peer *Peer) SendHandshakeInitiation(isRetry bool) error {
@ -220,7 +205,7 @@ func (peer *Peer) keepKeyFreshSending() {
} }
/* Reads packets from the TUN and inserts /* Reads packets from the TUN and inserts
* into nonce queue for peer * into staged queue for peer
* *
* Obs. Single instance per TUN device * Obs. Single instance per TUN device
*/ */
@ -287,136 +272,53 @@ func (device *Device) RoutineReadFromTUN() {
if peer == nil { if peer == nil {
continue continue
} }
// insert into nonce/pre-handshake queue
peer.queue.RLock()
if peer.isRunning.Get() { if peer.isRunning.Get() {
if peer.queue.packetInNonceQueueIsAwaitingKey.Get() { peer.StagePacket(elem)
peer.SendHandshakeInitiation(false)
}
addToNonceQueue(peer.queue.nonce, elem, device)
elem = nil elem = nil
peer.SendStagedPackets()
} }
peer.queue.RUnlock()
} }
} }
func (peer *Peer) FlushNonceQueue() { func (peer *Peer) StagePacket(elem *QueueOutboundElement) {
for {
select { select {
case peer.signals.flushNonceQueue <- struct{}{}: case peer.queue.staged <- elem:
return
default: default:
select {
case tooOld := <-peer.queue.staged:
peer.device.PutMessageBuffer(tooOld.buffer)
peer.device.PutOutboundElement(tooOld)
default:
}
}
} }
} }
/* Queues packets when there is no handshake. func (peer *Peer) SendStagedPackets() {
* Then assigns nonces to packets sequentially top:
* and creates "work" structs for workers if len(peer.queue.staged) == 0 || !peer.device.isUp.Get() {
*
* Obs. A single instance per peer
*/
func (peer *Peer) RoutineNonce() {
var keypair *Keypair
device := peer.device
flush := func() {
for {
select {
case elem := <-peer.queue.nonce:
device.PutMessageBuffer(elem.buffer)
device.PutOutboundElement(elem)
default:
return
}
}
}
defer func() {
flush()
device.log.Verbosef("%v - Routine: nonce worker - stopped", peer)
peer.queue.packetInNonceQueueIsAwaitingKey.Set(false)
device.queue.encryption.wg.Done() // no more writes from us
close(peer.queue.outbound) // no more writes to this channel
peer.routines.stopping.Done()
}()
device.log.Verbosef("%v - Routine: nonce worker - started", peer)
NextPacket:
for {
peer.queue.packetInNonceQueueIsAwaitingKey.Set(false)
select {
case <-peer.routines.stop:
return
case <-peer.signals.flushNonceQueue:
flush()
continue NextPacket
case elem, ok := <-peer.queue.nonce:
if !ok {
return return
} }
// make sure to always pick the newest key keypair := peer.keypairs.Current()
if keypair == nil || atomic.LoadUint64(&keypair.sendNonce) >= RejectAfterMessages || time.Since(keypair.created) >= RejectAfterTime {
for {
// check validity of newest key pair
keypair = peer.keypairs.Current()
if keypair != nil && atomic.LoadUint64(&keypair.sendNonce) < RejectAfterMessages {
if time.Since(keypair.created) < RejectAfterTime {
break
}
}
peer.queue.packetInNonceQueueIsAwaitingKey.Set(true)
// no suitable key pair, request for new handshake
select {
case <-peer.signals.newKeypairArrived:
default:
}
peer.SendHandshakeInitiation(false) peer.SendHandshakeInitiation(false)
// wait for key to be established
device.log.Verbosef("%v - Awaiting keypair", peer)
select {
case <-peer.signals.newKeypairArrived:
device.log.Verbosef("%v - Obtained awaited keypair", peer)
case <-peer.signals.flushNonceQueue:
device.PutMessageBuffer(elem.buffer)
device.PutOutboundElement(elem)
flush()
continue NextPacket
case <-peer.routines.stop:
device.PutMessageBuffer(elem.buffer)
device.PutOutboundElement(elem)
return return
} }
} peer.device.queue.encryption.wg.Add(1)
peer.queue.packetInNonceQueueIsAwaitingKey.Set(false) defer peer.device.queue.encryption.wg.Done()
// populate work element
for {
select {
case elem := <-peer.queue.staged:
elem.peer = peer elem.peer = peer
elem.nonce = atomic.AddUint64(&keypair.sendNonce, 1) - 1 elem.nonce = atomic.AddUint64(&keypair.sendNonce, 1) - 1
// double check in case of race condition added by future code
if elem.nonce >= RejectAfterMessages { if elem.nonce >= RejectAfterMessages {
atomic.StoreUint64(&keypair.sendNonce, RejectAfterMessages) atomic.StoreUint64(&keypair.sendNonce, RejectAfterMessages)
device.PutMessageBuffer(elem.buffer) peer.StagePacket(elem) // XXX: Out of order, but we can't front-load go chans
device.PutOutboundElement(elem) goto top
continue NextPacket
} }
elem.keypair = keypair elem.keypair = keypair
@ -424,7 +326,21 @@ NextPacket:
// add to parallel and sequential queue // add to parallel and sequential queue
peer.queue.outbound <- elem peer.queue.outbound <- elem
device.queue.encryption.c <- elem peer.device.queue.encryption.c <- elem
default:
return
}
}
}
func (peer *Peer) FlushStagedPackets() {
for {
select {
case elem := <-peer.queue.staged:
peer.device.PutMessageBuffer(elem.buffer)
peer.device.PutOutboundElement(elem)
default:
return
} }
} }
} }

View file

@ -87,7 +87,7 @@ func expiredRetransmitHandshake(peer *Peer) {
/* We drop all packets without a keypair and don't try again, /* We drop all packets without a keypair and don't try again,
* if we try unsuccessfully for too long to make a handshake. * if we try unsuccessfully for too long to make a handshake.
*/ */
peer.FlushNonceQueue() peer.FlushStagedPackets()
/* We set a timer for destroying any residue that might be left /* We set a timer for destroying any residue that might be left
* of a partial exchange. * of a partial exchange.

View file

@ -156,6 +156,7 @@ func (device *Device) IpcSetOperation(r io.Reader) (err error) {
if deviceConfig { if deviceConfig {
deviceConfig = false deviceConfig = false
} }
peer.handlePostConfig()
// Load/create the peer we are now configuring. // Load/create the peer we are now configuring.
err := device.handlePublicKeyLine(peer, value) err := device.handlePublicKeyLine(peer, value)
if err != nil { if err != nil {
@ -174,6 +175,7 @@ func (device *Device) IpcSetOperation(r io.Reader) (err error) {
return err return err
} }
} }
peer.handlePostConfig()
if err := scanner.Err(); err != nil { if err := scanner.Err(); err != nil {
return ipcErrorf(ipc.IpcErrorIO, "failed to read input: %w", err) return ipcErrorf(ipc.IpcErrorIO, "failed to read input: %w", err)
@ -241,6 +243,12 @@ type ipcSetPeer struct {
created bool // new reports whether this is a newly created peer created bool // new reports whether this is a newly created peer
} }
func (peer *ipcSetPeer) handlePostConfig() {
if peer.Peer != nil && !peer.dummy && peer.Peer.device.isUp.Get() {
peer.SendStagedPackets()
}
}
func (device *Device) handlePublicKeyLine(peer *ipcSetPeer, value string) error { func (device *Device) handlePublicKeyLine(peer *ipcSetPeer, value string) error {
// Load/create the peer we are configuring. // Load/create the peer we are configuring.
var publicKey NoisePublicKey var publicKey NoisePublicKey