c76b818466
This works around a startup race condition when competing with HackListener, which is trying to do the same job. If HackListener detects that the tundev is running while there is still an event in the netlink queue that says it isn't running, then the device receives a string of events like: EventUp (HackListener), EventDown (NetlinkListener), EventUp (NetlinkListener). Unfortunately, after the first EventDown, the device stops itself, thinking incorrectly that the administrator has downed its tundev. The device is ignoring the initial EventDown anyway, so just don't emit it. Signed-off-by: Avery Pennarun <apenwarr@tailscale.com>
490 lines
9.8 KiB
Go
490 lines
9.8 KiB
Go
/* SPDX-License-Identifier: MIT
|
|
*
|
|
* Copyright (C) 2017-2019 WireGuard LLC. All Rights Reserved.
|
|
*/
|
|
|
|
package tun
|
|
|
|
/* Implementation of the TUN device interface for linux
|
|
*/
|
|
|
|
import (
|
|
"bytes"
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"sync"
|
|
"syscall"
|
|
"time"
|
|
"unsafe"
|
|
|
|
"golang.org/x/net/ipv6"
|
|
"golang.org/x/sys/unix"
|
|
"golang.zx2c4.com/wireguard/rwcancel"
|
|
)
|
|
|
|
const (
|
|
cloneDevicePath = "/dev/net/tun"
|
|
ifReqSize = unix.IFNAMSIZ + 64
|
|
)
|
|
|
|
type NativeTun struct {
|
|
tunFile *os.File
|
|
index int32 // if index
|
|
errors chan error // async error handling
|
|
events chan Event // device related events
|
|
nopi bool // the device was passed IFF_NO_PI
|
|
netlinkSock int
|
|
netlinkCancel *rwcancel.RWCancel
|
|
hackListenerClosed sync.Mutex
|
|
statusListenersShutdown chan struct{}
|
|
|
|
nameOnce sync.Once // guards calling initNameCache, which sets following fields
|
|
nameCache string // name of interface
|
|
nameErr error
|
|
}
|
|
|
|
func (tun *NativeTun) File() *os.File {
|
|
return tun.tunFile
|
|
}
|
|
|
|
func (tun *NativeTun) routineHackListener() {
|
|
defer tun.hackListenerClosed.Unlock()
|
|
/* This is needed for the detection to work across network namespaces
|
|
* If you are reading this and know a better method, please get in touch.
|
|
*/
|
|
for {
|
|
sysconn, err := tun.tunFile.SyscallConn()
|
|
if err != nil {
|
|
return
|
|
}
|
|
err2 := sysconn.Control(func(fd uintptr) {
|
|
_, err = unix.Write(int(fd), nil)
|
|
})
|
|
if err2 != nil {
|
|
return
|
|
}
|
|
switch err {
|
|
case unix.EINVAL:
|
|
// If the tunnel is up, it reports that write() is
|
|
// allowed but we provided invalid data.
|
|
tun.events <- EventUp
|
|
case unix.EIO:
|
|
// If the tunnel is down, it reports that no I/O
|
|
// is possible, without checking our provided data.
|
|
tun.events <- EventDown
|
|
default:
|
|
return
|
|
}
|
|
select {
|
|
case <-time.After(time.Second):
|
|
// nothing
|
|
case <-tun.statusListenersShutdown:
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func createNetlinkSocket() (int, error) {
|
|
sock, err := unix.Socket(unix.AF_NETLINK, unix.SOCK_RAW, unix.NETLINK_ROUTE)
|
|
if err != nil {
|
|
return -1, err
|
|
}
|
|
saddr := &unix.SockaddrNetlink{
|
|
Family: unix.AF_NETLINK,
|
|
Groups: unix.RTMGRP_LINK | unix.RTMGRP_IPV4_IFADDR | unix.RTMGRP_IPV6_IFADDR,
|
|
}
|
|
err = unix.Bind(sock, saddr)
|
|
if err != nil {
|
|
return -1, err
|
|
}
|
|
return sock, nil
|
|
}
|
|
|
|
func (tun *NativeTun) routineNetlinkListener() {
|
|
defer func() {
|
|
unix.Close(tun.netlinkSock)
|
|
tun.hackListenerClosed.Lock()
|
|
close(tun.events)
|
|
}()
|
|
|
|
for msg := make([]byte, 1<<16); ; {
|
|
|
|
var err error
|
|
var msgn int
|
|
for {
|
|
msgn, _, _, _, err = unix.Recvmsg(tun.netlinkSock, msg[:], nil, 0)
|
|
if err == nil || !rwcancel.RetryAfterError(err) {
|
|
break
|
|
}
|
|
if !tun.netlinkCancel.ReadyRead() {
|
|
tun.errors <- fmt.Errorf("netlink socket closed: %s", err.Error())
|
|
return
|
|
}
|
|
}
|
|
if err != nil {
|
|
tun.errors <- fmt.Errorf("failed to receive netlink message: %s", err.Error())
|
|
return
|
|
}
|
|
|
|
select {
|
|
case <-tun.statusListenersShutdown:
|
|
return
|
|
default:
|
|
}
|
|
|
|
wasEverUp := false
|
|
for remain := msg[:msgn]; len(remain) >= unix.SizeofNlMsghdr; {
|
|
|
|
hdr := *(*unix.NlMsghdr)(unsafe.Pointer(&remain[0]))
|
|
|
|
if int(hdr.Len) > len(remain) {
|
|
break
|
|
}
|
|
|
|
switch hdr.Type {
|
|
case unix.NLMSG_DONE:
|
|
remain = []byte{}
|
|
|
|
case unix.RTM_NEWLINK:
|
|
info := *(*unix.IfInfomsg)(unsafe.Pointer(&remain[unix.SizeofNlMsghdr]))
|
|
remain = remain[hdr.Len:]
|
|
|
|
if info.Index != tun.index {
|
|
// not our interface
|
|
continue
|
|
}
|
|
|
|
if info.Flags&unix.IFF_RUNNING != 0 {
|
|
tun.events <- EventUp
|
|
wasEverUp = true
|
|
}
|
|
|
|
if info.Flags&unix.IFF_RUNNING == 0 {
|
|
// Don't emit EventDown before we've ever emitted EventUp.
|
|
// This avoids a startup race with HackListener, which
|
|
// might detect Up before we have finished reporting Down.
|
|
if wasEverUp {
|
|
tun.events <- EventDown
|
|
}
|
|
}
|
|
|
|
tun.events <- EventMTUUpdate
|
|
|
|
default:
|
|
remain = remain[hdr.Len:]
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func getIFIndex(name string) (int32, error) {
|
|
fd, err := unix.Socket(
|
|
unix.AF_INET,
|
|
unix.SOCK_DGRAM,
|
|
0,
|
|
)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
defer unix.Close(fd)
|
|
|
|
var ifr [ifReqSize]byte
|
|
copy(ifr[:], name)
|
|
_, _, errno := unix.Syscall(
|
|
unix.SYS_IOCTL,
|
|
uintptr(fd),
|
|
uintptr(unix.SIOCGIFINDEX),
|
|
uintptr(unsafe.Pointer(&ifr[0])),
|
|
)
|
|
|
|
if errno != 0 {
|
|
return 0, errno
|
|
}
|
|
|
|
return *(*int32)(unsafe.Pointer(&ifr[unix.IFNAMSIZ])), nil
|
|
}
|
|
|
|
func (tun *NativeTun) setMTU(n int) error {
|
|
name, err := tun.Name()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// open datagram socket
|
|
fd, err := unix.Socket(
|
|
unix.AF_INET,
|
|
unix.SOCK_DGRAM,
|
|
0,
|
|
)
|
|
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
defer unix.Close(fd)
|
|
|
|
// do ioctl call
|
|
var ifr [ifReqSize]byte
|
|
copy(ifr[:], name)
|
|
*(*uint32)(unsafe.Pointer(&ifr[unix.IFNAMSIZ])) = uint32(n)
|
|
_, _, errno := unix.Syscall(
|
|
unix.SYS_IOCTL,
|
|
uintptr(fd),
|
|
uintptr(unix.SIOCSIFMTU),
|
|
uintptr(unsafe.Pointer(&ifr[0])),
|
|
)
|
|
|
|
if errno != 0 {
|
|
return errors.New("failed to set MTU of TUN device")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (tun *NativeTun) MTU() (int, error) {
|
|
name, err := tun.Name()
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
// open datagram socket
|
|
fd, err := unix.Socket(
|
|
unix.AF_INET,
|
|
unix.SOCK_DGRAM,
|
|
0,
|
|
)
|
|
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
defer unix.Close(fd)
|
|
|
|
// do ioctl call
|
|
|
|
var ifr [ifReqSize]byte
|
|
copy(ifr[:], name)
|
|
_, _, errno := unix.Syscall(
|
|
unix.SYS_IOCTL,
|
|
uintptr(fd),
|
|
uintptr(unix.SIOCGIFMTU),
|
|
uintptr(unsafe.Pointer(&ifr[0])),
|
|
)
|
|
if errno != 0 {
|
|
return 0, errors.New("failed to get MTU of TUN device: " + errno.Error())
|
|
}
|
|
|
|
return int(*(*int32)(unsafe.Pointer(&ifr[unix.IFNAMSIZ]))), nil
|
|
}
|
|
|
|
func (tun *NativeTun) Name() (string, error) {
|
|
tun.nameOnce.Do(tun.initNameCache)
|
|
return tun.nameCache, tun.nameErr
|
|
}
|
|
|
|
func (tun *NativeTun) initNameCache() {
|
|
tun.nameCache, tun.nameErr = tun.nameSlow()
|
|
}
|
|
|
|
func (tun *NativeTun) nameSlow() (string, error) {
|
|
sysconn, err := tun.tunFile.SyscallConn()
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
var ifr [ifReqSize]byte
|
|
var errno syscall.Errno
|
|
err = sysconn.Control(func(fd uintptr) {
|
|
_, _, errno = unix.Syscall(
|
|
unix.SYS_IOCTL,
|
|
fd,
|
|
uintptr(unix.TUNGETIFF),
|
|
uintptr(unsafe.Pointer(&ifr[0])),
|
|
)
|
|
})
|
|
if err != nil {
|
|
return "", errors.New("failed to get name of TUN device: " + err.Error())
|
|
}
|
|
if errno != 0 {
|
|
return "", errors.New("failed to get name of TUN device: " + errno.Error())
|
|
}
|
|
name := ifr[:]
|
|
if i := bytes.IndexByte(name, 0); i != -1 {
|
|
name = name[:i]
|
|
}
|
|
return string(name), nil
|
|
}
|
|
|
|
func (tun *NativeTun) Write(buff []byte, offset int) (int, error) {
|
|
|
|
if tun.nopi {
|
|
buff = buff[offset:]
|
|
} else {
|
|
// reserve space for header
|
|
|
|
buff = buff[offset-4:]
|
|
|
|
// add packet information header
|
|
|
|
buff[0] = 0x00
|
|
buff[1] = 0x00
|
|
|
|
if buff[4]>>4 == ipv6.Version {
|
|
buff[2] = 0x86
|
|
buff[3] = 0xdd
|
|
} else {
|
|
buff[2] = 0x08
|
|
buff[3] = 0x00
|
|
}
|
|
}
|
|
|
|
// write
|
|
|
|
return tun.tunFile.Write(buff)
|
|
}
|
|
|
|
func (tun *NativeTun) Flush() error {
|
|
// TODO: can flushing be implemented by buffering and using sendmmsg?
|
|
return nil
|
|
}
|
|
|
|
func (tun *NativeTun) Read(buff []byte, offset int) (int, error) {
|
|
select {
|
|
case err := <-tun.errors:
|
|
return 0, err
|
|
default:
|
|
if tun.nopi {
|
|
return tun.tunFile.Read(buff[offset:])
|
|
} else {
|
|
buff := buff[offset-4:]
|
|
n, err := tun.tunFile.Read(buff[:])
|
|
if n < 4 {
|
|
return 0, err
|
|
}
|
|
return n - 4, err
|
|
}
|
|
}
|
|
}
|
|
|
|
func (tun *NativeTun) Events() chan Event {
|
|
return tun.events
|
|
}
|
|
|
|
func (tun *NativeTun) Close() error {
|
|
var err1 error
|
|
if tun.statusListenersShutdown != nil {
|
|
close(tun.statusListenersShutdown)
|
|
if tun.netlinkCancel != nil {
|
|
err1 = tun.netlinkCancel.Cancel()
|
|
}
|
|
} else if tun.events != nil {
|
|
close(tun.events)
|
|
}
|
|
err2 := tun.tunFile.Close()
|
|
|
|
if err1 != nil {
|
|
return err1
|
|
}
|
|
return err2
|
|
}
|
|
|
|
func CreateTUN(name string, mtu int) (Device, error) {
|
|
nfd, err := unix.Open(cloneDevicePath, os.O_RDWR, 0)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var ifr [ifReqSize]byte
|
|
var flags uint16 = unix.IFF_TUN // | unix.IFF_NO_PI (disabled for TUN status hack)
|
|
nameBytes := []byte(name)
|
|
if len(nameBytes) >= unix.IFNAMSIZ {
|
|
return nil, errors.New("interface name too long")
|
|
}
|
|
copy(ifr[:], nameBytes)
|
|
*(*uint16)(unsafe.Pointer(&ifr[unix.IFNAMSIZ])) = flags
|
|
|
|
_, _, errno := unix.Syscall(
|
|
unix.SYS_IOCTL,
|
|
uintptr(nfd),
|
|
uintptr(unix.TUNSETIFF),
|
|
uintptr(unsafe.Pointer(&ifr[0])),
|
|
)
|
|
if errno != 0 {
|
|
return nil, errno
|
|
}
|
|
err = unix.SetNonblock(nfd, true)
|
|
|
|
// Note that the above -- open,ioctl,nonblock -- must happen prior to handing it to netpoll as below this line.
|
|
|
|
fd := os.NewFile(uintptr(nfd), cloneDevicePath)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return CreateTUNFromFile(fd, mtu)
|
|
}
|
|
|
|
func CreateTUNFromFile(file *os.File, mtu int) (Device, error) {
|
|
tun := &NativeTun{
|
|
tunFile: file,
|
|
events: make(chan Event, 5),
|
|
errors: make(chan error, 5),
|
|
statusListenersShutdown: make(chan struct{}),
|
|
nopi: false,
|
|
}
|
|
|
|
name, err := tun.Name()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// start event listener
|
|
|
|
tun.index, err = getIFIndex(name)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
tun.netlinkSock, err = createNetlinkSocket()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
tun.netlinkCancel, err = rwcancel.NewRWCancel(tun.netlinkSock)
|
|
if err != nil {
|
|
unix.Close(tun.netlinkSock)
|
|
return nil, err
|
|
}
|
|
|
|
tun.hackListenerClosed.Lock()
|
|
go tun.routineNetlinkListener()
|
|
go tun.routineHackListener() // cross namespace
|
|
|
|
err = tun.setMTU(mtu)
|
|
if err != nil {
|
|
unix.Close(tun.netlinkSock)
|
|
return nil, err
|
|
}
|
|
|
|
return tun, nil
|
|
}
|
|
|
|
func CreateUnmonitoredTUNFromFD(fd int) (Device, string, error) {
|
|
err := unix.SetNonblock(fd, true)
|
|
if err != nil {
|
|
return nil, "", err
|
|
}
|
|
file := os.NewFile(uintptr(fd), "/dev/tun")
|
|
tun := &NativeTun{
|
|
tunFile: file,
|
|
events: make(chan Event, 5),
|
|
errors: make(chan error, 5),
|
|
nopi: true,
|
|
}
|
|
name, err := tun.Name()
|
|
if err != nil {
|
|
return nil, "", err
|
|
}
|
|
return tun, name, nil
|
|
}
|