Docker and nftables gotchas_

🇺🇦 Resources to help support the people of Ukraine. 🇺🇦
April 26, 2022 @10:20

So it seems that, despite nftables being The Way Forward for the Linux kernel firewall since kernel 3.13 or so, the CADT over at Docker haven't bothered supporting nftables, mostly assuming that people will keep using the iptables compatibility shims. This manifested as build failures for a container on one of the new systems I'm building, because a build step was unable to reach my DNS servers.

Since I decided to adopt nftables as the new standard, I am left trying to figure out how to make it work with Docker. Though a bit dated, this advice from Maximilian Ehlers appears to still work on Debian 11 (Bullseye). I changed the basic ruleset I am using to what is below, and it seems to be working. At least I'm one step closer to decommissioning most of the Raspberry Pi systems in the network.

# TCP destination ports whose traffic is dropped without being logged
# (used by the "drop known noisy traffic" rules in the INPUT chains).
# Named services first, then bare port numbers in ascending order.
define BLOCKED_TCP_PORTS = {
    pop3, auth,
    netbios-ns, netbios-dgm, netbios-ssn, microsoft-ds,
    ms-sql-s, ms-sql-m,
    585, 2967, 5900, 8763,
    27020-27050,
    50010, 50020, 50070, 50075, 50090
}

# UDP destination ports whose traffic is dropped without being logged
# (used by the "drop known noisy traffic" rules in the INPUT chains).
#
# NOTE(review): the range below used to read 20720-27050.  That looks like a
# digit transposition of the 27020-27050 range in BLOCKED_TCP_PORTS, so it has
# been aligned with the TCP list.  Restore the wider range if it was
# intentional.
define BLOCKED_UDP_PORTS = {
    bootps,
    netbios-ns,
    netbios-dgm,
    netbios-ssn,
    snmp,
    snmptrap,
    ipp,
    1026,
    1228,
    ms-sql-s,
    ms-sql-m,
    1534,
    1900,
    2222,
    sip,
    5678,
    6969,
    8763,
    10001,
    17500,
    27020-27050,
    34196,
    51729,
    57621
}

# IPv4 prefixes treated as "mine"; the SSH and NTP rules in the ip filter
# table only accept traffic whose source address falls inside these.
define MY_V4_NETWORKS = { 192.168.42.0/28, 192.168.196.0/23 }

# IPv6 prefixes treated as "mine"; the SSH and NTP rules in the ip6 filter
# table only accept traffic whose source address falls inside these.
define MY_V6_NETWORKS = { 2606:c380:c001:3::/64, 2606:c380:c:1::fffc/128 }

# A lot of this sucks because Docker sucks.  Docker apparently doesn't
# support nftables directly so it is using the iptables wrapper and so
# we have to match its behavior or Docker fails to do anything right.
#
# This doesn't really surprise me.
table ip filter {
    # Named counters, referenced from the two terminal chains below.
    counter c_rejected {
        comment "Rejected packets"
    }

    counter c_dropped {
        comment "Dropped packets"
    }

    # Terminal chain: count the packet and drop it without logging.
    chain drop-no-log {
        counter name c_dropped
        drop
    }

    # Terminal chain: count the packet, then log and reject it.
    #
    # The limit has to come BEFORE log.  Statements in a rule run left to
    # right, so with "log ... limit ... reject" every packet was logged and
    # only the reject was rate limited.  With the limit first, both the log
    # line and the reject are capped at 5/second; packets over the limit
    # reach the end of this chain and, because the chain is entered with
    # goto from a base chain, are handled by that base chain's policy (drop).
    chain log-and-reject {
        counter name c_rejected
        limit rate 5/second \
            log prefix "[nftables REJECT]: " \
            reject
    }

    chain INPUT {
        type filter hook input priority 0; policy drop;
        ct state vmap {
            established: accept,
            related: accept,
            invalid: drop
        } counter
        ip protocol icmp counter accept
        iifname lo counter accept

        # Add any local overrides here.
        include "ruleset.input.*"

        # All servers are allowed a few things.
        # ssh
        ip saddr $MY_V4_NETWORKS tcp dport 22 \
            counter accept comment "SSH"

        # dns
        tcp dport 53 counter accept comment "DNS"
        udp dport 53 counter accept comment "DNS"

        # mdns 224.0.0.251 or [FF02::FB]
        ip daddr 224.0.0.251/32 udp dport 5353 \
            counter accept comment "mDNS"

        # ntp -- NTP is carried over UDP; this rule used to match tcp.
        # NOTE(review): this matches the SOURCE port.  If the host is meant
        # to serve NTP to these networks, "udp dport 123" is what is needed.
        ip saddr $MY_V4_NETWORKS udp sport 123 \
            counter accept comment "NTP"

        # Accept SYNs on high ports
        # NOTE(review): this runs before the blocked-port rules below, so
        # SYNs to high ports listed in BLOCKED_TCP_PORTS are accepted here.
        tcp dport 1024-65535 tcp flags syn \
            counter accept

        # Drop known noisy traffic so it does not get logged.
        tcp dport $BLOCKED_TCP_PORTS goto drop-no-log
        udp dport $BLOCKED_UDP_PORTS goto drop-no-log
        ip daddr 224.0.0.1/32 goto drop-no-log
        goto log-and-reject
    }

    chain FORWARD {
        type filter hook forward priority 0; policy drop;
        goto log-and-reject
    }

    chain OUTPUT {
        type filter hook output priority 0; policy accept;

        # Add any local overrides here.
        # (The glob was misspelled "rulset.output.*"; a glob that matches
        # nothing is skipped without an error, so overrides never loaded.)
        include "ruleset.output.*"
    }
}

table ip6 filter {
    # Named counters, referenced from the two terminal chains below.
    counter c_rejected {
        comment "Rejected packets"
    }

    counter c_dropped {
        comment "Dropped packets"
    }

    # Terminal chain: count the packet and drop it without logging.
    chain drop-no-log {
        counter name c_dropped
        drop
    }

    # Terminal chain: count the packet, then log and reject it.
    #
    # The limit has to come BEFORE log.  Statements in a rule run left to
    # right, so with "log ... limit ... reject" every packet was logged and
    # only the reject was rate limited.  With the limit first, both the log
    # line and the reject are capped at 5/second; packets over the limit
    # reach the end of this chain and, because the chain is entered with
    # goto from a base chain, are handled by that base chain's policy (drop).
    chain log-and-reject {
        counter name c_rejected
        limit rate 5/second \
            log prefix "[nftables6 REJECT]: " \
            reject
    }

    chain INPUT {
        type filter hook input priority 0; policy drop;
        ct state vmap {
            established: accept,
            related: accept,
            invalid: drop
        } counter
        # meta l4proto instead of "ip6 nexthdr": nexthdr only inspects the
        # first Next Header field, so ICMPv6 carried behind an extension
        # header (e.g. MLD with its hop-by-hop option) was not matched.
        meta l4proto ipv6-icmp counter accept
        ip6 nexthdr ipv6-nonxt counter drop
        iifname lo counter accept

        # Add any local overrides here.
        include "ruleset.input6.*"

        # All servers are allowed a few things.
        # ssh
        ip6 saddr $MY_V6_NETWORKS tcp dport 22 \
            counter accept comment "SSH"

        # dns
        tcp dport 53 counter accept comment "DNS"
        udp dport 53 counter accept comment "DNS"

        # mdns 224.0.0.251 or [FF02::FB]
        ip6 daddr ff02::fb/128 udp dport 5353 \
            counter accept comment "mDNS"

        # ntp -- NTP is carried over UDP; this rule used to match tcp.
        # NOTE(review): this matches the SOURCE port.  If the host is meant
        # to serve NTP to these networks, "udp dport 123" is what is needed.
        ip6 saddr $MY_V6_NETWORKS udp sport 123 \
            counter accept comment "NTP"

        # Accept SYNs on high ports
        # NOTE(review): this runs before the blocked-port rules below, so
        # SYNs to high ports listed in BLOCKED_TCP_PORTS are accepted here.
        tcp dport 1024-65535 tcp flags syn \
            counter accept

        # Drop known noisy traffic so it does not get logged.
        tcp dport $BLOCKED_TCP_PORTS goto drop-no-log
        udp dport $BLOCKED_UDP_PORTS goto drop-no-log
        goto log-and-reject
    }

    chain FORWARD {
        type filter hook forward priority 0; policy drop;
        goto log-and-reject
    }

    chain OUTPUT {
        type filter hook output priority 0; policy accept;

        # Add any local overrides here.
        # (The glob was misspelled "rulset.output6.*"; a glob that matches
        # nothing is skipped without an error, so overrides never loaded.)
        include "ruleset.output6.*"
    }
}

After restarting the Docker daemon, we end up with a loaded ruleset that looks like this.

table ip filter {
        counter c_rejected {
                comment "Rejected packets"
                packets 24 bytes 1504
        }

        counter c_dropped {
                comment "Dropped packets"
                packets 12 bytes 1352
        }

        chain drop-no-log {
                counter name "c_dropped"
                drop
        }

        chain log-and-reject {
                counter name "c_rejected"
                log prefix "[nftables REJECT]: " limit rate 5/second reject
        }

        chain INPUT {
                type filter hook input priority filter; policy drop;
                ct state vmap { invalid : drop, established : accept, related : accept } counter packets 0 bytes 0
                ip protocol icmp counter packets 0 bytes 0 accept
                iifname "lo" counter packets 0 bytes 0 accept
                ip saddr { 192.168.42.0/28, 192.168.196.0/23 } tcp dport 5000 counter packets 0 bytes 0 accept comment "Airplay RTSP"
                ip saddr { 192.168.42.0/28, 192.168.196.0/23 } udp dport 6001-6101 counter packets 0 bytes 0 accept comment "Airplay UDP Transport"
                ip saddr { 192.168.42.0/28, 192.168.196.0/23 } tcp dport 22 counter packets 0 bytes 0 accept comment "SSH"
                tcp dport 53 counter packets 0 bytes 0 accept comment "DNS"
                udp dport 53 counter packets 0 bytes 0 accept comment "DNS"
                ip daddr 224.0.0.251 udp dport 5353 counter packets 71 bytes 15441 accept comment "mDNS"
                ip saddr { 192.168.42.0/28, 192.168.196.0/23 } tcp sport 123 counter packets 0 bytes 0 accept comment "NTP"
                tcp dport 1024-65535 tcp flags syn counter packets 0 bytes 0 accept
                tcp dport { 110, 113, 137-139, 445, 585, 1433-1434, 2967, 5900, 8763, 27020-27050, 50010, 50020, 50070, 50075, 50090 } goto drop-no-log
                udp dport { 67, 137-139, 161-162, 631, 1026, 1228, 1433-1434, 1534, 1900, 2222, 5060, 5678, 6969, 8763, 10001, 17500, 20720-27050, 34196, 51729, 57621 } goto drop-no-log
                ip daddr 224.0.0.1 goto drop-no-log
                goto log-and-reject
        }

        chain FORWARD {
                type filter hook forward priority filter; policy drop;
                counter packets 0 bytes 0 jump DOCKER-USER
                counter packets 0 bytes 0 jump DOCKER-ISOLATION-STAGE-1
                oifname "docker0" ct state related,established counter packets 0 bytes 0 accept
                oifname "docker0" counter packets 0 bytes 0 jump DOCKER
                iifname "docker0" oifname != "docker0" counter packets 0 bytes 0 accept
                iifname "docker0" oifname "docker0" counter packets 0 bytes 0 accept
                goto log-and-reject
        }

        chain OUTPUT {
                type filter hook output priority filter; policy accept;
        }

        chain DOCKER {
        }

        chain DOCKER-ISOLATION-STAGE-1 {
                iifname "docker0" oifname != "docker0" counter packets 0 bytes 0 jump DOCKER-ISOLATION-STAGE-2
                counter packets 0 bytes 0 return
        }

        chain DOCKER-ISOLATION-STAGE-2 {
                oifname "docker0" counter packets 0 bytes 0 drop
                counter packets 0 bytes 0 return
        }

        chain DOCKER-USER {
                counter packets 0 bytes 0 return
        }
}
table ip6 filter {
        counter c_rejected {
                comment "Rejected packets"
                packets 0 bytes 0
        }

        counter c_dropped {
                comment "Dropped packets"
                packets 0 bytes 0
        }

        chain drop-no-log {
                counter name "c_dropped"
                drop
        }

        chain log-and-reject {
                counter name "c_rejected"
                log prefix "[nftables6 REJECT]: " limit rate 5/second reject
        }

        chain INPUT {
                type filter hook input priority filter; policy drop;
                ct state vmap { invalid : drop, established : accept, related : accept } counter packets 0 bytes 0
                ip6 nexthdr ipv6-icmp counter packets 0 bytes 0 accept
                ip6 nexthdr ipv6-nonxt counter packets 0 bytes 0 drop
                iifname "lo" counter packets 0 bytes 0 accept
                ip6 saddr { 2606:c380:c:1::fffc, 2606:c380:c001:3::/64 } tcp dport 5000 counter packets 0 bytes 0 accept comment "Airplay RTSP"
                ip6 saddr fe80::/10 tcp dport 5000 counter packets 0 bytes 0 accept comment "Airplay RTSP"
                ip6 saddr { 2606:c380:c:1::fffc, 2606:c380:c001:3::/64 } udp dport 6001-6101 counter packets 0 bytes 0 accept comment "Airplay UDP Transport"
                ip6 saddr fe80::/10 udp dport 6001-6101 counter packets 0 bytes 0 accept comment "Airplay UDP Transport"
                ip6 saddr { 2606:c380:c:1::fffc, 2606:c380:c001:3::/64 } tcp dport 22 counter packets 0 bytes 0 accept comment "SSH"
                tcp dport 53 counter packets 0 bytes 0 accept comment "DNS"
                udp dport 53 counter packets 0 bytes 0 accept comment "DNS"
                ip6 daddr ff02::fb udp dport 5353 counter packets 50 bytes 14633 accept comment "mDNS"
                ip6 saddr { 2606:c380:c:1::fffc, 2606:c380:c001:3::/64 } tcp sport 123 counter packets 0 bytes 0 accept comment "NTP"
                tcp dport 1024-65535 tcp flags syn counter packets 0 bytes 0 accept
                tcp dport { 110, 113, 137-139, 445, 585, 1433-1434, 2967, 5900, 8763, 27020-27050, 50010, 50020, 50070, 50075, 50090 } goto drop-no-log
                udp dport { 67, 137-139, 161-162, 631, 1026, 1228, 1433-1434, 1534, 1900, 2222, 5060, 5678, 6969, 8763, 10001, 17500, 20720-27050, 34196, 51729, 57621 } goto drop-no-log
                goto log-and-reject
        }

        chain FORWARD {
                type filter hook forward priority filter; policy drop;
                goto log-and-reject
        }

        chain OUTPUT {
                type filter hook output priority filter; policy accept;
        }
}
table ip nat {
        chain DOCKER {
                iifname "docker0" counter packets 0 bytes 0 return
        }

        chain POSTROUTING {
                type nat hook postrouting priority srcnat; policy accept;
                oifname != "docker0" ip saddr 172.17.0.0/16 counter packets 0 bytes 0 masquerade
        }

        chain PREROUTING {
                type nat hook prerouting priority dstnat; policy accept;
                fib daddr type local counter packets 0 bytes 0 jump DOCKER
        }

        chain OUTPUT {
                type nat hook output priority -100; policy accept;
                ip daddr != 127.0.0.0/8 fib daddr type local counter packets 0 bytes 0 jump DOCKER
        }
}

If only Linux firewalls had anchors like OpenBSD's pf does, this whole mess could have been worked around.

Subscribe via RSS. Send me a comment.