Use alloc_netdev_mqs to incorporate multiple TX/RX networking queues

In the previous article (explained here), we described the alloc_netdev_mqs API and how it allows a networking device to create multiple transmit and receive queues for itself, which leads to better use of network resources.

In our current series of articles, where we are adding functionality to a simple networking loopback driver, multiple transmit and receive queues will not yield a large performance gain by themselves. The goal, rather, is to show how a regular networking driver would use them, so we modify the virtual networking loopback device driver (provided here) to support multiple transmit and receive queues.

We also attempt to simulate how packets might be queued from hardware to a receive queue, and map each looped-back packet to a receive queue. As can be seen in the code, the driver still has its own transmit and receive queues, and the packets that are received on a particular receive queue are mapped to a specific queue in the networking device driver.

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/uaccess.h>
#include <linux/sockios.h>

#define DRV_NAME "vivek_net"

MODULE_AUTHOR("Vivekananda Uppunda");
MODULE_DESCRIPTION("Virtual netdev with TX/RX queues for network stack");
MODULE_LICENSE("GPL");

#define VNET_IOCTL_RESET_STATS   (SIOCDEVPRIVATE + 0)

#define VNET_TX_QUEUES 4
#define VNET_RX_QUEUES 2

/*
 * Single global lock serializing all driver state: device stats and the
 * per-queue skb rings below.  Initialized in vnet_init().
 * NOTE(review): one lock for all queues defeats the point of multi-queue
 * scaling; fine for this demo driver, but a real driver would use a
 * per-queue lock (or the lock built into sk_buff_head).
 */
spinlock_t priv_lock;

/* Per-device private data, allocated by alloc_netdev_mqs() and reached
 * via netdev_priv(dev). */
struct vnet_priv {
    struct sk_buff_head tx_ring[VNET_TX_QUEUES]; /* driver-level TX queues */
    struct sk_buff_head rx_ring[VNET_RX_QUEUES]; /* driver-level RX queues */
};

/* -------------------------------------------------- */
/* IOCTL function */
/* -------------------------------------------------- */
/*
 * Private-ioctl handler (ndo_siocdevprivate).
 *
 * Supports a single command, VNET_IOCTL_RESET_STATS, which zeroes the
 * device statistics.  Any other command returns -EOPNOTSUPP.
 *
 * Fix: the original returned -EOPNOTSUPP from the default branch while
 * still holding priv_lock, leaking the lock and deadlocking the next
 * ioctl or transmit.  All paths now release the lock before returning.
 */
static int vnet_siocdevprivate(struct net_device *dev,
                               struct ifreq *ifr,
			       void __user *data,
                               int cmd)
{
    int ret = 0;

    pr_info("vnet_siocdevprivate\n");

    spin_lock(&priv_lock);

    switch (cmd) {
    case VNET_IOCTL_RESET_STATS:
        memset(&dev->stats, 0, sizeof(dev->stats));
        pr_info("%s: stats reset via ioctl\n", dev->name);
        break;

    default:
        pr_info("%s: default not supported\n", dev->name);
        ret = -EOPNOTSUPP;
        break;
    }
    spin_unlock(&priv_lock);
    return ret;
}

/*
 * Drain every driver-level RX ring and hand each buffered skb up to the
 * networking stack via netif_rx(), updating rx_packets/rx_bytes as we go.
 * Called from the transmit path to emulate immediate loopback delivery.
 */
static void vnet_process_rx_ring(struct net_device *dev)
{
    struct vnet_priv *priv = netdev_priv(dev);
    int ring;

    for (ring = 0; ring < VNET_RX_QUEUES; ring++) {
        struct sk_buff *pkt;

        pkt = skb_dequeue(&priv->rx_ring[ring]);
        while (pkt) {
            /* Prepare the buffer for the stack: owning device,
             * protocol from the Ethernet header, checksum marked
             * as already verified (loopback data is trusted). */
            pkt->dev = dev;
            pkt->protocol = eth_type_trans(pkt, dev);
            pkt->ip_summed = CHECKSUM_UNNECESSARY;

            dev->stats.rx_packets++;
            dev->stats.rx_bytes += pkt->len;

            pr_info("%s: RX queue %d delivering packet (len=%u)\n",
                    dev->name, ring, pkt->len);

            netif_rx(pkt);

            pkt = skb_dequeue(&priv->rx_ring[ring]);
        }
    }
}

/*
 * Transmit handler (ndo_start_xmit).
 *
 * The stack picks a TX queue for the skb (skb_get_queue_mapping); we
 * clamp it into range, copy the packet into a driver-owned buffer to
 * simulate a DMA mapping, place the copy on an RX ring (TX queue q maps
 * to RX ring q % VNET_RX_QUEUES), then immediately drain the RX rings
 * to loop the packet back into the stack.
 *
 * Fix: TX stats are now only counted when the loopback copy succeeds;
 * an skb_copy() failure is accounted as tx_dropped instead of being
 * silently reported as a successful transmit.
 *
 * NOTE(review): netif_rx() is invoked while priv_lock is held — works
 * for this demo, but a real driver would deliver outside the lock.
 */
static netdev_tx_t vnet_start_xmit(struct sk_buff *skb,
                                   struct net_device *dev)
{
    struct vnet_priv *priv = netdev_priv(dev);
    unsigned int q = skb_get_queue_mapping(skb);
    struct sk_buff *copy;

    if (q >= VNET_TX_QUEUES)
        q = 0;

    spin_lock(&priv_lock);

    /* Copy skb into driver-owned buffer (simulate DMA mapping) */
    copy = skb_copy(skb, GFP_ATOMIC);
    if (copy) {
        unsigned int rx_q = q % VNET_RX_QUEUES;

        dev->stats.tx_packets++;
        dev->stats.tx_bytes += skb->len;

        pr_info("%s: TX queue %d enqueue packet (len=%u)\n",
                dev->name, q, skb->len);

        skb_queue_tail(&priv->rx_ring[rx_q], copy);
        pr_info("%s: copied to RX queue %d\n", dev->name, rx_q);
    } else {
        /* Allocation failure: packet is lost, account it honestly. */
        dev->stats.tx_dropped++;
    }

    /* Free original skb (stack no longer owns it) */
    dev_kfree_skb(skb);

    /* Process RX ring immediately (loopback model) */
    vnet_process_rx_ring(dev);
    spin_unlock(&priv_lock);

    return NETDEV_TX_OK;
}

/*
 * ndo_open: initialize every driver-level TX/RX ring and allow the
 * stack to start transmitting.
 *
 * Fix: this is a multi-queue device (alloc_netdev_mqs with
 * VNET_TX_QUEUES TX queues), so all TX queues must be started with
 * netif_tx_start_all_queues(); the original netif_start_queue() only
 * wakes queue 0, leaving the other queues stopped.
 */
static int vnet_open(struct net_device *dev)
{
    struct vnet_priv *priv = netdev_priv(dev);
    int i;

    for (i = 0; i < VNET_TX_QUEUES; i++)
        skb_queue_head_init(&priv->tx_ring[i]);
    for (i = 0; i < VNET_RX_QUEUES; i++)
        skb_queue_head_init(&priv->rx_ring[i]);

    netif_tx_start_all_queues(dev);
    pr_info("%s: device opened\n", dev->name);
    return 0;
}

/*
 * ndo_stop: stop all transmit queues and free any skbs still sitting
 * on the driver-level rings.
 *
 * Fix: matching vnet_open(), all TX queues of this multi-queue device
 * are stopped with netif_tx_stop_all_queues(); the original
 * netif_stop_queue() only stops queue 0.
 */
static int vnet_stop(struct net_device *dev)
{
    struct vnet_priv *priv = netdev_priv(dev);
    struct sk_buff *skb;
    int i;

    netif_tx_stop_all_queues(dev);

    /* Drain and free anything left queued so nothing leaks on close. */
    for (i = 0; i < VNET_TX_QUEUES; i++)
        while ((skb = skb_dequeue(&priv->tx_ring[i])) != NULL)
            dev_kfree_skb(skb);
    for (i = 0; i < VNET_RX_QUEUES; i++)
        while ((skb = skb_dequeue(&priv->rx_ring[i])) != NULL)
            dev_kfree_skb(skb);

    pr_info("%s: device closed\n", dev->name);
    return 0;
}

/* Callbacks the networking core invokes on this device. */
static const struct net_device_ops vnet_netdev_ops = {
    .ndo_open             = vnet_open,            /* ifconfig/ip link up */
    .ndo_stop             = vnet_stop,            /* ifconfig/ip link down */
    .ndo_start_xmit       = vnet_start_xmit,      /* packet transmit */
    .ndo_siocdevprivate   = vnet_siocdevprivate,  /* SIOCDEVPRIVATE ioctls */
};

/*
 * Setup callback passed to alloc_netdev_mqs(): configure the freshly
 * allocated net_device as an Ethernet-like, no-ARP virtual device with
 * a random MAC address.
 *
 * Fix: the original assigned dev->num_tx_queues/num_rx_queues here.
 * Those fields are owned by the core — alloc_netdev_mqs() sets them
 * from its txqs/rxqs arguments (VNET_TX_QUEUES/VNET_RX_QUEUES in
 * vnet_init), so a driver must not write them in its setup callback.
 */
static void vnet_setup(struct net_device *dev)
{
    ether_setup(dev);
    dev->netdev_ops = &vnet_netdev_ops;
    dev->flags |= IFF_NOARP;               /* loopback demo: no ARP needed */
    dev->features |= NETIF_F_HW_CSUM;      /* pretend HW checksums for us */
    eth_hw_addr_random(dev);
}

/* The single device instance this module creates. */
static struct net_device *vnet_dev;

/*
 * Module init: allocate a multi-queue net_device (4 TX / 2 RX queues)
 * and register it with the networking core.
 *
 * Fix: priv_lock must be initialized *before* register_netdev() —
 * once registration completes the device can be opened and its
 * transmit/ioctl paths may take the lock, so initializing it
 * afterwards (as the original did) races against first use.
 */
static int __init vnet_init(void)
{
    int ret;

    spin_lock_init(&priv_lock);

    vnet_dev = alloc_netdev_mqs(sizeof(struct vnet_priv),
                                DRV_NAME"%d",
                                NET_NAME_UNKNOWN,
                                vnet_setup,
                                VNET_TX_QUEUES,
                                VNET_RX_QUEUES);
    if (!vnet_dev)
        return -ENOMEM;

    ret = register_netdev(vnet_dev);
    if (ret) {
        free_netdev(vnet_dev);
        return ret;
    }

    pr_info(DRV_NAME ": registered with %d TX and %d RX queues\n",
            VNET_TX_QUEUES, VNET_RX_QUEUES);
    return 0;
}

/*
 * Module exit: unregister the device from the networking core (which
 * closes it if still up) and release the net_device allocation.
 */
static void __exit vnet_exit(void)
{
    unregister_netdev(vnet_dev);
    free_netdev(vnet_dev);
    pr_info(DRV_NAME ": module unloaded\n");
}

module_init(vnet_init);
module_exit(vnet_exit);

The above code now creates 4 transmit and 2 receive queues for the networking device instance — numbers chosen arbitrarily for this example. We have also used spinlocks for the first time in this code, to serialize access to the shared driver state (the device statistics and the per-queue skb rings) so that concurrent transmit and ioctl paths cannot corrupt it.

In the next set of articles, we will discuss adding another important piece of functionality commonly seen in networking drivers: netlink. We will incorporate a simple Generic Netlink interface into the virtual loopback driver and provide a netlink echo command to verify that it works, along with a sample program.

Adding Generic Netlink interface to loopback driver

Leave a Reply

Your email address will not be published. Required fields are marked *