/*
* Virtual network driver for conversing with remote driver backends.
*
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/ethtool.h>
#include <linux/if_ether.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/moduleparam.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <net/ip.h>
#include <xen/xen.h>
#include <xen/xenbus.h>
#include <xen/events.h>
#include <xen/page.h>
#include <xen/grant_table.h>
#include <xen/interface/io/netif.h>
#include <xen/interface/memory.h>
#include <xen/interface/grant_table.h>
static const struct ethtool_ops xennet_ethtool_ops;
struct netfront_cb {
struct page *page;
unsigned offset;
};
#define NETFRONT_SKB_CB(skb) ((struct netfront_cb *)((skb)->cb))
#define RX_COPY_THRESHOLD 256
#define GRANT_INVALID_REF 0
#define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
#define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
#define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
struct netfront_info {
struct list_head list;
struct net_device *netdev;
struct napi_struct napi;
unsigned int evtchn;
struct xenbus_device *xbdev;
spinlock_t tx_lock;
struct xen_netif_tx_front_ring tx;
int tx_ring_ref;
/*
* {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
* are linked from tx_skb_freelist through skb_entry.link.
*
* NB. Freelist index entries are always going to be less than
* PAGE_OFFSET, whereas pointers to skbs will always be equal or
* greater than PAGE_OFFSET: we use this property to distinguish
* them.
*/
union skb_entry {
struct sk_buff *skb;
unsigned long link;
} tx_skbs[NET_TX_RING_SIZE];
grant_ref_t gref_tx_head;
grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
unsigned tx_skb_freelist;
spinlock_t rx_lock ____cacheline_aligned_in_smp;
struct xen_netif_rx_front_ring rx;
int rx_ring_ref;
/* Receive-ring batched refills. */
#define RX_MIN_TARGET 8
#define RX_DFL_MIN_TARGET 64
#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
unsigned rx_min_target, rx_max_target, rx_target;
struct sk_buff_head rx_batch;
struct timer_list rx_refill_timer;
struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
grant_ref_t gref_rx_head;
grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
unsigned long rx_pfn_array[NET_RX_RING_SIZE];
struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
struct mmu_update rx_mmu[NET_RX_RING_SIZE];
/* Statistics */
unsigned long rx_gso_checksum_fixup;
};
struct netfront_rx_info {
struct xen_netif_rx_response rx;
struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
};
static void skb_entry_set_link(union skb_entry *list, unsigned short id)
{
list->link = id;
}
static int skb_entry_is_link(const union skb_entry *list)
{
BUILD_BUG_ON(sizeof(list->skb) != sizeof(list->link));
return (unsigned long)list->skb < PAGE_OFFSET;
}
/*
* Access macros for acquiring freeing slots in tx_skbs[].
*/
static void add_id_to_freelist(unsigned *head, union skb_entry *list,
unsigned short id)
{
skb_entry_set_link(&list[id], *head);
*head = id;
}
static unsigned short get_id_from_freelist(unsigned *head,
union skb_entry *list)
{
unsigned int id = *head;
*head = list[id].link;
return id;
}
static int xennet_rxidx(RING_IDX idx)
{
return idx & (NET_RX_RING_SIZE - 1);
}
static struct sk_buff *xennet_get_rx_skb(struct netfront_info *np,
RING_IDX ri)
{
int i = xennet_rxidx(ri);
struct sk_buff *skb = np->rx_skbs[i];
np->rx_skbs[i] = NULL;
return skb;
}
static grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
RING_IDX ri)
{
int i = xennet_rxidx(ri);
grant_ref_t ref = np->grant_rx_ref[i];
np->grant_rx_ref[i] = GRANT_INVALID_REF;
return ref;
}
#ifdef CONFIG_SYSFS
static int xennet_sysfs_addif(struct net_device *netdev);
static void xennet_sysfs_delif(struct net_device *netdev);
#else /* !CONFIG_SYSFS */
#define xennet_sysfs_addif(dev) (0)
#define xennet_sysfs_delif(dev) do { } while (0)
#endif
static int xennet_can_sg(struct net_device *dev)
{
return dev->features & NETIF_F_SG;
}
static void rx_refill_timeout(unsigned long data)
{
struct net_device *dev = (struct net_device *)data;
struct netfront_info *np = netdev_priv(dev);
napi_schedule(&np->napi);
}
static int netfront_tx_slot_available(struct netfront_info *np)
{
return (np->tx.req_prod_pvt - np->tx.rsp_cons) <
(TX_MAX_TARGET - MAX_SKB_FRAGS - 2);
}
static void xennet_maybe_wake_tx(struct net_device *dev)
{
struct netfront_info *np = netdev_priv(dev);
if (unlikely(netif_queue_stopped(dev)) &&
netfront_tx_slot_available(np) &&
likely(netif_running(dev)))
netif_wake_queue(dev);
}
static void xennet_alloc_rx_buffers(struct net_device *dev)
{
unsigned short id;
struct netfront_info *np = netdev_priv(dev);
struct sk_buff *skb;
struct page *page;
int i, batch_target, notify;
RING_IDX req_prod = np->rx.req_prod_pvt;
grant_ref_t ref;
unsigned long pfn;
void *vaddr;
struct xen_netif_rx_request *req;
if (unlikely(!netif_carrier_ok(dev)))
return;
/*
* Allocate skbuffs greedily, even though we batch updates to the
* receive ring. This creates a less bursty demand on the memory
* allocator, so should reduce the chance of failed allocation requests
* both for ourself and for other kernel subsystems.
*/
batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
skb = __netdev_alloc_skb(dev, RX_COPY_THRESHOLD + NET_IP_ALIGN,
GFP_ATOMIC | __GFP_NOWARN);
if (unlikely(!skb))
goto no_skb;
/* Align ip header to a 16 bytes boundary */
skb_reserve(skb, NET_IP_ALIGN);
page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
if (!page) {
kfree_skb(skb);
no_skb:
/* Any skbuffs queued for refill? Force them out. */
if (i != 0)
goto refill;
/* Could not allocate any skbuffs. Try again later. */
mod_timer(&np->rx_refill_timer,
jiffies + (HZ/10));
break;
}
skb_shinfo(skb)->frags[0].page = page;
skb_shinfo(skb)->nr_frags = 1;
__skb_queue_tail(&np->rx_batch, skb);
}
/* Is the batch large enough to be worthwhile? */
if (i < (np->rx_target/2)) {
if (req_prod > np->rx.sring->req_prod)
goto push;
return;
}
/* Adjust our fill target if we risked running out of buffers. */
if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
((np->rx_target *= 2) > np->rx_max_target))
np->rx_target = np->rx_max_target;
refill:
for (i = 0; ; i++) {
skb = __skb_dequeue(&np->rx_batch);
if (skb == NULL)
break;
skb->dev = dev;
id = xennet_rxidx(req_prod + i);
BUG_ON(np->rx_skbs[id]);
np->rx_skbs[id] = skb;
ref = gnttab_claim_grant_reference(&np->gref_rx_head);
BUG_ON((signed short)ref < 0);
np->grant_rx_ref[id] = ref;
pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page);
vaddr = page_address(skb_shinfo(skb)->frags[0].page);
req = RING_GET_REQUEST(&np->rx, req_prod + i);
gnttab_grant_foreign_access_ref(ref,
np->xbdev->otherend_id,
pfn_to_mfn(pfn),
0);
req->id = id;
req->gref = ref;
}
wmb(); /* barrier so backend seens requests */
/* Above is a suitable barrier to ensure backend will see requests. */
np->rx.req_prod_pvt = req_prod + i;
push:
RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify);
if (notify)
notify_remote_via_irq(np->netdev->irq);
}
static int xennet_open(struct net_device *dev)
{
struct netfront_info *np = netdev_priv(dev);
napi_enable(&np->napi);
spin_lock_bh(&np->rx_lock);
if (netif_carrier_ok(dev)) {
xennet_alloc_rx_buffers(dev);
np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
napi_schedule(&np->napi);
}
spin_unlock_bh(&np->rx_lock);
netif_start_queue(dev);
return 0;
}
static void xennet_tx_buf_gc(struct net_device *dev)
{
RING_IDX cons, prod;
unsigned short id;
struct netfront_info *np = netdev
评论0