[ovs-dev] [PATCH V2 09/10] dpif-netdev: Add DPDK netdev.
Thomas Graf
tgraf at redhat.com
Fri Mar 21 23:13:09 UTC 2014
On 03/21/2014 07:03 PM, Pravin wrote:
Some comments below. I'll start experimenting with this as well.
> +
> +#define MTU_TO_MAX_LEN(mtu) ((mtu) + ETHER_HDR_LEN + ETHER_CRC_LEN)
> +#define MBUF_SIZE(mtu) (MTU_TO_MAX_LEN(mtu) + (512) + \
> + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
> +
> +/* TODO: mempool size should be based on system resources. */
> +#define NB_MBUF (4096 * 64)
> +#define MP_CACHE_SZ (256 * 2)
It would be great to move this into OVSDB.
> +#define SOCKET0 0
> +
> +#define NON_PMD_THREAD_TX_QUEUE 0
> +
> +/* TODO: Needs per NIC value for these constants. */
> +#define RX_PTHRESH 32 /* Default values of RX prefetch threshold reg. */
> +#define RX_HTHRESH 32 /* Default values of RX host threshold reg. */
> +#define RX_WTHRESH 16 /* Default values of RX write-back threshold reg. */
> +
> +#define TX_PTHRESH 36 /* Default values of TX prefetch threshold reg. */
> +#define TX_HTHRESH 0 /* Default values of TX host threshold reg. */
> +#define TX_WTHRESH 0 /* Default values of TX write-back threshold reg. */
> +
> +static const struct rte_eth_conf port_conf = {
> + .rxmode = {
> + .mq_mode = ETH_MQ_RX_RSS,
> + .split_hdr_size = 0,
> + .header_split = 0, /* Header Split disabled */
> + .hw_ip_checksum = 0, /* IP checksum offload enabled */
Comment does not seem to match reality ;-)
> + .hw_vlan_filter = 0, /* VLAN filtering disabled */
> + .jumbo_frame = 0, /* Jumbo Frame Support disabled */
> + .hw_strip_crc = 0, /* CRC stripped by hardware */
I understand the reason to disable jumbo frames. Can you elaborate
on the reason to disable IP checksum and HW VLAN strip?
> + },
> + .rx_adv_conf = {
> + .rss_conf = {
> + .rss_key = NULL,
> + .rss_hf = ETH_RSS_IPV4_TCP | ETH_RSS_IPV4 | ETH_RSS_IPV6,
This should be configurable at some point too to allow enabling
UDPv4 RSS to scale UDP encap across queues.
> + },
> + },
> + .txmode = {
> + .mq_mode = ETH_MQ_TX_NONE,
> + },
> +};
> +
> +/* TODO: use dpdk malloc for entire OVS. In fact huge pages should be used
> + * for all other segments: data, bss and text. */
The assimilation has begun ;-)
> +static int
> +netdev_dpdk_get_config(const struct netdev *netdev_, struct smap *args)
> +{
> + struct netdev_dpdk *dev = netdev_dpdk_cast(netdev_);
> +
> + ovs_mutex_lock(&dev->mutex);
> +
> + /* TODO: Allow to configure number of queues. */
Same for socket id and maybe burst len
> + smap_add_format(args, "configured_rx_queues", "%u", netdev_->n_rxq);
> + smap_add_format(args, "configured_tx_queues", "%u", netdev_->n_rxq);
> + ovs_mutex_unlock(&dev->mutex);
> +
> + return 0;
> +}
> +
> +
>> +
> +inline static void
> +dpdk_queue_pkt(struct netdev_dpdk *dev, int qid,
> + struct rte_mbuf *pkt)
> +{
> + struct dpdk_tx_queue *txq = &dev->tx_q[qid];
> + uint64_t diff_tsc;
> + uint64_t cur_tsc;
> + uint32_t nb_tx;
> +
> + rte_spinlock_lock(&txq->tx_lock);
> + txq->burst_pkts[txq->count++] = pkt;
I think burst_pkts should be converted to rte_ring in a follow
up series to allow for lockless enqueue and dequeue. Taking
a spinlock for every packet enqueue is painful.
> + if (txq->count == MAX_TX_QUEUE_LEN) {
> + goto flush;
> + }
> + cur_tsc = rte_get_timer_cycles();
> + if (txq->count == 1) {
> + txq->tsc = cur_tsc;
> + }
> + diff_tsc = cur_tsc - txq->tsc;
> + if (diff_tsc >= DRAIN_TSC) {
> + goto flush;
> + }
> + rte_spinlock_unlock(&txq->tx_lock);
> + return;
> +
> +flush:
> + nb_tx = rte_eth_tx_burst(dev->port_id, qid, txq->burst_pkts, txq->count);
> + if (nb_tx != txq->count) {
> + /* free buffers if we couldn't transmit packets */
> + rte_mempool_put_bulk(dev->dpdk_mp->mp,
> + (void **) &txq->burst_pkts[nb_tx],
> + (txq->count - nb_tx));
> + }
> + txq->count = 0;
> + rte_spinlock_unlock(&txq->tx_lock);
> +}
> +
More information about the dev
mailing list