[tulip] Permanent interrupt mitigation with 21143 quad card
Ben Greear
greearb@candelatech.com
Wed Nov 13 13:23:01 2002
This is a multi-part message in MIME format.
--------------070406040307030501010805
Content-Type: text/plain; charset=us-ascii; format=flowed
Content-Transfer-Encoding: 7bit
Luke Stratman wrote:
> Hello,
>
> I recently sent an email to this list detailing my efforts to obtain
> wire speed (100 Mbps) routing with 21143 quad interface cards in several
> Linux boxes that are to act as routers in a network testbed environment.
> While I'm able to accomplish this if only two of the interfaces are
> active at one time (i.e. one receiving and one sending), if all four
> interfaces are active (two receiving and two sending), my throughput
> maxes out at about 72 Mbps. The hardware for each router certainly
> seems up to the task (an Asus A7V266-E motherboard, Athlon XP 2000+, and
> 256MB of PC2100 RAM), but the interrupt load that occurs when all the
> interfaces are active surpasses even this setup.
I tried permanently enabling interrupt mitigation, and it did not help. I
might not have written the code correctly, though...
I did test with a NAPI-ized version, which should do the hw-mitigation.
It increased throughput of small packets by about 20%, but had little
impact on larger packets.
With all the tuning I could manage, and using pktgen for a traffic
generator, I could get around 30Mbps tx + rx on 8 ports simultaneously
on a single machine. I think it is the PCI bus on my machine that
is killing me (32-bit/33MHz PCI, Athlon 1.8).
I'm attaching an unofficial and mightily hacked-up tulip driver patch
that does NAPI. It should apply against 2.4.20-rc1, and maybe others
in the 2.4.19-preX range. I'd be interested to know how it runs for
you.
Ben
--
Ben Greear <greearb@candelatech.com> <Ben_Greear AT excite.com>
President of Candela Technologies Inc http://www.candelatech.com
ScryMUD: http://scry.wanfear.com http://scry.wanfear.com/~greear
--------------070406040307030501010805
Content-Type: text/plain;
name="tulip_2.4.19.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="tulip_2.4.19.patch"
--- linux-2.4.19.p3/drivers/net/tulip/interrupt.c Tue Nov 5 21:33:22 2002
+++ linux-2.4.19.p4/drivers/net/tulip/interrupt.c Tue Nov 12 19:00:07 2002
@@ -1,4 +1,4 @@
-/*
+/* -*-linux-c-*-
drivers/net/tulip/interrupt.c
Maintained by Jeff Garzik <jgarzik@mandrakesoft.com>
@@ -23,9 +23,10 @@
int tulip_rx_copybreak;
unsigned int tulip_max_interrupt_work;
-#ifdef CONFIG_NET_HW_FLOWCONTROL
-
+#ifdef USE_MITIGATION
#define MIT_SIZE 15
+#define MIT_TABLE 15 /* We use 0 or max */
+
unsigned int mit_table[MIT_SIZE+1] =
{
/* CRS11 21143 hardware Mitigation Control Interrupt
@@ -55,7 +56,7 @@
0x80BD0000,
0x80CF0000,
0x80DF0000,
-// 0x80FF0000 /* RX time = 16, RX pkts = 7, CM = 1 */
+ //0x80FF0000 /* RX time = 16, RX pkts = 7, CM = 1 */
0x80F10000 /* RX time = 16, RX pkts = 0, CM = 1 */
};
#endif
@@ -74,9 +75,27 @@
struct sk_buff *skb;
dma_addr_t mapping;
- skb = tp->rx_buffers[entry].skb = dev_alloc_skb(PKT_BUF_SZ);
- if (skb == NULL)
+#ifdef TULIP_SKB_RECYCLING
+ skb = skb_dequeue(&(tp->tulip_recycle[smp_processor_id()].list));
+ if (skb) {
+ tp->rx_buffers[entry].skb = skb;
+ skb_headerinit(skb, NULL, 0);
+ skb->data = skb->head;
+ skb->tail = skb->head;
+ skb->len = 0;
+ }
+ else {
+ skb = dev_alloc_skb(PKT_BUF_SZ);
+ tp->rx_buffers[entry].skb = skb;
+ }
+#else
+ skb = tp->rx_buffers[entry].skb = dev_alloc_skb(PKT_BUF_SZ);
+#endif
+
+ if (skb == NULL) {
+ printk("tulip: Error, couldn't refill buffer: %i\n", entry);
break;
+ }
mapping = pci_map_single(tp->pdev, skb->tail, PKT_BUF_SZ,
PCI_DMA_FROMDEVICE);
@@ -99,16 +118,36 @@
return refilled;
}
+void oom_timer(unsigned long data)
+{
+ struct net_device *dev = (struct net_device *)data;
+ netif_rx_schedule(dev);
+}
+
-static int tulip_rx(struct net_device *dev)
+int tulip_poll(struct net_device *dev, int *budget)
{
struct tulip_private *tp = (struct tulip_private *)dev->priv;
int entry = tp->cur_rx % RX_RING_SIZE;
- int rx_work_limit = tp->dirty_rx + RX_RING_SIZE - tp->cur_rx;
+ int rx_work_limit = *budget;
int received = 0;
+ int rx_byte_sofar = 0;
+ int rx_byte_limit;
+
+#ifdef EXTRA_STATS
+ tp->stats_poll_starts++;
+#endif
+
+ if (rx_work_limit > dev->quota)
+ rx_work_limit = dev->quota;
+
+ /* Limit us to rx_work_limit * 512 bytes. This should let us handle
+ * more of the smaller packets, but fewer of the larger ones. This seems
+ * to help keep things working well under various packet sizes.
+ */
+ rx_byte_limit = rx_work_limit << 9;
-#ifdef CONFIG_NET_HW_FLOWCONTROL
- int drop = 0, mit_sel = 0;
+#ifdef USE_MITIGATION
/* that one buffer is needed for mit activation; or might be a
bug in the ring buffer code; check later -- JHS*/
@@ -118,177 +157,271 @@
if (tulip_debug > 4)
printk(KERN_DEBUG " In tulip_rx(), entry %d %8.8x.\n", entry,
- tp->rx_ring[entry].status);
- /* If we own the next entry, it is a new packet. Send it up. */
- while ( ! (tp->rx_ring[entry].status & cpu_to_le32(DescOwned))) {
- s32 status = le32_to_cpu(tp->rx_ring[entry].status);
-
- if (tulip_debug > 5)
- printk(KERN_DEBUG "%s: In tulip_rx(), entry %d %8.8x.\n",
- dev->name, entry, status);
- if (--rx_work_limit < 0)
- break;
- if ((status & 0x38008300) != 0x0300) {
- if ((status & 0x38000300) != 0x0300) {
- /* Ingore earlier buffers. */
- if ((status & 0xffff) != 0x7fff) {
- if (tulip_debug > 1)
- printk(KERN_WARNING "%s: Oversized Ethernet frame "
- "spanned multiple buffers, status %8.8x!\n",
- dev->name, status);
- tp->stats.rx_length_errors++;
- }
- } else if (status & RxDescFatalErr) {
+ tp->rx_ring[entry].status);
+
+
+ do {
+ /* Acknowledge current RX interrupt sources. */
+ outl((RxIntr | RxNoBuf), dev->base_addr + CSR5);
+
+
+ /* If we own the next entry, it is a new packet. Send it up. */
+ while ( ! (tp->rx_ring[entry].status & cpu_to_le32(DescOwned))) {
+ s32 status = le32_to_cpu(tp->rx_ring[entry].status);
+
+
+ if (tp->dirty_rx + RX_RING_SIZE == tp->cur_rx)
+ break;
+
+ if (tulip_debug > 5)
+ printk(KERN_DEBUG "%s: In tulip_rx(), entry %d %8.8x.\n",
+ dev->name, entry, status);
+ if (--rx_work_limit < 0)
+ goto not_done;
+
+ if (rx_byte_sofar > rx_byte_limit)
+ goto not_done;
+
+ if ((status & 0x38008300) != 0x0300) {
+ if ((status & 0x38000300) != 0x0300) {
+ /* Ignore earlier buffers. */
+ if ((status & 0xffff) != 0x7fff) {
+ if (tulip_debug > 1)
+ printk(KERN_WARNING "%s: Oversized Ethernet frame "
+ "spanned multiple buffers, status %8.8x!\n",
+ dev->name, status);
+ tp->stats.rx_length_errors++;
+ }
+ } else if (status & RxDescFatalErr) {
/* There was a fatal error. */
- if (tulip_debug > 2)
- printk(KERN_DEBUG "%s: Receive error, Rx status %8.8x.\n",
- dev->name, status);
- tp->stats.rx_errors++; /* end of a packet.*/
- if (status & 0x0890) tp->stats.rx_length_errors++;
- if (status & 0x0004) tp->stats.rx_frame_errors++;
- if (status & 0x0002) tp->stats.rx_crc_errors++;
- if (status & 0x0001) tp->stats.rx_fifo_errors++;
- }
- } else {
- /* Omit the four octet CRC from the length. */
- short pkt_len = ((status >> 16) & 0x7ff) - 4;
- struct sk_buff *skb;
+ if (tulip_debug > 2)
+ printk(KERN_DEBUG "%s: Receive error, Rx status %8.8x.\n",
+ dev->name, status);
+ tp->stats.rx_errors++; /* end of a packet.*/
+ if (status & 0x0890) tp->stats.rx_length_errors++;
+ if (status & 0x0004) tp->stats.rx_frame_errors++;
+ if (status & 0x0002) tp->stats.rx_crc_errors++;
+ if (status & 0x0001) tp->stats.rx_fifo_errors++;
+ }
+ } else {
+ /* Omit the four octet CRC from the length. */
+ short pkt_len = ((status >> 16) & 0x7ff) - 4;
+ struct sk_buff *skb = NULL;
#ifndef final_version
- if (pkt_len > 1518) {
- printk(KERN_WARNING "%s: Bogus packet size of %d (%#x).\n",
- dev->name, pkt_len, pkt_len);
- pkt_len = 1518;
- tp->stats.rx_length_errors++;
- }
+ if (pkt_len > 1518) {
+ printk(KERN_WARNING "%s: Bogus packet size of %d (%#x).\n",
+ dev->name, pkt_len, pkt_len);
+ pkt_len = 1518;
+ tp->stats.rx_length_errors++;
+ }
#endif
-#ifdef CONFIG_NET_HW_FLOWCONTROL
- drop = atomic_read(&netdev_dropping);
- if (drop)
- goto throttle;
-#endif
- /* Check if the packet is long enough to accept without copying
- to a minimally-sized skbuff. */
- if (pkt_len < tulip_rx_copybreak
- && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
- skb->dev = dev;
- skb_reserve(skb, 2); /* 16 byte align the IP header */
- pci_dma_sync_single(tp->pdev,
- tp->rx_buffers[entry].mapping,
- pkt_len, PCI_DMA_FROMDEVICE);
+ /* Check if the packet is long enough to accept without copying
+ to a minimally-sized skbuff. */
+#ifdef TULIP_SKB_RECYCLING
+ if (pkt_len < tulip_rx_copybreak) {
+ /* Allocate an skb from our private queue if possible */
+ skb = skb_dequeue(&(tp->tulip_recycle[smp_processor_id()].list));
+ if (skb) {
+ skb_headerinit(skb, NULL, 0); /* clean state */
+ skb->data = skb->head;
+ skb->tail = skb->head;
+ skb->len = 0;
+ }
+ }
+#endif
+ if ((pkt_len < tulip_rx_copybreak)
+ && ((skb != NULL)
+ || ((skb = dev_alloc_skb(pkt_len + 2)) != NULL))) {
+ skb->dev = dev;
+ skb_reserve(skb, 2); /* 16 byte align the IP header */
+ pci_dma_sync_single(tp->pdev,
+ tp->rx_buffers[entry].mapping,
+ pkt_len, PCI_DMA_FROMDEVICE);
#if ! defined(__alpha__)
- eth_copy_and_sum(skb, tp->rx_buffers[entry].skb->tail,
- pkt_len, 0);
- skb_put(skb, pkt_len);
+ eth_copy_and_sum(skb, tp->rx_buffers[entry].skb->tail,
+ pkt_len, 0);
+ skb_put(skb, pkt_len);
#else
- memcpy(skb_put(skb, pkt_len),
- tp->rx_buffers[entry].skb->tail,
- pkt_len);
-#endif
- } else { /* Pass up the skb already on the Rx ring. */
- char *temp = skb_put(skb = tp->rx_buffers[entry].skb,
- pkt_len);
+ memcpy(skb_put(skb, pkt_len),
+ tp->rx_buffers[entry].skb->tail,
+ pkt_len);
+#endif
+
+ } else { /* Pass up the skb already on the Rx ring. */
+ char *temp = skb_put(skb = tp->rx_buffers[entry].skb,
+ pkt_len);
#ifndef final_version
- if (tp->rx_buffers[entry].mapping !=
- le32_to_cpu(tp->rx_ring[entry].buffer1)) {
- printk(KERN_ERR "%s: Internal fault: The skbuff addresses "
- "do not match in tulip_rx: %08x vs. %08x %p / %p.\n",
- dev->name,
- le32_to_cpu(tp->rx_ring[entry].buffer1),
- tp->rx_buffers[entry].mapping,
- skb->head, temp);
+ if (tp->rx_buffers[entry].mapping !=
+ le32_to_cpu(tp->rx_ring[entry].buffer1)) {
+ printk(KERN_ERR "%s: Internal fault: The skbuff addresses "
+ "do not match in tulip_rx: %08x vs. %08x %p / %p.\n",
+ dev->name,
+ le32_to_cpu(tp->rx_ring[entry].buffer1),
+ tp->rx_buffers[entry].mapping,
+ skb->head, temp);
+ }
+#endif
+
+ pci_unmap_single(tp->pdev, tp->rx_buffers[entry].mapping,
+ PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+
+ tp->rx_buffers[entry].skb = NULL;
+ tp->rx_buffers[entry].mapping = 0;
}
+
+#ifdef TULIP_SKB_RECYCLING
+ skb->tag = smp_processor_id();
+ tp->cnt[skb->tag]++;
+ skb->recycle_dev = dev;
#endif
- pci_unmap_single(tp->pdev, tp->rx_buffers[entry].mapping,
- PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+ skb->protocol = eth_type_trans(skb, dev);
- tp->rx_buffers[entry].skb = NULL;
- tp->rx_buffers[entry].mapping = 0;
- }
- skb->protocol = eth_type_trans(skb, dev);
-#ifdef CONFIG_NET_HW_FLOWCONTROL
- mit_sel =
-#endif
- netif_rx(skb);
-
-#ifdef CONFIG_NET_HW_FLOWCONTROL
- switch (mit_sel) {
- case NET_RX_SUCCESS:
- case NET_RX_CN_LOW:
- case NET_RX_CN_MOD:
- break;
-
- case NET_RX_CN_HIGH:
- rx_work_limit -= NET_RX_CN_HIGH; /* additional*/
- break;
- case NET_RX_DROP:
- rx_work_limit = -1;
- break;
- default:
- printk("unknown feedback return code %d\n", mit_sel);
- break;
- }
+ netif_receive_skb(skb);
- drop = atomic_read(&netdev_dropping);
- if (drop) {
-throttle:
- rx_work_limit = -1;
- mit_sel = NET_RX_DROP;
-
- if (tp->fc_bit) {
- long ioaddr = dev->base_addr;
-
- /* disable Rx & RxNoBuf ints. */
- outl(tulip_tbl[tp->chip_id].valid_intrs&RX_A_NBF_STOP, ioaddr + CSR7);
- set_bit(tp->fc_bit, &netdev_fc_xoff);
- }
- }
+ dev->last_rx = jiffies;
+ tp->stats.rx_packets++;
+ tp->stats.rx_bytes += pkt_len;
+ rx_byte_sofar += pkt_len;
+ }
+ received++;
+#ifdef EXTRA_STATS
+ tp->stats_poll_pkts++;
+#ifdef USE_MITIGATION
+ if(tp->mit_on) tp->stats_poll_pkts_mit++;
#endif
- dev->last_rx = jiffies;
- tp->stats.rx_packets++;
- tp->stats.rx_bytes += pkt_len;
+#endif
+ entry = (++tp->cur_rx) % RX_RING_SIZE;
+ if (tp->cur_rx - tp->dirty_rx > RX_RING_SIZE/4)
+ tulip_refill_rx(dev);
+
}
- received++;
- entry = (++tp->cur_rx) % RX_RING_SIZE;
- }
-#ifdef CONFIG_NET_HW_FLOWCONTROL
+
+ /* New ack strategy... irq does not ack Rx any longer
+ hopefully this helps */
+
+ /* Really bad things can happen here... If new packet arrives
+ * and an irq arrives (tx or just due to occasionally unset
+ * mask), it will be acked by irq handler, but new thread
+ * is not scheduled. It is major hole in design.
+ * No idea how to fix this if "playing with fire" will fail
+ * tomorrow (night 011029). If it will not fail, we won
+ * finally: amount of IO did not increase at all. */
+ } while ((inl(dev->base_addr + CSR5) & RxIntr));
+
+/* done: */
+
+#ifdef USE_MITIGATION
/* We use this simplistic scheme for IM. It's proven by
real life installations. We can have IM enabled
- continuesly but this would cause unnecessary latency.
- Unfortunely we can't use all the NET_RX_* feedback here.
- This would turn on IM for devices that is not contributing
- to backlog congestion with unnecessary latency.
+ continuously but this would cause unnecessary latency.
+ Unfortunately we can't use all the NET_RX_* feedback here.
+ This would turn on IM for devices that are not contributing
+ to backlog congestion with unnecessary latency.
We monitor the the device RX-ring and have:
HW Interrupt Mitigation either ON or OFF.
- ON: More then 1 pkt received (per intr.) OR we are dropping
+ ON: More than 1 pkt received (per intr.) OR we are dropping
OFF: Only 1 pkt received
-
+
Note. We only use min and max (0, 15) settings from mit_table */
if( tp->flags & HAS_INTR_MITIGATION) {
- if((received > 1 || mit_sel == NET_RX_DROP)
- && tp->mit_sel != 15 ) {
- tp->mit_sel = 15;
- tp->mit_change = 1; /* Force IM change */
+ if( received > 1 ) {
+ if( ! tp->mit_on ) {
+ tp->mit_on = 1;
+ outl(mit_table[MIT_TABLE], dev->base_addr + CSR11);
+ }
}
- if((received <= 1 && mit_sel != NET_RX_DROP) && tp->mit_sel != 0 ) {
- tp->mit_sel = 0;
- tp->mit_change = 1; /* Force IM change */
+ else {
+ if( tp->mit_on ) {
+ tp->mit_on = 0;
+ outl(0, dev->base_addr + CSR11);
+ }
}
}
- return RX_RING_SIZE+1; /* maxrx+1 */
-#else
- return received;
#endif
+
+ dev->quota -= received;
+ *budget -= received;
+
+ tulip_refill_rx(dev);
+
+ /* If RX ring is not full we are out of memory. */
+ if (tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL) goto oom;
+
+#ifdef EXTRA_STATS
+ if((inl(dev->base_addr + CSR5) & RxIntr)) tp->stats_poll_exit_done_rx_pending++;
+ tp->stats_poll_exit_done++;
+#endif
+
+ /* Remove us from polling list and enable RX intr. */
+
+ netif_rx_complete(dev);
+ outl(tulip_tbl[tp->chip_id].valid_intrs, dev->base_addr+CSR7);
+
+ /* The last op happens after poll completion. Which means the following:
+ * 1. it can race with disabling irqs in irq handler
+ * 2. it can race with disabling/enabling irqs in other poll threads
+ * 3. if an irq raised after beginning loop, it will be immediately
+ * triggered here.
+ *
+ * Summarizing: the logic results in some redundant irqs both
+ * due to races in masking and due to too late acking of already
+ * processed irqs. But it must not result in losing events.
+ */
+
+ return 0;
+
+not_done:
+ if (!received) {
+#ifdef EXTRA_STATS
+ tp->stats_poll_zero_rx++;
+#endif
+ /* received = dev->quota; Why existed? --Ben */ /* Not to happen */
+ }
+ else {
+ dev->quota -= received;
+ *budget -= received;
+ }
+
+ if (tp->cur_rx - tp->dirty_rx > RX_RING_SIZE/2 ||
+ tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL)
+ tulip_refill_rx(dev);
+
+ if (tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL) goto oom;
+
+#ifdef EXTRA_STATS
+ tp->stats_poll_exit_not_done++;
+#endif
+ return 1;
+
+
+oom: /* Executed with RX ints disabled */
+ printk("ERROR: tulip: Hit OOM trying to refill rx buffer.\n");
+
+ /* Start timer, stop polling, but do not enable rx interrupts. */
+ mod_timer(&tp->oom_timer, jiffies+1);
+
+ /* Think: timer_pending() was an explicit signature of bug.
+ * Timer can be pending now but fired and completed
+ * before we did netif_rx_complete(). See? We would lose it. */
+
+ /* remove ourselves from the polling list */
+ netif_rx_complete(dev);
+
+#ifdef EXTRA_STATS
+ tp->stats_poll_exit_oom++;
+#endif
+ return 0;
}
static inline void phy_interrupt (struct net_device *dev)
@@ -319,7 +452,6 @@
struct tulip_private *tp = (struct tulip_private *)dev->priv;
long ioaddr = dev->base_addr;
int csr5;
- int entry;
int missed;
int rx = 0;
int tx = 0;
@@ -327,6 +459,7 @@
int maxrx = RX_RING_SIZE;
int maxtx = TX_RING_SIZE;
int maxoi = TX_RING_SIZE;
+ int rxd = 0;
unsigned int work_count = tulip_max_interrupt_work;
/* Let's see whether the interrupt really is for us */
@@ -341,21 +474,32 @@
tp->nir++;
do {
- /* Acknowledge all of the current interrupt sources ASAP. */
- outl(csr5 & 0x0001ffff, ioaddr + CSR5);
+#ifdef EXTRA_STATS
+ if(!rxd)
+ record_interrupt_cause(dev, csr5);
+ else
+ record_interrupt_cause(dev, csr5& 0x0001ff3f);
+#endif
+ if (!rxd && (csr5 & (RxIntr | RxNoBuf))) {
+ rxd++;
+ /* Mask RX intrs and add the device to poll list. */
+ outl(tulip_tbl[tp->chip_id].valid_intrs&~RxPollInt, ioaddr + CSR7);
+ netif_rx_schedule(dev);
+
+ if (!(csr5&~(AbnormalIntr|NormalIntr|RxPollInt|TPLnkPass)))
+ break;
+ }
+
+ /* Acknowledge the interrupt sources we handle here ASAP
+ the poll function does Rx and RxNoBuf acking */
+
+ outl(csr5 & 0x0001ff3f, ioaddr + CSR5);
+
if (tulip_debug > 4)
printk(KERN_DEBUG "%s: interrupt csr5=%#8.8x new csr5=%#8.8x.\n",
- dev->name, csr5, inl(dev->base_addr + CSR5));
-
- if (csr5 & (RxIntr | RxNoBuf)) {
-#ifdef CONFIG_NET_HW_FLOWCONTROL
- if ((!tp->fc_bit) ||
- (!test_bit(tp->fc_bit, &netdev_fc_xoff)))
-#endif
- rx += tulip_rx(dev);
- tulip_refill_rx(dev);
- }
+ dev->name, csr5, inl(dev->base_addr + CSR5));
+
if (csr5 & (TxNoBuf | TxDied | TxIntr | TimerInt)) {
unsigned int dirty_tx;
@@ -457,15 +601,8 @@
}
if (csr5 & RxDied) { /* Missed a Rx frame. */
tp->stats.rx_missed_errors += inl(ioaddr + CSR8) & 0xffff;
-#ifdef CONFIG_NET_HW_FLOWCONTROL
- if (tp->fc_bit && !test_bit(tp->fc_bit, &netdev_fc_xoff)) {
- tp->stats.rx_errors++;
- tulip_start_rxtx(tp);
- }
-#else
tp->stats.rx_errors++;
tulip_start_rxtx(tp);
-#endif
}
/*
* NB: t21142_lnk_change() does a del_timer_sync(), so be careful if this
@@ -499,10 +636,6 @@
if (tulip_debug > 2)
printk(KERN_ERR "%s: Re-enabling interrupts, %8.8x.\n",
dev->name, csr5);
-#ifdef CONFIG_NET_HW_FLOWCONTROL
- if (tp->fc_bit && (test_bit(tp->fc_bit, &netdev_fc_xoff)))
- if (net_ratelimit()) printk("BUG!! enabling interupt when FC off (timerintr.) \n");
-#endif
outl(tulip_tbl[tp->chip_id].valid_intrs, ioaddr + CSR7);
tp->ttimer = 0;
oi++;
@@ -515,11 +648,8 @@
/* Acknowledge all interrupt sources. */
outl(0x8001ffff, ioaddr + CSR5);
if (tp->flags & HAS_INTR_MITIGATION) {
-#ifdef CONFIG_NET_HW_FLOWCONTROL
- if(tp->mit_change) {
- outl(mit_table[tp->mit_sel], ioaddr + CSR11);
- tp->mit_change = 0;
- }
+#ifdef USE_MITIGATION
+ outl(mit_table[MIT_TABLE], ioaddr + CSR11);
#else
/* Josip Loncaric at ICASE did extensive experimentation
to develop a good interrupt mitigation setting.*/
@@ -532,10 +662,8 @@
} else {
/* Mask all interrupting sources, set timer to
re-enable. */
-#ifndef CONFIG_NET_HW_FLOWCONTROL
outl(((~csr5) & 0x0001ebef) | AbnormalIntr | TimerInt, ioaddr + CSR7);
outl(0x0012, ioaddr + CSR11);
-#endif
}
break;
}
@@ -545,30 +673,18 @@
break;
csr5 = inl(ioaddr + CSR5);
- } while ((csr5 & (NormalIntr|AbnormalIntr)) != 0);
-
- tulip_refill_rx(dev);
-
- /* check if the card is in suspend mode */
- entry = tp->dirty_rx % RX_RING_SIZE;
- if (tp->rx_buffers[entry].skb == NULL) {
- if (tulip_debug > 1)
- printk(KERN_WARNING "%s: in rx suspend mode: (%lu) (tp->cur_rx = %u, ttimer = %d, rx = %d) go/stay in suspend mode\n", dev->name, tp->nir, tp->cur_rx, tp->ttimer, rx);
- if (tp->chip_id == LC82C168) {
- outl(0x00, ioaddr + CSR7);
- mod_timer(&tp->timer, RUN_AT(HZ/50));
- } else {
- if (tp->ttimer == 0 || (inl(ioaddr + CSR11) & 0xffff) == 0) {
- if (tulip_debug > 1)
- printk(KERN_WARNING "%s: in rx suspend mode: (%lu) set timer\n", dev->name, tp->nir);
- outl(tulip_tbl[tp->chip_id].valid_intrs | TimerInt,
- ioaddr + CSR7);
- outl(TimerInt, ioaddr + CSR5);
- outl(12, ioaddr + CSR11);
- tp->ttimer = 1;
- }
- }
- }
+ if (rxd)
+ csr5 &= ~RxPollInt;
+ } while ((csr5 & (TxNoBuf |
+ TxDied |
+ TxIntr |
+ TimerInt |
+ /* Abnormal intr. */
+ RxDied |
+ TxFIFOUnderflow |
+ TxJabber |
+ TPLnkFail |
+ SytemError )) != 0);
if ((missed = inl(ioaddr + CSR8) & 0x1ffff)) {
tp->stats.rx_dropped += missed & 0x10000 ? 0x10000 : missed;
--- linux-2.4.19.p3/drivers/net/tulip/tulip_core.c Tue Nov 5 21:33:22 2002
+++ linux-2.4.19.p4/drivers/net/tulip/tulip_core.c Tue Nov 12 18:59:20 2002
@@ -1,4 +1,4 @@
-/* tulip_core.c: A DEC 21x4x-family ethernet driver for Linux. */
+/* -*-linux-c-*- tulip_core.c: A DEC 21x4x-family ethernet driver for Linux. */
/*
Maintained by Jeff Garzik <jgarzik@mandrakesoft.com>
@@ -14,10 +14,6 @@
*/
-#define DRV_NAME "tulip"
-#define DRV_VERSION "0.9.15-pre12"
-#define DRV_RELDATE "Aug 9, 2002"
-
#include <linux/config.h>
#include <linux/module.h>
#include "tulip.h"
@@ -44,12 +40,31 @@
/* Maximum events (Rx packets, etc.) to handle at each interrupt. */
static unsigned int max_interrupt_work = 25;
-#define MAX_UNITS 8
+#define MAX_UNITS 16
/* Used to pass the full-duplex flag, etc. */
static int full_duplex[MAX_UNITS];
static int options[MAX_UNITS];
static int mtu[MAX_UNITS]; /* Jumbo MTU for interfaces. */
+
+/* Only used for SKB_RECYCLE, can't get it to #ifdef out on RH 7.3 */
+/* This is the maximum number of skbs per CPU that the driver will
+ * keep in its recycle buffer list (per driver instance, i.e. per port).
+ * Each skb will cost you a little
+ * less than 2k, so if you have little memory and make this huge, bad
+ * things will happen. For 256MB machines running at very high speeds,
+ * 1024 or 2048 may be better. There seems to be no gain at higher
+ * values, at least on 100Mbps nics.
+ */
+static int skb_hotlist = 300;
+
+static int dev_weight = 24;
+#ifdef TULIP_ADJUSTABLE_RING
+static int RX_RING_SIZE = 256;
+static int TX_RING_SIZE = 128;
+#endif
+
+
/* The possible media types that can be set in options[] are: */
const char * const medianame[32] = {
"10baseT", "10base2", "AUI", "100baseTx",
@@ -105,6 +120,13 @@
/* Time in jiffies before concluding the transmitter is hung. */
#define TX_TIMEOUT (4*HZ)
+MODULE_PARM(skb_hotlist, "i");
+MODULE_PARM(dev_weight, "i");
+
+#ifdef TULIP_ADJUSTABLE_RING
+MODULE_PARM(RX_RING_SIZE, "i");
+MODULE_PARM(TX_RING_SIZE, "i");
+#endif
MODULE_AUTHOR("The Linux Kernel Team");
MODULE_DESCRIPTION("Digital 21*4* Tulip ethernet driver");
@@ -494,30 +516,18 @@
to an alternate media type. */
tp->timer.expires = RUN_AT(next_tick);
add_timer(&tp->timer);
-}
-
-#ifdef CONFIG_NET_HW_FLOWCONTROL
-/* Enable receiver */
-void tulip_xon(struct net_device *dev)
-{
- struct tulip_private *tp = (struct tulip_private *)dev->priv;
- clear_bit(tp->fc_bit, &netdev_fc_xoff);
- if (netif_running(dev)){
-
- tulip_refill_rx(dev);
- outl(tulip_tbl[tp->chip_id].valid_intrs, dev->base_addr+CSR7);
- }
+ init_timer(&tp->oom_timer);
+ tp->oom_timer.data = (unsigned long)dev;
+ tp->oom_timer.function = oom_timer;
}
-#endif
+
static int
tulip_open(struct net_device *dev)
{
-#ifdef CONFIG_NET_HW_FLOWCONTROL
- struct tulip_private *tp = (struct tulip_private *)dev->priv;
-#endif
int retval;
+ int flg = 0;
MOD_INC_USE_COUNT;
if ((retval = request_irq(dev->irq, &tulip_interrupt, SA_SHIRQ, dev->name, dev))) {
@@ -525,14 +535,30 @@
return retval;
}
- tulip_init_ring (dev);
-
- tulip_up (dev);
-#ifdef CONFIG_NET_HW_FLOWCONTROL
- tp->fc_bit = netdev_register_fc(dev, tulip_xon);
+#ifdef TULIP_SKB_RECYCLING
+ flg = 1;
+ {
+ int i;
+ struct tulip_private *adapter = dev->priv;
+ for (i=0; i<NR_CPUS; i++) {
+ skb_queue_head_init(&(adapter->tulip_recycle[i].list));
+ }
+ }
#endif
+ printk("tulip: %s rx_ring_size: %i skb-recycling: %i skb-hotlist: %i weight: %i\n",
+ dev->name, RX_RING_SIZE, flg, skb_hotlist, dev->weight);
+ printk("tulip: %s max_interrupt_work: %i rx_copybreak: %i tx_ring_size: %i\n",
+ dev->name, max_interrupt_work, rx_copybreak, TX_RING_SIZE);
+
+ tulip_init_ring (dev);
+
+ tulip_up (dev);
+
+#ifdef EXTRA_STATS
+ tulip_open_misc(dev);
+#endif
netif_start_queue (dev);
return 0;
@@ -640,10 +666,7 @@
#endif
/* Stop and restart the chip's Tx processes . */
-#ifdef CONFIG_NET_HW_FLOWCONTROL
- if (tp->fc_bit && test_bit(tp->fc_bit,&netdev_fc_xoff))
- printk("BUG tx_timeout restarting rx when fc on\n");
-#endif
+
tulip_restart_rxtx(tp);
/* Trigger an immediate transmit demand. */
outl(0, ioaddr + CSR1);
@@ -719,6 +742,16 @@
spin_lock_irqsave(&tp->lock, eflags);
+ /* See if we can free slots on the output ring. In real life
+ examples we have seen between 2-10% of the slots cleared here */
+
+#ifdef NOT_NOW
+#ifdef EXTRA_STATS
+ tp->stats_tx_xmit_refilled +=
+#endif
+ tx_ring_free(dev);
+#endif
+
/* Calculate the next Tx descriptor entry. */
entry = tp->cur_tx % TX_RING_SIZE;
@@ -802,6 +835,7 @@
unsigned long flags;
del_timer_sync (&tp->timer);
+ del_timer_sync (&tp->oom_timer);
spin_lock_irqsave (&tp->lock, flags);
@@ -845,15 +879,31 @@
netif_stop_queue (dev);
-#ifdef CONFIG_NET_HW_FLOWCONTROL
- if (tp->fc_bit) {
- int bit = tp->fc_bit;
- tp->fc_bit = 0;
- netdev_unregister_fc(bit);
- }
+#ifdef tEXTRA_STATS
+ tulip_close_misc(dev);
#endif
tulip_down (dev);
+ /* Schedule while outstanding skb's exists */
+
+#ifdef TULIP_SKB_RECYCLING
+ for (i=0; i<NR_CPUS; i++) {
+ while (tp->cnt[i]) {
+ current->state = TASK_INTERRUPTIBLE;
+ schedule_timeout(1);
+ }
+ }
+
+ for (i=0; i<NR_CPUS; i++) {
+ struct sk_buff *skb;
+ struct sk_buff_head *list = &tp->tulip_recycle[i].list;
+ while ((skb=skb_dequeue(list))!=NULL) {
+ skb->recycle_dev = NULL;
+ kfree_skb(skb);
+ }
+ }
+#endif
+
if (tulip_debug > 1)
printk (KERN_DEBUG "%s: Shutting down ethercard, status was %2.2x.\n",
dev->name, inl (ioaddr + CSR5));
@@ -1467,6 +1517,17 @@
&tp->rx_ring_dma);
if (!tp->rx_ring)
goto err_out_mtable;
+
+#ifdef TULIP_ADJUSTABLE_RING
+ tp->rx_buffers = (struct ring_info*)(kmalloc(sizeof(struct ring_info) *
+ (RX_RING_SIZE + TX_RING_SIZE),
+ GFP_KERNEL));
+ if (!tp->rx_buffers)
+ goto err_out_free_ring;
+ memset(tp->rx_buffers, 0, sizeof(struct ring_info) * (RX_RING_SIZE + TX_RING_SIZE));
+ tp->tx_buffers = &(tp->rx_buffers[RX_RING_SIZE]);
+#endif
+
tp->tx_ring = (struct tulip_tx_desc *)(tp->rx_ring + RX_RING_SIZE);
tp->tx_ring_dma = tp->rx_ring_dma + sizeof(struct tulip_rx_desc) * RX_RING_SIZE;
@@ -1717,11 +1778,17 @@
dev->hard_start_xmit = tulip_start_xmit;
dev->tx_timeout = tulip_tx_timeout;
dev->watchdog_timeo = TX_TIMEOUT;
+ dev->poll = tulip_poll;
+ dev->weight = dev_weight;
dev->stop = tulip_close;
dev->get_stats = tulip_get_stats;
dev->do_ioctl = private_ioctl;
dev->set_multicast_list = set_rx_mode;
+#ifdef TULIP_SKB_RECYCLING
+ dev->skb_recycle = tulip_recycle;
+#endif
+
if (register_netdev(dev))
goto err_out_free_ring;
@@ -1811,6 +1878,9 @@
/* put the chip in snooze mode until opened */
tulip_set_power_state (tp, 0, 1);
+#ifdef EXTRA_STATS
+ tulip_init_one_misc(dev);
+#endif
return 0;
err_out_free_ring:
@@ -1822,6 +1892,11 @@
err_out_mtable:
if (tp->mtable)
kfree (tp->mtable);
+#ifdef TULIP_ADJUSTABLE_RING
+ if (tp->rx_buffers)
+ kfree (tp->rx_buffers);
+#endif
+
#ifndef USE_IO_OPS
iounmap((void *)ioaddr);
@@ -1876,6 +1951,10 @@
if (!dev)
return;
+#ifdef EXTRA_STATS
+ tulip_remove_one_misc(dev);
+#endif
+
tp = dev->priv;
pci_free_consistent (pdev,
sizeof (struct tulip_rx_desc) * RX_RING_SIZE +
@@ -1884,6 +1963,11 @@
unregister_netdev (dev);
if (tp->mtable)
kfree (tp->mtable);
+#ifdef TULIP_ADJUSTABLE_RING
+ if (tp->rx_buffers)
+ kfree (tp->rx_buffers);
+#endif
+
#ifndef USE_IO_OPS
iounmap((void *)dev->base_addr);
#endif
@@ -1895,6 +1979,39 @@
}
+
+
+#ifdef TULIP_SKB_RECYCLING
+
+int tulip_recycle(struct sk_buff *skb)
+{
+ struct tulip_private *adapter = skb->recycle_dev->priv;
+
+ /* Store for right CPU. For this we use skb->tag */
+ struct sk_buff_head *list = &adapter->tulip_recycle[skb->tag].list;
+
+ if (skb_cloned(skb))
+ BUG();
+
+ /*
+ decrease our outstanding skb's:
+ 1) either we store in the list OR
+ 2) we ignore so gets to kfree
+ */
+
+ adapter->cnt[skb->tag]--;
+
+ if (skb_queue_len(list) <= skb_hotlist) {
+
+ /* LIFO queue for cache friendliness */
+ skb_queue_head(list, skb);
+ return 1;
+ }
+ return 0;
+}
+#endif
+
+
static struct pci_driver tulip_driver = {
name: DRV_NAME,
id_table: tulip_pci_tbl,
--- linux-2.4.19.p3/drivers/net/tulip/tulip.h Tue Nov 5 21:33:22 2002
+++ linux-2.4.19.p4/drivers/net/tulip/tulip.h Tue Nov 12 18:59:00 2002
@@ -16,6 +16,11 @@
#ifndef __NET_TULIP_H__
#define __NET_TULIP_H__
+#define DRV_NAME "tulip"
+#define DRV_VERSION "1.1.1-NAPI"
+#define DRV_RELDATE "Feb 16, 2002"
+
+
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/types.h>
@@ -26,7 +31,22 @@
#include <asm/io.h>
#include <asm/irq.h>
+#ifdef CONFIG_PROC_FS
+#include <linux/proc_fs.h>
+#endif
+
+/* #define EXTRA_STATS 1 */
+#undef USE_MITIGATION
+//#define USE_MITIGATION
+/* #define TULIP_ADJUSTABLE_RING */
+
+#ifdef CONFIG_NET_SKB_RECYCLING
+/* #define TULIP_SKB_RECYCLING */
+#endif
+#ifdef TULIP_SKB_RECYCLING
+int tulip_recycle(struct sk_buff *skb);
+#endif
/* undefine, or define to various debugging levels (>4 == obscene levels) */
#define TULIP_DEBUG 1
@@ -126,6 +146,7 @@
CFDD_Snooze = (1 << 30),
};
+#define RxPollInt (RxIntr|RxNoBuf|RxDied|RxJabber)
/* The bits in the CSR5 status registers, mostly interrupt sources. */
enum status_bits {
@@ -261,8 +282,13 @@
Making the Tx ring too large decreases the effectiveness of channel
bonding and packet priority.
There are no ill effects from too-large receive rings. */
-#define TX_RING_SIZE 16
-#define RX_RING_SIZE 32
+#ifdef TULIP_ADJUSTABLE_RING
+extern int TX_RING_SIZE;
+extern int RX_RING_SIZE;
+#else
+#define TX_RING_SIZE 128
+#define RX_RING_SIZE 256
+#endif
#define MEDIA_MASK 31
@@ -343,16 +369,59 @@
struct tulip_tx_desc *tx_ring;
dma_addr_t rx_ring_dma;
dma_addr_t tx_ring_dma;
+#ifdef TULIP_ADJUSTABLE_RING
/* The saved address of a sent-in-place packet/buffer, for skfree(). */
- struct ring_info tx_buffers[TX_RING_SIZE];
+ struct ring_info* tx_buffers; /* should be TX_RING_SIZE long */
+ /* The addresses of receive-in-place skbuffs. */
+ struct ring_info* rx_buffers; /* should be RX_RING_SIZE long */
+#else
+ struct ring_info tx_buffers[TX_RING_SIZE];
/* The addresses of receive-in-place skbuffs. */
- struct ring_info rx_buffers[RX_RING_SIZE];
+ struct ring_info rx_buffers[RX_RING_SIZE];
+#endif
u16 setup_frame[96]; /* Pseudo-Tx frame to init address table. */
int chip_id;
int revision;
int flags;
+ int mit_on;
struct net_device_stats stats;
+#ifdef EXTRA_STATS
+ unsigned long stats_tx_xmit_refilled; /* Pkts xmit-filled */
+ unsigned long stats_tx_irq_refilled; /* Pktss irq-filled*/
+ unsigned long stats_poll_starts;
+ unsigned long stats_poll_pkts;
+#ifdef USE_MITIGATION
+ unsigned long stats_poll_pkts_mit;
+#endif
+ unsigned long stats_poll_exit_done;
+ unsigned long stats_poll_exit_not_done;
+ unsigned long stats_poll_exit_oom;
+ unsigned long stats_poll_exit_done_rx_pending;
+ unsigned long stats_poll_zero_rx;
+#ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *proc_ent;
+ char proc_ent_name[32];
+#endif
+ /* Tulip interrupt causes */
+ unsigned long stats_intr_normal;
+ unsigned long stats_intr_abnormal;
+ unsigned long stats_intr_timer;
+
+ unsigned long stats_intr_rx;
+ unsigned long stats_intr_rx_nobuf;
+ unsigned long stats_intr_rx_died;
+ unsigned long stats_intr_rx_jabber;
+
+ unsigned long stats_intr_tx;
+ unsigned long stats_intr_tx_died;
+ unsigned long stats_intr_tx_nobuf;
+ unsigned long rx_small_skb_failure;
+ unsigned long stats_intr_TPLnkPass;
+ unsigned long open_time; /* jiffies for last open */
+
+#endif /* EXTRA_STATS */
struct timer_list timer; /* Media selection timer. */
+ struct timer_list oom_timer; /* Out of memory timer. */
u32 mc_filter[2];
spinlock_t lock;
spinlock_t mii_lock;
@@ -390,7 +459,16 @@
unsigned long nir;
unsigned long base_addr;
int csr12_shadow;
- int pad0; /* Used for 8-byte alignment */
+ int rx_byte_limit; /* How many bytes to rx before bailing out of the loop */
+
+#ifdef TULIP_SKB_RECYCLING
+ unsigned int cnt[NR_CPUS];
+
+ union {
+ struct sk_buff_head list;
+ char pad[SMP_CACHE_BYTES];
+ } tulip_recycle[NR_CPUS];
+#endif
};
@@ -424,6 +502,7 @@
extern unsigned int tulip_max_interrupt_work;
extern int tulip_rx_copybreak;
void tulip_interrupt(int irq, void *dev_instance, struct pt_regs *regs);
+int tulip_poll(struct net_device *dev, int *budget);
int tulip_refill_rx(struct net_device *dev);
/* media.c */
@@ -448,11 +527,22 @@
extern const char * const medianame[];
extern const char tulip_media_cap[];
extern struct tulip_chip_table tulip_tbl[];
+void oom_timer(unsigned long data);
extern u8 t21040_csr13[];
extern u16 t21041_csr13[];
extern u16 t21041_csr14[];
extern u16 t21041_csr15[];
+/* tulip_misc.c */
+#ifdef EXTRA_STATS
+void tulip_init_one_misc(struct net_device *dev);
+void tulip_remove_one_misc (struct net_device *dev);
+void tulip_open_misc(struct net_device *dev);
+void tulip_close_misc(struct net_device *dev);
+void ave_get(unsigned long arg);
+void record_interrupt_cause( struct net_device *dev, int csr5);
+#endif
+
#ifndef USE_IO_OPS
#undef inb
#undef inw
@@ -498,3 +588,6 @@
}
#endif /* __NET_TULIP_H__ */
+
+
+
--------------070406040307030501010805--