tulip bug, alternative fix, #3 (tested)

Wolfgang Walter wolfgang.walter@stusta.mhn.de
Sun Dec 12 17:31:39 1999


On Sun, Dec 12, 1999 at 09:44:14PM +0100, Stephen R. van den Berg wrote:
> Wolfgang Walter wrote:
> >I sent description of the bugs to this list and to Donald Becker. As I haven't
> >got an answer from him, yet, I don't know if he is already working on a fix or
> >what he thinks about this patch. Maybe he's already working on a better fix.
> 
> Well, I looked at the patch.  Haven't yet doublechecked the exact
> IO-commands, but, a casual comparison of what you did and past discussions
> on the list reveals that you simply disable the "Too much work during
> interrupt" compensations.  Donald remarked that this is not a good idea.

Yes, too. I also changed the way the driver tries to detect too-much-work.

The new check does not check interrupts, but the number of packets sent and
received (and it avoids to mix them).

The to-much-work-handling had serveral problems. Especially it acknowledged
interrupts and disabled the last one occured. This was the reason for
permanent tx-hangs (probably tx-interrupts could be acknowlegded without
beeing handled).

I think the too-much-work should work rather acceptable now:

1. if to much tx-work has to be done in the interrupt-handler the tx-ring
you have less cpu-time to send

2. if you get to much packets sent rx-ring gets full and receiving is
suspended.

If this is not enough for receiving we may leave the card longer in
rx-suspend-state: we do not handle received-packets for a certain
amount of time so the rx-ring remains full and the card in rx-suspend
mode.

> I'm not sure if he's right (didn't have time to check the logic yet), but
> simply skipping it seems almost a bit too easy.
> 
> Comparing his patches for v0.91u with the ones you supplied, I come up
> with a merged version (still suitable for standalone operation with
> 2.2.x) that takes the INTR_MITIGATION support into account (I'm not even
> sure if that can make a difference in this lockup case).
> 
> Oh yes, incidentally, your patches seem relative to a different 0.91g version

Its the one of 2.2.14pre4.

> than the one I have (and picked up from Donald's website).  Tiny differences,
> but enough to screw up the diffs/patch.
> 
> If people could review this merged version (even test it perhaps?), and
> tell me if I made a mess out of it or not?  This patch is relative to
> the v0.91g which I got from Donald's website.
> 
> --- tulip.c.91g	Sat Dec 11 19:37:43 1999
> +++ ../tulip.c	Sun Dec 12 06:27:12 1999
> @@ -18,7 +18,7 @@
>  */
>  
>  #define SMP_CHECK
> -static const char version[] = "tulip.c:v0.91g 7/16/99 becker@cesdis.gsfc.nasa.gov\n";
> +static const char version[] = "tulip.c:v0.91gB 7/16/99 becker@cesdis.gsfc.nasa.gov\n";
>  
>  /* A few user-configurable values. */
>  
> @@ -51,6 +51,7 @@
>     bonding and packet priority.
>     There are no ill effects from too-large receive rings. */
>  #define TX_RING_SIZE	16
> +#define TX_QUEUE_LEN	10
>  #define RX_RING_SIZE	32
>  
>  /* Set the copy breakpoint for the copy-only-tiny-buffer Rx structure. */
> @@ -361,7 +362,7 @@
>  	HAS_MII=1, HAS_MEDIA_TABLE=2, CSR12_IN_SROM=4, ALWAYS_CHECK_MII=8,
>  	HAS_PWRDWN=0x10, MC_HASH_ONLY=0x20, /* Hash-only multicast filter. */
>  	HAS_PNICNWAY=0x80, HAS_NWAY143=0x40,	/* Uses internal NWay xcvr. */
> -	HAS_8023X=0x100,
> +	HAS_INTR_MITIGATION=0x100, HAS_8023X=0x400,
>  };
>  static struct tulip_chip_table {
>  	char *chip_name;
> @@ -375,7 +376,8 @@
>    { "Digital DS21140 Tulip", 128, 0x0001ebef,
>  	HAS_MII | HAS_MEDIA_TABLE | CSR12_IN_SROM, tulip_timer },
>    { "Digital DS21143 Tulip", 128, 0x0801fbff,
> -	HAS_MII | HAS_MEDIA_TABLE | ALWAYS_CHECK_MII | HAS_PWRDWN | HAS_NWAY143,
> +	HAS_MII | HAS_MEDIA_TABLE | ALWAYS_CHECK_MII | HAS_PWRDWN | HAS_NWAY143
> +	| HAS_INTR_MITIGATION,
>  	t21142_timer },
>    { "Lite-On 82c168 PNIC", 256, 0x0001ebef,
>  	HAS_MII | HAS_PNICNWAY, pnic_timer },
> @@ -433,7 +435,7 @@
>  
>  /* The bits in the CSR5 status registers, mostly interrupt sources. */
>  enum status_bits {
> -	TimerInt=0x800, TPLnkFail=0x1000, TPLnkPass=0x10,
> +	TimerInt=0x800, SytemError=0x2000, TPLnkFail=0x1000, TPLnkPass=0x10,
>  	NormalIntr=0x10000, AbnormalIntr=0x8000,
>  	RxJabber=0x200, RxDied=0x100, RxNoBuf=0x80, RxIntr=0x40,
>  	TxFIFOUnderflow=0x20, TxJabber=0x08, TxNoBuf=0x04, TxDied=0x02, TxIntr=0x01,
> @@ -530,6 +532,9 @@
>  	int cur_index;						/* Current media index. */
>  	int saved_if_port;
>  	unsigned char pci_bus, pci_devfn;
> +	int ttimer;
> +	int susp_rx;
> +	unsigned long nir;
>  	int pad0, pad1;						/* Used for 8-byte alignment */
>  };
>  
> @@ -548,6 +553,7 @@
>  static void tulip_tx_timeout(struct device *dev);
>  static void tulip_init_ring(struct device *dev);
>  static int tulip_start_xmit(struct sk_buff *skb, struct device *dev);
> +static int tulip_refill_rx(struct device *dev);
>  static int tulip_rx(struct device *dev);
>  static void tulip_interrupt(int irq, void *dev_instance, struct pt_regs *regs);
>  static int tulip_close(struct device *dev);
> @@ -2484,6 +2490,9 @@
>  	tp->tx_full = 0;
>  	tp->cur_rx = tp->cur_tx = 0;
>  	tp->dirty_rx = tp->dirty_tx = 0;
> +	tp->susp_rx = 0;
> +	tp->ttimer = 0;
> +	tp->nir = 0;
>  
>  	for (i = 0; i < RX_RING_SIZE; i++) {
>  		tp->rx_ring[i].status = 0x00000000;
> @@ -2545,11 +2554,11 @@
>  	tp->tx_skbuff[entry] = skb;
>  	tp->tx_ring[entry].buffer1 = virt_to_bus(skb->data);
>  
> -	if (tp->cur_tx - tp->dirty_tx < TX_RING_SIZE/2) {/* Typical path */
> +	if (tp->cur_tx - tp->dirty_tx < TX_QUEUE_LEN/2) {/* Typical path */
>  		flag = 0x60000000; /* No interrupt */
> -	} else if (tp->cur_tx - tp->dirty_tx == TX_RING_SIZE/2) {
> +	} else if (tp->cur_tx - tp->dirty_tx == TX_QUEUE_LEN/2) {
>  		flag = 0xe0000000; /* Tx-done intr. */
> -	} else if (tp->cur_tx - tp->dirty_tx < TX_RING_SIZE - 2) {
> +	} else if (tp->cur_tx - tp->dirty_tx < TX_QUEUE_LEN - 2) {
>  		flag = 0x60000000; /* No Tx-done intr. */
>  	} else {		/* Leave room for set_rx_mode() to fill entries. */
>  		tp->tx_full = 1;
> @@ -2578,7 +2587,15 @@
>  	struct device *dev = (struct device *)dev_instance;
>  	struct tulip_private *tp = (struct tulip_private *)dev->priv;
>  	long ioaddr = dev->base_addr;
> -	int csr5, work_budget = max_interrupt_work;
> +	int csr5;
> +	int entry;
> +	int missed;
> +	int rx = 0;
> +	int tx = 0;
> +	int oi = 0;
> +	int maxrx = RX_RING_SIZE;
> +	int maxtx = TX_RING_SIZE;
> +	int maxoi = TX_RING_SIZE;
>  
>  #if defined(__i386__) && defined(SMP_CHECK)
>  	if (test_and_set_bit(0, (void*)&dev->interrupt)) {
> @@ -2596,6 +2613,8 @@
>  	dev->interrupt = 1;
>  #endif
>  
> +	tp->nir++;
> +
>  	do {
>  		csr5 = inl(ioaddr + CSR5);
>  		/* Acknowledge all of the current interrupt sources ASAP. */
> @@ -2608,10 +2627,12 @@
>  		if ((csr5 & (NormalIntr|AbnormalIntr)) == 0)
>  			break;
>  
> -		if (csr5 & (RxIntr | RxNoBuf))
> -			work_budget -= tulip_rx(dev);
> +		if (csr5 & (RxIntr | RxNoBuf)) {
> +			rx += tulip_rx(dev);
> +			tulip_refill_rx(dev);
> +		}
>  
> -		if (csr5 & (TxNoBuf | TxDied | TxIntr)) {
> +		if (csr5 & (TxNoBuf | TxDied | TxIntr | TimerInt)) {
>  			unsigned int dirty_tx;
>  
>  			for (dirty_tx = tp->dirty_tx; tp->cur_tx - dirty_tx > 0;
> @@ -2656,6 +2677,7 @@
>  				/* Free the original skb. */
>  				dev_free_skb(tp->tx_skbuff[entry]);
>  				tp->tx_skbuff[entry] = 0;
> +				tx++;
>  			}
>  
>  #ifndef final_version
> @@ -2667,7 +2689,7 @@
>  #endif
>  
>  			if (tp->tx_full && dev->tbusy
> -				&& tp->cur_tx - dirty_tx  < TX_RING_SIZE - 2) {
> +				&& tp->cur_tx - dirty_tx  < TX_QUEUE_LEN - 4) {
>  				/* The ring is no longer full, clear tbusy. */
>  				tp->tx_full = 0;
>  				dev->tbusy = 0;
> @@ -2698,38 +2720,77 @@
>  				/* Restart the transmit process. */
>  				outl(tp->csr6 | 0x0002, ioaddr + CSR6);
>  				outl(tp->csr6 | 0x2002, ioaddr + CSR6);
> +				outl(0, ioaddr + CSR1);
>  			}
>  			if (csr5 & RxDied) {		/* Missed a Rx frame. */
>  				tp->stats.rx_errors++;
>  				tp->stats.rx_missed_errors += inl(ioaddr + CSR8) & 0xffff;
>  				outl(tp->csr6 | 0x2002, ioaddr + CSR6);
>  			}
> -			if (csr5 & TimerInt) {
> -				if (tulip_debug > 2)
> -					printk(KERN_ERR "%s: Re-enabling interrupts, %8.8x.\n",
> -						   dev->name, csr5);
> -				outl(tulip_tbl[tp->chip_id].valid_intrs, ioaddr + CSR7);
> -			}
>  			if (csr5 & (TPLnkPass | TPLnkFail | 0x08000000)) {
>  				if (tp->link_change)
>  					(tp->link_change)(dev, csr5);
>  			}
> +			if (csr5 & SytemError) {
> +				printk(KERN_ERR "%s: (%lu) System Error occured\n", dev->name, tp->nir);
> +			}
>  			/* Clear all error sources, included undocumented ones! */
>  			outl(0x0800f7ba, ioaddr + CSR5);
> +			oi++;
>  		}
> -		if (--work_budget < 0) {
> +		if (csr5 & TimerInt) {
> +#if 0
> +			if (tulip_debug > 2)
> +				printk(KERN_ERR "%s: Re-enabling interrupts, %8.8x.\n",
> +					   dev->name, csr5);
> +			outl(tulip_tbl[tp->chip_id].valid_intrs, ioaddr + CSR7);
> +#endif
> +			tp->ttimer = 0;
> +			oi++;
> +		}
> +		if (tx > maxtx || rx > maxrx || oi > maxoi) {
> +#if 0
>  			if (tulip_debug > 1)
> +#endif
>  				printk(KERN_WARNING "%s: Too much work during an interrupt, "
> -					   "csr5=0x%8.8x.\n", dev->name, csr5);
> +					   "csr5=0x%8.8x. (%lu) (%d,%d,%d)\n", dev->name, csr5, tp->nir, tx, rx, oi);
>  			/* Acknowledge all interrupt sources. */
> +#if 1
>  			outl(0x8001ffff, ioaddr + CSR5);
> -			/* Clear all interrupting sources, set timer to re-enable. */
> -			outl(((~csr5) & 0x0001ebef) | 0x0800, ioaddr + CSR7);
> -			outl(12, ioaddr + CSR11);
> +			if (tp->flags & HAS_INTR_MITIGATION)
> +				outl(0x45240000, ioaddr + CSR11);
> +			else {
> +				/* Mask all interrupting sources, set timer to e-enable. */
> +				outl(((~csr5) & 0x0001ebef) | AbnormalIntr | TimerInt,
> +					 ioaddr + CSR7);
> +				outl(0x0012, ioaddr + CSR11);
> +			}
> + 			tp->ttimer = 1;
> +#endif

This will not work. You must remove the else-tree.

1. You switch of interrupts and never switch them on again:

2. Switching of interrupts was the problem because some people observed.

HAS_INTR_MITIGATION is probably a good method.

>  			break;
>  		}
>  	} while (1);
>  
> +	tulip_refill_rx(dev);
> +
> +	/* check if we card is in suspend mode */
> +	entry = tp->dirty_rx % RX_RING_SIZE;
> +	if (tp->rx_skbuff[entry] == NULL) {
> +		printk(KERN_WARNING "%s: in rx suspend mode: (%lu) (tp->cur_rx = %u, ttimer = %d, rx = %d) go/stay in suspend mode\n", dev->name, tp->nir, tp->cur_rx, tp->ttimer, rx);
> +		if (tp->ttimer == 0 || (inl(ioaddr + CSR11) & 0xffff) == 0) {
> +			printk(KERN_WARNING "%s: in rx suspend mode: (%lu) set timer\n", dev->name, tp->nir);
> +			outl(tulip_tbl[tp->chip_id].valid_intrs | TimerInt,
> +				ioaddr + CSR7);
> +			outl(TimerInt, ioaddr + CSR5);
> +			outl(12, ioaddr + CSR11);
> +			tp->ttimer = 1;
> +		}
> +	}
> +
> +	if ((missed = inl(ioaddr + CSR8) & 0x1ffff)) {
> +		tp->stats.rx_dropped += missed & 0x10000 ? 0x10000 : missed;
> +	}
> +
>  	if (tulip_debug > 3)
>  		printk(KERN_DEBUG "%s: exiting interrupt, csr5=%#4.4x.\n",
>  			   dev->name, inl(ioaddr + CSR5));
> @@ -2742,13 +2803,36 @@
>  	return;
>  }
>  
> +static int tulip_refill_rx(struct device *dev)
> +{
> +	struct tulip_private *tp = (struct tulip_private *)dev->priv;
> +	int entry;
> +	int refilled = 0;
> +
> +	/* Refill the Rx ring buffers. */
> +	for (; tp->cur_rx - tp->dirty_rx > 0; tp->dirty_rx++) {
> +		entry = tp->dirty_rx % RX_RING_SIZE;
> +		if (tp->rx_skbuff[entry] == NULL) {
> +			struct sk_buff *skb;
> +			skb = tp->rx_skbuff[entry] = dev_alloc_skb(PKT_BUF_SZ);
> +			if (skb == NULL)
> +				break;
> +			skb->dev = dev;			/* Mark as being used by this device. */
> +			tp->rx_ring[entry].buffer1 = virt_to_bus(skb->tail);
> +			refilled++;
> +		}
> +		tp->rx_ring[entry].status = cpu_to_le32(DescOwned);
> +	}
> +	return refilled;
> +}
> +
>  static int
>  tulip_rx(struct device *dev)
>  {
>  	struct tulip_private *tp = (struct tulip_private *)dev->priv;
>  	int entry = tp->cur_rx % RX_RING_SIZE;
>  	int rx_work_limit = tp->dirty_rx + RX_RING_SIZE - tp->cur_rx;
> -	int work_done = 0;
> +	int received = 0;
>  
>  	if (tulip_debug > 4)
>  		printk(KERN_DEBUG " In tulip_rx(), entry %d %8.8x.\n", entry,
> @@ -2810,7 +2894,6 @@
>  				memcpy(skb_put(skb, pkt_len),
>  					   bus_to_virt(tp->rx_ring[entry].buffer1), pkt_len);
>  #endif
> -				work_done++;
>  			} else { 	/* Pass up the skb already on the Rx ring. */
>  				char *temp = skb_put(skb = tp->rx_skbuff[entry], pkt_len);
>  				tp->rx_skbuff[entry] = NULL;
> @@ -2830,25 +2913,11 @@
>  			tp->stats.rx_bytes += pkt_len;
>  #endif
>  		}
> +		received++;
>  		entry = (++tp->cur_rx) % RX_RING_SIZE;
>  	}
>  
> -	/* Refill the Rx ring buffers. */
> -	for (; tp->cur_rx - tp->dirty_rx > 0; tp->dirty_rx++) {
> -		entry = tp->dirty_rx % RX_RING_SIZE;
> -		if (tp->rx_skbuff[entry] == NULL) {
> -			struct sk_buff *skb;
> -			skb = tp->rx_skbuff[entry] = dev_alloc_skb(PKT_BUF_SZ);
> -			if (skb == NULL)
> -				break;
> -			skb->dev = dev;			/* Mark as being used by this device. */
> -			tp->rx_ring[entry].buffer1 = virt_to_bus(skb->tail);
> -			work_done++;
> -		}
> -		tp->rx_ring[entry].status = DescOwned;
> -	}
> -
> -	return work_done;
> +	return received;
>  }
>  
>  static int
> -- 
> Sincerely,                                                          srb@cuci.nl
>            Stephen R. van den Berg (AKA BuGless).
> 
> Climate is what you expect.  Weather is what you get.


Greetings,

Wolfgang Walter

-- 
Veni, Vidi, VISA:
	I came, I saw, I did a little shopping.