[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Re: [PATCH] m68k: Atari SCSI - ST-DMA locking and error handling fixes



On Sun, Apr 26, 2009 at 09:24, Michael Schmitz
<schmitz@biophys.uni-duesseldorf.de> wrote:
> some fixes to the Atari NCR5380 SCSI driver: if the ST-DMA lock cannot be taken
> in the queue function, indicate host busy to the block layer. Fix return codes
> of abort and reset functions, and add scsi_unregister to module exit.
> Error handling is still messed up, most likely due to missing ST-DMA lock in
> abort and reset. This is still WIP, but might benefit users with a Falcon less
> prone to SCSI lockups, i.e. more stable SCSI clock signal.

Any news on this one ("still WIP")?

> Signed-off-by: Michael Schmitz <schmitz@debian.org>
>
>  Michael
>
> ---
>  drivers/scsi/atari_NCR5380.c |   78 +++++++++++++++++++++++++++--------------
>  drivers/scsi/atari_scsi.c    |   47 +++++++++++++++++++++----
>  2 files changed, 91 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/scsi/atari_NCR5380.c b/drivers/scsi/atari_NCR5380.c
> index 0471f88..3ba46d5 100644
> --- a/drivers/scsi/atari_NCR5380.c
> +++ b/drivers/scsi/atari_NCR5380.c
> @@ -966,13 +966,6 @@ static int NCR5380_queue_command(Scsi_Cmnd *cmd, void (*done)(Scsi_Cmnd *))
>
>        cmd->result = 0;
>
> -       /*
> -        * Insert the cmd into the issue queue. Note that REQUEST SENSE
> -        * commands are added to the head of the queue since any command will
> -        * clear the contingent allegiance condition that exists and the
> -        * sense data is only guaranteed to be valid while the condition exists.
> -        */
> -
>        local_irq_save(flags);
>        /* ++guenther: now that the issue queue is being set up, we can lock ST-DMA.
>         * Otherwise a running NCR5380_main may steal the lock.
> @@ -987,10 +980,32 @@ static int NCR5380_queue_command(Scsi_Cmnd *cmd, void (*done)(Scsi_Cmnd *))
>         * alter queues and touch the lock.
>         */
>        if (!IS_A_TT()) {
> +               int rv;
> +               /* MSch: since we may now get called from softirq context here,
> +                * we cannot always sleep while waiting for the lock.
> +                * Use status from falcon_get_lock() to detect if the lock was
> +                * successfully taken, and return appropriate status to midlevel
> +                * otherwise.
> +                * We used to pause the midlevel SCSI timer here; races between
> +                * command timeout and lowlevel error handling should not hurt
> +                * because the command isn't in any of the queues yet.
> +                */
>                /* perhaps stop command timer here */
> -               falcon_get_lock();
> +               rv = falcon_get_lock();
>                /* perhaps restart command timer here */
> +               if (rv) {
> +                       local_irq_restore(flags);
> +                       return SCSI_MLQUEUE_HOST_BUSY;
> +               }
>        }
> +
> +       /*
> +        * Insert the cmd into the issue queue. Note that REQUEST SENSE
> +        * commands are added to the head of the queue since any command will
> +        * clear the contingent allegiance condition that exists and the
> +        * sense data is only guaranteed to be valid while the condition exists.
> +        */
> +
>        if (!(hostdata->issue_queue) || (cmd->cmnd[0] == REQUEST_SENSE)) {
>                LIST(cmd, hostdata->issue_queue);
>                SET_NEXT(cmd, hostdata->issue_queue);
> @@ -1014,10 +1029,12 @@ static int NCR5380_queue_command(Scsi_Cmnd *cmd, void (*done)(Scsi_Cmnd *))
>         * If we're not in an interrupt, we can call NCR5380_main()
>         * unconditionally, because it cannot be already running.
>         */
> -       if (in_interrupt() || ((flags >> 8) & 7) >= 6)
> +       /* As of 2.6.19, the coroutine has to be put on the task queue instead
> +        * of being called directly. It might still be called directly if not
> +        * in softirq, though. Need to test this.
> +        */
>                queue_main();
> -       else
> -               NCR5380_main(NULL);
> +
>        return 0;
>  }
>
> @@ -1463,6 +1480,7 @@ static int NCR5380_select(struct Scsi_Host *instance, Scsi_Cmnd *cmd, int tag)
>        ARB_PRINTK("scsi%d: arbitration complete\n", HOSTNO);
>
>        if (hostdata->connected) {
> +
>                NCR5380_write(MODE_REG, MR_BASE);
>                return -1;
>        }
> @@ -2065,7 +2083,6 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance)
>                                /*
>                                 * The preferred transfer method is going to be
>                                 * PSEUDO-DMA for systems that are strictly PIO,
> -                                * since we can let the hardware do the handshaking.
>                                 *
>                                 * For this to work, we need to know the transfersize
>                                 * ahead of time, since the pseudo-DMA code will sit
> @@ -2631,7 +2648,7 @@ static void NCR5380_reselect(struct Scsi_Host *instance)
>  *     host byte of the result field to, if zero DID_ABORTED is
>  *     used.
>  *
> - * Returns : 0 - success, -1 on failure.
> + * Returns : SUCCESS - success, FAILED on failure.
>  *
>  * XXX - there is no way to abort the command that is currently
>  *      connected, you have to wait for it to complete.  If this is
> @@ -2701,11 +2718,12 @@ int NCR5380_abort(Scsi_Cmnd *cmd)
>                        local_irq_restore(flags);
>                        cmd->scsi_done(cmd);
>                        falcon_release_lock_if_possible(hostdata);
> -                       return SCSI_ABORT_SUCCESS;
> +                       return SUCCESS;
>                } else {
> -/*                     local_irq_restore(flags); */
> +                       /* not sure */
> +                       local_irq_restore(flags);
>                        printk("scsi%d: abort of connected command failed!\n", HOSTNO);
> -                       return SCSI_ABORT_ERROR;
> +                       return FAILED;
>                }
>        }
>  #endif
> @@ -2729,7 +2747,7 @@ int NCR5380_abort(Scsi_Cmnd *cmd)
>                         * yet... */
>                        tmp->scsi_done(tmp);
>                        falcon_release_lock_if_possible(hostdata);
> -                       return SCSI_ABORT_SUCCESS;
> +                       return SUCCESS;
>                }
>        }
>
> @@ -2747,7 +2765,7 @@ int NCR5380_abort(Scsi_Cmnd *cmd)
>        if (hostdata->connected) {
>                local_irq_restore(flags);
>                ABRT_PRINTK("scsi%d: abort failed, command connected.\n", HOSTNO);
> -               return SCSI_ABORT_SNOOZE;
> +               return FAILED;
>        }
>
>        /*
> @@ -2782,7 +2800,7 @@ int NCR5380_abort(Scsi_Cmnd *cmd)
>                        ABRT_PRINTK("scsi%d: aborting disconnected command.\n", HOSTNO);
>
>                        if (NCR5380_select(instance, cmd, (int)cmd->tag))
> -                               return SCSI_ABORT_BUSY;
> +                               return FAILED;
>
>                        ABRT_PRINTK("scsi%d: nexus reestablished.\n", HOSTNO);
>
> @@ -2809,7 +2827,7 @@ int NCR5380_abort(Scsi_Cmnd *cmd)
>                                        local_irq_restore(flags);
>                                        tmp->scsi_done(tmp);
>                                        falcon_release_lock_if_possible(hostdata);
> -                                       return SCSI_ABORT_SUCCESS;
> +                                       return SUCCESS;
>                                }
>                        }
>                }
> @@ -2835,7 +2853,9 @@ int NCR5380_abort(Scsi_Cmnd *cmd)
>         */
>        falcon_release_lock_if_possible(hostdata);
>
> -       return SCSI_ABORT_NOT_RUNNING;
> +       /* NCR5380 has FAILED here */
> +       /* return SUCCESS; */
> +       return FAILED;
>  }
>
>
> @@ -2844,16 +2864,18 @@ int NCR5380_abort(Scsi_Cmnd *cmd)
>  *
>  * Purpose : reset the SCSI bus.
>  *
> - * Returns : SCSI_RESET_WAKEUP
> + * Returns : SUCCESS
>  *
>  */
>
> +#undef FALCON_RESET_KILL_QUEUES
> +
>  static int NCR5380_bus_reset(Scsi_Cmnd *cmd)
>  {
>        SETUP_HOSTDATA(cmd->device->host);
>        int i;
>        unsigned long flags;
> -#if 1
> +#if defined(FALCON_RESET_KILL_QUEUES)
>        Scsi_Cmnd *connected, *disconnected_queue;
>  #endif
>
> @@ -2878,7 +2900,8 @@ static int NCR5380_bus_reset(Scsi_Cmnd *cmd)
>         * through anymore ... */
>        (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
>
> -#if 1  /* XXX Should now be done by midlevel code, but it's broken XXX */
> +#if defined(FALCON_RESET_KILL_QUEUES)
> +       /* XXX Should now be done by midlevel code, but it's broken XXX */
>        /* XXX see below                                            XXX */
>
>        /* MSch: old-style reset: actually abort all command processing here */
> @@ -2934,7 +2957,7 @@ static int NCR5380_bus_reset(Scsi_Cmnd *cmd)
>         * the midlevel code that the reset was SUCCESSFUL, and there is no
>         * need to 'wake up' the commands by a request_sense
>         */
> -       return SCSI_RESET_SUCCESS | SCSI_RESET_BUS_RESET;
> +       return SUCCESS;
>  #else /* 1 */
>
>        /* MSch: new-style reset handling: let the mid-level do what it can */
> @@ -2982,6 +3005,7 @@ static int NCR5380_bus_reset(Scsi_Cmnd *cmd)
>        local_irq_restore(flags);
>
>        /* we did no complete reset of all commands, so a wakeup is required */
> -       return SCSI_RESET_WAKEUP | SCSI_RESET_BUS_RESET;
> -#endif /* 1 */
> +       /* The new error handler code implicitly does this for us anyway */
> +       return SUCCESS;
> +#endif /* defined(FALCON_RESET_KILL_QUEUES) */
>  }
> diff --git a/drivers/scsi/atari_scsi.c b/drivers/scsi/atari_scsi.c
> index ad7a23a..9ebfbc6 100644
> --- a/drivers/scsi/atari_scsi.c
> +++ b/drivers/scsi/atari_scsi.c
> @@ -196,7 +196,7 @@ static unsigned long atari_dma_xfer_len(unsigned long wanted_len,
>  static irqreturn_t scsi_tt_intr(int irq, void *dummy);
>  static irqreturn_t scsi_falcon_intr(int irq, void *dummy);
>  static void falcon_release_lock_if_possible(struct NCR5380_hostdata *hostdata);
> -static void falcon_get_lock(void);
> +static int falcon_get_lock(void);
>  #ifdef CONFIG_ATARI_SCSI_RESET_BOOT
>  static void atari_scsi_reset_boot(void);
>  #endif
> @@ -498,6 +498,16 @@ static int falcon_dont_release = 0;
>  * again (but others waiting longer more probably will win).
>  */
>
> +/*
> + * MSch 20061228: in 2.6.x, the fairness wait appears to open a race with
> + * concurrent use of the lock by the IDE driver. Once the lock is taken by
> + * IDE, the SCSI queuecmd function can no longer schedule because it is run
> + * from softirq context a lot.
> + * Disable the fairness wait until I have had time to sort out the lock issues.
> + */
> +#undef FALCON_FAIRNESS_WAIT
> +#define FALCON_NEVER_SLEEP 1
> +
>  static void falcon_release_lock_if_possible(struct NCR5380_hostdata *hostdata)
>  {
>        unsigned long flags;
> @@ -519,7 +529,9 @@ static void falcon_release_lock_if_possible(struct NCR5380_hostdata *hostdata)
>                }
>                falcon_got_lock = 0;
>                stdma_release();
> +#if defined(FALCON_FAIRNESS_WAIT)
>                wake_up(&falcon_fairness_wait);
> +#endif
>        }
>
>        local_irq_restore(flags);
> @@ -540,21 +552,37 @@ static void falcon_release_lock_if_possible(struct NCR5380_hostdata *hostdata)
>  * Complicated, complicated.... Sigh...
>  */
>
> -static void falcon_get_lock(void)
> +/* MSch 20061229: in order to avoid sleeping when the lock is not available,
> + * return the current lock state here. atari_queue_command() will then return
> + * with error, causing the midlevel code to pause request submission.
> + */
> +static int falcon_get_lock(void)
>  {
>        unsigned long flags;
>
>        if (IS_A_TT())
> -               return;
> +               return 0;
>
>        local_irq_save(flags);
>
> -       while (!in_irq() && falcon_got_lock && stdma_others_waiting())
> +#if defined(FALCON_FAIRNESS_WAIT)
> +       /* MSch: we may not sleep even while in softirq ! */
> +       while (!in_interrupt() && falcon_got_lock && stdma_others_waiting())
>                sleep_on(&falcon_fairness_wait);
> -
> +#endif
>        while (!falcon_got_lock) {
>                if (in_irq())
>                        panic("Falcon SCSI hasn't ST-DMA lock in interrupt");
> +               /* we may not sleep in soft interrupts neither, so bail out */
> +#if defined(FALCON_NEVER_SLEEP)
> +               if (stdma_islocked()) {
> +#else
> +               if (in_softirq() && stdma_islocked()) {
> +#endif
> +                       // printk(KERN_INFO "Falcon SCSI does not hold ST-DMA lock in softirq!\n" );
> +                       local_irq_restore(flags);
> +                       return 1;
> +               }
>                if (!falcon_trying_lock) {
>                        falcon_trying_lock = 1;
>                        stdma_lock(scsi_falcon_intr, NULL);
> @@ -569,6 +597,8 @@ static void falcon_get_lock(void)
>        local_irq_restore(flags);
>        if (!falcon_got_lock)
>                panic("Falcon SCSI: someone stole the lock :-(\n");
> +
> +       return 0;
>  }
>
>
> @@ -589,7 +619,7 @@ int atari_queue_command(Scsi_Cmnd *cmd, void (*done)(Scsi_Cmnd *))
>  #endif
>
>
> -int __init atari_scsi_detect(struct scsi_host_template *host)
> +int atari_scsi_detect(struct scsi_host_template *host)
>  {
>        static int called = 0;
>        struct Scsi_Host *instance;
> @@ -652,6 +682,8 @@ int __init atari_scsi_detect(struct scsi_host_template *host)
>                }
>                atari_dma_phys_buffer = virt_to_phys(atari_dma_buffer);
>                atari_dma_orig_addr = 0;
> +               printk(KERN_ERR "atari_scsi_detect: ST-RAM "
> +                       "double buffer at %p\n", atari_dma_phys_buffer);
>        }
>  #endif
>        instance = scsi_register(host, sizeof(struct NCR5380_hostdata));
> @@ -745,6 +777,7 @@ int atari_scsi_release(struct Scsi_Host *sh)
>  {
>        if (IS_A_TT())
>                free_irq(IRQ_TT_MFP_SCSI, sh);
> +       scsi_unregister(atari_scsi_host);
>        if (atari_dma_buffer)
>                atari_stram_free(atari_dma_buffer);
>        return 1;
> @@ -828,7 +861,7 @@ int atari_scsi_bus_reset(Scsi_Cmnd *cmd)
>        } else {
>                atari_turnon_irq(IRQ_MFP_FSCSI);
>        }
> -       if ((rv & SCSI_RESET_ACTION) == SCSI_RESET_SUCCESS)
> +       if (rv == SUCCESS)
>                falcon_release_lock_if_possible(hostdata);
>
>        return rv;
> --
> 1.5.6.5
>
>



-- 
Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds


Reply to: