--- linux-2.3.99-pre4.1/drivers/block/ll_rw_blk.c.~1~	Thu Mar 30 18:14:46 2000
+++ linux-2.3.99-pre4.1/drivers/block/ll_rw_blk.c	Fri Mar 31 02:54:32 2000
@@ -120,6 +120,14 @@
  */
 int * max_sectors[MAX_BLKDEV];
 
+/*
+ * MUTEX locking to prevent concurrent fsync()s to the block devices.
+ * (Concurrent syncs thrash the disk enormously and result in much worse
+ * performance than serial syncs.)
+ */
+char blk_synclock[MAX_BLKDEV] = {0};
+
+
 static inline int get_max_sectors(kdev_t dev)
 {
 	if (!max_sectors[MAJOR(dev)])
@@ -411,6 +419,149 @@
 		printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n");
 }
 
+/* Return up to two hd_structs on which to do IO accounting for a given
+ * request.  On a partitioned device, we want to account both against
+ * the partition and against the whole disk.
+ *
+ * *hd1 receives the whole-disk entry and *hd2 the partition entry.  Both
+ * are NULL when major_gendisk has no entry (or one without a part array)
+ * for the request's major; *hd2 is also NULL when the request already
+ * addresses the whole disk. */
+static void locate_hd_struct(struct request *req,
+			     struct hd_struct **hd1,
+			     struct hd_struct **hd2)
+{
+	struct gendisk *gd;
+
+	*hd1 = NULL;
+	*hd2 = NULL;
+
+	gd = major_gendisk[MAJOR(req->rq_dev)];
+	if (gd && gd->part) {
+		/* Mask out the partition bits: account for the entire disk */
+		int devnr = MINOR(req->rq_dev) >> gd->minor_shift;
+		int whole_minor = devnr << gd->minor_shift;
+		*hd1 = &gd->part[whole_minor];
+		if (whole_minor != MINOR(req->rq_dev))
+			*hd2= &gd->part[MINOR(req->rq_dev)];
+	}
+}
+
+/* Round off the performance stats on an hd_struct.  The average IO
+ * queue length and utilisation statistics are maintained by observing
+ * the current state of the queue length and the amount of time it has
+ * been in this state for.  Normally, that accounting is done on IO
+ * completion, but that can result in more than a second's worth of IO
+ * being accounted for within any one second, leading to >100%
+ * utilisation.  To deal with that, we do a round-off before returning
+ * the results when reading /proc/partitions, accounting immediately for
+ * all queue usage up to the current jiffies and restarting the counters
+ * again.
+ *
+ * Both counters are advanced from one jiffies snapshot, so a tick that
+ * arrives part-way through is not charged to aveq twice. */
+void disk_round_stats(struct hd_struct *hd)
+{
+	unsigned long now = jiffies;
+
+	hd->aveq += (hd->ios_in_flight * (now - hd->last_queue_change));
+	hd->last_queue_change = now;
+
+	if (hd->ios_in_flight)
+		hd->io_ticks += (now - hd->last_idle_time);
+	hd->last_idle_time = now;
+}
+
+/* An IO has left the queue.  Round the time-weighted stats off at the
+ * old queue depth first, then drop the in-flight count.  The count is
+ * unsigned, so refuse to step below zero rather than wrap. */
+static inline void down_ios(struct hd_struct *hd)
+{
+	disk_round_stats(hd);
+	if (hd->ios_in_flight)
+		--hd->ios_in_flight;
+}
+
+/* An IO has joined the queue.  Round the stats off at the old queue
+ * depth before raising the in-flight count. */
+static inline void up_ios(struct hd_struct *hd)
+{
+	disk_round_stats(hd);
+	++hd->ios_in_flight;
+}
+
+/* Charge `sectors' sectors of req to hd as they are queued.  A merge
+ * into an existing request only bumps the merge counter; a request that
+ * is not a merge counts as one more IO in flight. */
+static void account_io_start(struct hd_struct *hd, struct request *req,
+			     int merge, int sectors)
+{
+	switch (req->cmd) {
+	case READ:
+		if (merge)
+			hd->rd_merges++;
+		hd->rd_sectors += sectors;
+		break;
+	case WRITE:
+		if (merge)
+			hd->wr_merges++;
+		hd->wr_sectors += sectors;
+		break;
+	default:
+		/* not a read or write: no per-direction counters */
+		break;
+	}
+	if (!merge)
+		up_ios(hd);
+}
+
+/* Charge the completion of req to hd: the jiffies elapsed since
+ * req->start_time go to the read or write tick total, and the request
+ * stops counting as in flight. */
+static void account_io_end(struct hd_struct *hd, struct request *req)
+{
+	unsigned long duration = jiffies - req->start_time;
+	switch (req->cmd) {
+	case READ:
+		hd->rd_ticks += duration;
+		hd->rd_ios++;
+		break;
+	case WRITE:
+		hd->wr_ticks += duration;
+		hd->wr_ios++;
+		break;
+	default:
+		/* not a read or write: no per-direction counters */
+		break;
+	}
+	down_ios(hd);
+}
+
+/* Account for sectors being queued for req, against both the whole
+ * disk and, where there is one, the partition.  merge is non-zero when
+ * the sectors were merged into an already queued request. */
+void req_new_io(struct request *req, int merge, int sectors)
+{
+	struct hd_struct *hd1, *hd2;
+	locate_hd_struct(req, &hd1, &hd2);
+	if (hd1)
+		account_io_start(hd1, req, merge, sectors);
+	if (hd2)
+		account_io_start(hd2, req, merge, sectors);
+}
+
+/* Account for the completion of req, against both the whole disk and,
+ * where there is one, the partition. */
+void req_finished_io(struct request *req)
+{
+	struct hd_struct *hd1, *hd2;
+	locate_hd_struct(req, &hd1, &hd2);
+	if (hd1)
+		account_io_end(hd1, req);
+	if (hd2)
+		account_io_end(hd2, req);
+}
+
 /*
  * add-request adds a request to the linked list.
* It disables interrupts (aquires the request spinlock) so that it can muck @@ -463,6 +586,7 @@ int max_segments) { struct request *next; + struct hd_struct *hd1, *hd2; next = blkdev_next_request(req); if (req->sector + req->nr_sectors != next->sector) @@ -485,6 +609,15 @@ next->rq_status = RQ_INACTIVE; list_del(&next->queue); wake_up (&wait_for_request); + + /* One last thing: we have removed a request, so we now have one + less expected IO to complete for accounting purposes. */ + + locate_hd_struct(req, &hd1, &hd2); + if (hd1) + down_ios(hd1); + if (hd2) + down_ios(hd2); } static inline void attempt_back_merge(request_queue_t * q, @@ -687,6 +820,7 @@ req->bhtail = bh; req->nr_sectors += count; drive_stat_acct(req, count, 0); + req_new_io(req, 1, count); elevator_merge_after(elevator, req, latency); @@ -717,6 +851,7 @@ req->sector = sector; req->nr_sectors += count; drive_stat_acct(req, count, 0); + req_new_io(req, 1, count); elevator_merge_before(elevator, req, latency); @@ -764,6 +899,8 @@ req->bh = bh; req->bhtail = bh; req->q = q; + req->start_time = jiffies; + req_new_io(req, 0, count); add_request(q, req, head, orig_latency); elevator_account_request(elevator, req); @@ -955,6 +1092,7 @@ BUG(); if (req->sem != NULL) up(req->sem); + req_finished_io(req); req->rq_status = RQ_INACTIVE; wake_up(&wait_for_request); } --- linux-2.3.99-pre4.1/drivers/scsi/scsi_lib.c.~1~ Wed Mar 29 22:35:18 2000 +++ linux-2.3.99-pre4.1/drivers/scsi/scsi_lib.c Thu Mar 30 18:15:09 2000 @@ -454,6 +454,7 @@ if (req->sem != NULL) { up(req->sem); } + req_finished_io(req); add_blkdev_randomness(MAJOR(req->rq_dev)); /* --- linux-2.3.99-pre4.1/fs/block_dev.c.~1~ Mon Mar 13 13:26:08 2000 +++ linux-2.3.99-pre4.1/fs/block_dev.c Thu Mar 30 18:15:09 2000 @@ -11,6 +11,7 @@ #include #include #include +#include #include --- linux-2.3.99-pre4.1/fs/buffer.c.~1~ Wed Mar 29 22:35:22 2000 +++ linux-2.3.99-pre4.1/fs/buffer.c Thu Mar 30 18:15:09 2000 @@ -273,6 +273,7 @@ * more buffers on the second pass). 
*/
 	} while (wait && retry && ++pass<=2);
+	run_task_queue(&tq_disk);
 	return err;
 }
 
--- linux-2.3.99-pre4.1/fs/partitions/check.c.~1~	Mon Feb 28 15:05:35 2000
+++ linux-2.3.99-pre4.1/fs/partitions/check.c	Fri Mar 31 02:33:34 2000
@@ -38,6 +38,7 @@
 extern void initrd_load(void);
 
 struct gendisk *gendisk_head;
+struct gendisk *major_gendisk[MAX_BLKDEV] = {0,};	/* indexed by block major */
 int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/
 
 static int (*check_part[])(struct gendisk *hd, kdev_t dev, unsigned long first_sect, int first_minor) = {
@@ -97,6 +98,9 @@
 	 * This requires special handling here.
 	 */
 	switch (hd->major) {
+		case MD_MAJOR:
+			unit = (minor >> hd->minor_shift) + '0';
+			break;
 		case IDE9_MAJOR:
 			unit += 2;
 		case IDE8_MAJOR:
@@ -232,6 +236,7 @@
 int get_partition_list(char * page)
 {
 	struct gendisk *p;
+	struct hd_struct *hd;
 	char buf[64];
 	int n, len;
 
@@ -239,10 +244,26 @@
 	for (p = gendisk_head; p; p = p->next) {
 		for (n=0; n < (p->nr_real << p->minor_shift); n++) {
-			if (p->part[n].nr_sects && len < PAGE_SIZE - 80) {
-				len += sprintf(page+len,
-					       "%4d %4d %10d %s\n",
+			/* A line can now run past 200 bytes (name, eleven
+			 * counters and jiffies), so keep that much of the
+			 * page free before formatting; 80 is too little. */
+			if (p->part[n].nr_sects && len < PAGE_SIZE - 256) {
+				hd = &p->part[n];
+				disk_round_stats(hd);
+				len += sprintf(page+len,
+					"%4d %4d %10d %s "
+					"%u %u %u %u %u %u %u %u %u %u %u %lu\n",
 					       p->major, n, p->sizes[n],
-					       disk_name(p, n, buf));
+					       disk_name(p, n, buf),
+					       hd->rd_ios, hd->rd_merges,
+					       hd->rd_sectors,
+					       hd->rd_ticks,
+					       hd->wr_ios, hd->wr_merges,
+					       hd->wr_sectors,
+					       hd->wr_ticks,
+					       hd->ios_in_flight,
+					       hd->io_ticks,
+					       hd->aveq,
+					       jiffies);
 			}
 		}
 	}
@@ -388,6 +409,7 @@
 	if (!gdev)
 		return;
 	grok_partitions(gdev, MINOR(dev)>>gdev->minor_shift, minors, size);
+	major_gendisk[MAJOR(dev)] = gdev;
 }
 
 void grok_partitions(struct gendisk *dev, int drive, unsigned minors, long size)
--- linux-2.3.99-pre4.1/include/linux/blkdev.h.~1~	Wed Mar 29 22:35:18 2000
+++ linux-2.3.99-pre4.1/include/linux/blkdev.h	Thu Mar 30 18:15:37 2000
@@ -30,6 +30,7 @@
 	kdev_t rq_dev;
 	int cmd;		/* READ or WRITE */
 	int errors;
+	unsigned long start_time;	/* jiffies when the request was set up */
 	unsigned long sector;
 	unsigned long nr_sectors;
 	unsigned int nr_segments;
--- linux-2.3.99-pre4.1/include/linux/genhd.h.~1~	Thu Feb 17 11:50:55 2000
+++ linux-2.3.99-pre4.1/include/linux/genhd.h	Thu Mar 30 18:15:35 2000
@@ -51,6 +51,22 @@
 	long nr_sects;
 	int type;			/* currently RAID or normal */
 	devfs_handle_t de;		/* primary (master) devfs entry */
+
+	/* Performance stats: */
+	unsigned int ios_in_flight;	/* requests started, not yet finished */
+	unsigned int io_ticks;		/* jiffies spent with ios_in_flight != 0 */
+	unsigned long last_idle_time;	/* jiffies (so long) at last io_ticks update */
+	unsigned long last_queue_change; /* jiffies (so long) at last aveq update */
+	unsigned int aveq;		/* sum of ios_in_flight * elapsed jiffies */
+
+	unsigned int rd_ios;		/* reads completed */
+	unsigned int rd_merges;		/* reads merged into a queued request */
+	unsigned int rd_ticks;		/* jiffies from start_time to completion, reads */
+	unsigned int rd_sectors;	/* sectors queued for reading */
+	unsigned int wr_ios;		/* writes completed */
+	unsigned int wr_merges;		/* writes merged into a queued request */
+	unsigned int wr_ticks;		/* jiffies from start_time to completion, writes */
+	unsigned int wr_sectors;	/* sectors queued for writing */
 };
 
 #define GENHD_FL_REMOVABLE  1
@@ -75,6 +91,8 @@
 };
 #endif /* __KERNEL__ */
 
+extern struct gendisk *major_gendisk[];
+
 #ifdef CONFIG_SOLARIS_X86_PARTITION
 
 #define SOLARIS_X86_NUMSLICE	8
@@ -227,6 +245,19 @@
 extern struct gendisk *gendisk_head;	/* linked list of disks */
 
 char *disk_name (struct gendisk *hd, int minor, char *buf);
+
+/*
+ * disk_round_stats is used to round off the IO statistics for a disk
+ * for a complete clock tick.
+ */
+void disk_round_stats(struct hd_struct *hd);
+
+/*
+ * Account for the completion of an IO request (used by drivers which
+ * bypass the normal end_request processing)
+ */
+struct request;
+void req_finished_io(struct request *);
 
 extern void devfs_register_partitions (struct gendisk *dev, int minor,
 				       int unregister);
.