block/elevator.c

/*
 * Block device elevator/IO-scheduler.
 *
 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 *
 * 30042000 Jens Axboe <axboe@kernel.dk> :
 *
 * Split the elevator a bit so that it is possible to choose a different
 * one or even write a new "plug in". There are three pieces:
 * - elevator_fn, inserts a new request in the queue list
 * - elevator_merge_fn, decides whether a new buffer can be merged with
 *   an existing request
 * - elevator_dequeue_fn, called when a request is taken off the active list
 *
 * 20082000 Dave Jones <davej@suse.de> :
 * Removed tests for max-bomb-segments, which was breaking elvtune
 * when run without -bN
 *
 * Jens:
 * - Rework again to work with bio instead of buffer_heads
 * - lose bi_dev comparisons, partition handling is right now
 * - completely modularize elevator setup and teardown
 *
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/blktrace_api.h>
#include <linux/hash.h>
#include <linux/uaccess.h>
#include <linux/pm_runtime.h>

#include <trace/events/block.h>

#include "blk.h"
#include "blk-cgroup.h"

static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);

/*
 * Merge hash stuff.
 */
#define rq_hash_key(rq) (blk_rq_pos(rq) + blk_rq_sectors(rq))

/*
 * Query io scheduler to see if the current process issuing bio may be
 * merged with rq.
 */
static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
{
    struct request_queue *q = rq->q;
    struct elevator_queue *e = q->elevator;

    if (e->type->ops.elevator_allow_merge_fn)
        return e->type->ops.elevator_allow_merge_fn(q, rq, bio);

    return 1;
}

/*
 * can we safely merge with this request?
 */
bool elv_rq_merge_ok(struct request *rq, struct bio *bio)
{
    if (!blk_rq_merge_ok(rq, bio))
        return 0;

    if (!elv_iosched_allow_merge(rq, bio))
        return 0;

    return 1;
}
EXPORT_SYMBOL(elv_rq_merge_ok);

static struct elevator_type *elevator_find(const char *name)
{
    struct elevator_type *e;

    list_for_each_entry(e, &elv_list, list) {
        if (!strcmp(e->elevator_name, name))
            return e;
    }

    return NULL;
}

static void elevator_put(struct elevator_type *e)
{
    module_put(e->elevator_owner);
}

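/*
 * Look up an elevator type by name and grab a reference on its module.
 * If it is not registered yet and @try_loading is set, try to load the
 * "<name>-iosched" module and look again.
 */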
static struct elevator_type *elevator_get(const char *name, bool try_loading)
{
    struct elevator_type *e;

    spin_lock(&elv_list_lock);

    e = elevator_find(name);
    if (!e && try_loading) {
        spin_unlock(&elv_list_lock);
        request_module("%s-iosched", name);
        spin_lock(&elv_list_lock);
        e = elevator_find(name);
    }

    if (e && !try_module_get(e->elevator_owner))
        e = NULL;

    spin_unlock(&elv_list_lock);

    return e;
}

static char chosen_elevator[ELV_NAME_MAX];

static int __init elevator_setup(char *str)
{
    /*
     * Be backwards-compatible with previous kernels, so users
     * won't get the wrong elevator.
     */
    strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
    return 1;
}

__setup("elevator=", elevator_setup);

/* called during boot to load the elevator chosen by the elevator param */
void __init load_default_elevator_module(void)
{
    struct elevator_type *e;

    if (!chosen_elevator[0])
        return;

    spin_lock(&elv_list_lock);
    e = elevator_find(chosen_elevator);
    spin_unlock(&elv_list_lock);

    if (!e)
        request_module("%s-iosched", chosen_elevator);
}

static struct kobj_type elv_ktype;

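/*
 * Allocate and initialise an elevator_queue for @q on behalf of type @e.
 * On allocation failure the reference taken on @e is dropped here.
 */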
struct elevator_queue *elevator_alloc(struct request_queue *q,
                  struct elevator_type *e)
{
    struct elevator_queue *eq;

    eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, q->node);
    if (unlikely(!eq))
        goto err;

    eq->type = e;
    kobject_init(&eq->kobj, &elv_ktype);
    mutex_init(&eq->sysfs_lock);
    hash_init(eq->hash);

    return eq;
err:
    kfree(eq);
    elevator_put(e);
    return NULL;
}
EXPORT_SYMBOL(elevator_alloc);

static void elevator_release(struct kobject *kobj)
{
    struct elevator_queue *e;

    e = container_of(kobj, struct elevator_queue, kobj);
    elevator_put(e->type);
    kfree(e);
}

int elevator_init(struct request_queue *q, char *name)
{
    struct elevator_type *e = NULL;
    int err;

    /*
     * q->sysfs_lock must be held to provide mutual exclusion between
     * elevator_switch() and here.
     */
    lockdep_assert_held(&q->sysfs_lock);

    if (unlikely(q->elevator))
        return 0;

    INIT_LIST_HEAD(&q->queue_head);
    q->last_merge = NULL;
    q->end_sector = 0;
    q->boundary_rq = NULL;

    if (name) {
        e = elevator_get(name, true);
        if (!e)
            return -EINVAL;
    }

    /*
     * Use the default elevator specified by config boot param or
     * config option. Don't try to load modules as we could be running
     * off async and request_module() isn't allowed from async.
     */
    if (!e && *chosen_elevator) {
        e = elevator_get(chosen_elevator, false);
        if (!e)
            printk(KERN_ERR "I/O scheduler %s not found\n",
                            chosen_elevator);
    }

    if (!e) {
        e = elevator_get(CONFIG_DEFAULT_IOSCHED, false);
        if (!e) {
            printk(KERN_ERR
                "Default I/O scheduler not found. Using noop.\n");
            e = elevator_get("noop", false);
        }
    }

    err = e->ops.elevator_init_fn(q, e);
    return err;
}
EXPORT_SYMBOL(elevator_init);

void elevator_exit(struct elevator_queue *e)
{
    mutex_lock(&e->sysfs_lock);
    if (e->type->ops.elevator_exit_fn)
        e->type->ops.elevator_exit_fn(e);
    mutex_unlock(&e->sysfs_lock);

    kobject_put(&e->kobj);
}
EXPORT_SYMBOL(elevator_exit);

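/*
 * Back-merge hash helpers. Requests are hashed on the sector right after
 * their last sector (rq_hash_key), so a bio starting at that sector can be
 * found and back-merged without asking the io scheduler.
 */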
static inline void __elv_rqhash_del(struct request *rq)
{
    hash_del(&rq->hash);
    rq->cmd_flags &= ~REQ_HASHED;
}

static void elv_rqhash_del(struct request_queue *q, struct request *rq)
{
    if (ELV_ON_HASH(rq))
        __elv_rqhash_del(rq);
}

static void elv_rqhash_add(struct request_queue *q, struct request *rq)
{
    struct elevator_queue *e = q->elevator;

    BUG_ON(ELV_ON_HASH(rq));
    hash_add(e->hash, &rq->hash, rq_hash_key(rq));
    rq->cmd_flags |= REQ_HASHED;
}

static void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
{
    __elv_rqhash_del(rq);
    elv_rqhash_add(q, rq);
}

static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
{
    struct elevator_queue *e = q->elevator;
    struct hlist_node *next;
    struct request *rq;

    hash_for_each_possible_safe(e->hash, rq, next, hash, offset) {
        BUG_ON(!ELV_ON_HASH(rq));

        if (unlikely(!rq_mergeable(rq))) {
            __elv_rqhash_del(rq);
            continue;
        }

        if (rq_hash_key(rq) == offset)
            return rq;
    }

    return NULL;
}

/*
 * RB-tree support functions for inserting/lookup/removal of requests
 * in a sorted RB tree.
 */
void elv_rb_add(struct rb_root *root, struct request *rq)
{
    struct rb_node **p = &root->rb_node;
    struct rb_node *parent = NULL;
    struct request *__rq;

    while (*p) {
        parent = *p;
        __rq = rb_entry(parent, struct request, rb_node);

        if (blk_rq_pos(rq) < blk_rq_pos(__rq))
            p = &(*p)->rb_left;
        else if (blk_rq_pos(rq) >= blk_rq_pos(__rq))
            p = &(*p)->rb_right;
    }

    rb_link_node(&rq->rb_node, parent, p);
    rb_insert_color(&rq->rb_node, root);
}
EXPORT_SYMBOL(elv_rb_add);

void elv_rb_del(struct rb_root *root, struct request *rq)
{
    BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
    rb_erase(&rq->rb_node, root);
    RB_CLEAR_NODE(&rq->rb_node);
}
EXPORT_SYMBOL(elv_rb_del);

struct request *elv_rb_find(struct rb_root *root, sector_t sector)
{
    struct rb_node *n = root->rb_node;
    struct request *rq;

    while (n) {
        rq = rb_entry(n, struct request, rb_node);

        if (sector < blk_rq_pos(rq))
            n = n->rb_left;
        else if (sector > blk_rq_pos(rq))
            n = n->rb_right;
        else
            return rq;
    }

    return NULL;
}
EXPORT_SYMBOL(elv_rb_find);

/*
 * Insert rq into the dispatch queue of q. The queue lock must be held on
 * entry. rq is sorted into the dispatch queue rather than appended. To be
 * used by specific elevators.
 */
void elv_dispatch_sort(struct request_queue *q, struct request *rq)
{
    sector_t boundary;
    struct list_head *entry;
    int stop_flags;

    if (q->last_merge == rq)
        q->last_merge = NULL;

    elv_rqhash_del(q, rq);

    q->nr_sorted--;

    boundary = q->end_sector;
    stop_flags = REQ_SOFTBARRIER | REQ_STARTED;
    list_for_each_prev(entry, &q->queue_head) {
        struct request *pos = list_entry_rq(entry);

        if ((rq->cmd_flags & REQ_DISCARD) !=
            (pos->cmd_flags & REQ_DISCARD))
            break;
        if (rq_data_dir(rq) != rq_data_dir(pos))
            break;
        if (pos->cmd_flags & stop_flags)
            break;
        if (blk_rq_pos(rq) >= boundary) {
            if (blk_rq_pos(pos) < boundary)
                continue;
        } else {
            if (blk_rq_pos(pos) >= boundary)
                break;
        }
        if (blk_rq_pos(rq) >= blk_rq_pos(pos))
            break;
    }

    list_add(&rq->queuelist, entry);
}
EXPORT_SYMBOL(elv_dispatch_sort);

/*
 * Insert rq into dispatch queue of q. Queue lock must be held on
 * entry. rq is added to the back of the dispatch queue. To be used by
 * specific elevators.
 */
void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
{
    if (q->last_merge == rq)
        q->last_merge = NULL;

    elv_rqhash_del(q, rq);

    q->nr_sorted--;

    q->end_sector = rq_end_sector(rq);
    q->boundary_rq = rq;
    list_add_tail(&rq->queuelist, &q->queue_head);
}
EXPORT_SYMBOL(elv_dispatch_add_tail);

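/*
 * Find a request that bio may be merged into. Tries the one-hit last_merge
 * cache, then the back-merge hash, and finally the io scheduler's own merge
 * hook. On success *req is set and the merge type (ELEVATOR_FRONT_MERGE or
 * ELEVATOR_BACK_MERGE) is returned.
 */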
int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
{
    struct elevator_queue *e = q->elevator;
    struct request *__rq;
    int ret;

    /*
     * Levels of merges:
     * nomerges: No merges at all attempted
     * noxmerges: Only simple one-hit cache try
     * merges: All merge tries attempted
     */
    if (blk_queue_nomerges(q))
        return ELEVATOR_NO_MERGE;

    /*
     * First try one-hit cache.
     */
    if (q->last_merge && elv_rq_merge_ok(q->last_merge, bio)) {
        ret = blk_try_merge(q->last_merge, bio);
        if (ret != ELEVATOR_NO_MERGE) {
            *req = q->last_merge;
            return ret;
        }
    }

    if (blk_queue_noxmerges(q))
        return ELEVATOR_NO_MERGE;

    /*
     * See if our hash lookup can find a potential backmerge.
     */
    __rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);
    if (__rq && elv_rq_merge_ok(__rq, bio)) {
        *req = __rq;
        return ELEVATOR_BACK_MERGE;
    }

    if (e->type->ops.elevator_merge_fn)
        return e->type->ops.elevator_merge_fn(q, req, bio);

    return ELEVATOR_NO_MERGE;
}

/*
 * Attempt to do an insertion back merge. Only check for the case where
 * we can append 'rq' to an existing request, so we can throw 'rq' away
 * afterwards.
 *
 * Returns true if we merged, false otherwise
 */
static bool elv_attempt_insert_merge(struct request_queue *q,
                     struct request *rq)
{
    struct request *__rq;
    bool ret;

    if (blk_queue_nomerges(q))
        return false;

    /*
     * First try one-hit cache.
     */
    if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq))
        return true;

    if (blk_queue_noxmerges(q))
        return false;

    ret = false;
    /*
     * See if our hash lookup can find a potential backmerge.
     */
    while (1) {
        __rq = elv_rqhash_find(q, blk_rq_pos(rq));
        if (!__rq || !blk_attempt_req_merge(q, __rq, rq))
            break;

        /* The merged request could be merged with others, try again */
        ret = true;
        rq = __rq;
    }

    return ret;
}

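/*
 * A bio was merged into rq. Let the io scheduler update its state; if the
 * request grew at the back its hash key changed, so reposition it in the
 * back-merge hash.
 */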
void elv_merged_request(struct request_queue *q, struct request *rq, int type)
{
    struct elevator_queue *e = q->elevator;

    if (e->type->ops.elevator_merged_fn)
        e->type->ops.elevator_merged_fn(q, rq, type);

    if (type == ELEVATOR_BACK_MERGE)
        elv_rqhash_reposition(q, rq);

    q->last_merge = rq;
}

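/*
 * 'next' has been merged into 'rq' and is about to be freed. Rehash rq and,
 * if next was in the sorted set, let the io scheduler clean up after it and
 * drop it from the sort/hash bookkeeping.
 */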
void elv_merge_requests(struct request_queue *q, struct request *rq,
                 struct request *next)
{
    struct elevator_queue *e = q->elevator;
    const int next_sorted = next->cmd_flags & REQ_SORTED;

    if (next_sorted && e->type->ops.elevator_merge_req_fn)
        e->type->ops.elevator_merge_req_fn(q, rq, next);

    elv_rqhash_reposition(q, rq);

    if (next_sorted) {
        elv_rqhash_del(q, next);
        q->nr_sorted--;
    }

    q->last_merge = rq;
}

void elv_bio_merged(struct request_queue *q, struct request *rq,
            struct bio *bio)
{
    struct elevator_queue *e = q->elevator;

    if (e->type->ops.elevator_bio_merged_fn)
        e->type->ops.elevator_bio_merged_fn(q, rq, bio);
}

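/*
 * Runtime PM helpers: count the non-PM requests pending on the queue and
 * kick a resume when a request is added while the device is suspended or
 * suspending.
 */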
#ifdef CONFIG_PM_RUNTIME
static void blk_pm_requeue_request(struct request *rq)
{
    if (rq->q->dev && !(rq->cmd_flags & REQ_PM))
        rq->q->nr_pending--;
}

static void blk_pm_add_request(struct request_queue *q, struct request *rq)
{
    if (q->dev && !(rq->cmd_flags & REQ_PM) && q->nr_pending++ == 0 &&
        (q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING))
        pm_request_resume(q->dev);
}
#else
static inline void blk_pm_requeue_request(struct request *rq) {}
static inline void blk_pm_add_request(struct request_queue *q,
                      struct request *rq)
{
}
#endif

void elv_requeue_request(struct request_queue *q, struct request *rq)
{
    /*
     * it already went through dequeue, we need to decrement the
     * in_flight count again
     */
    if (blk_account_rq(rq)) {
        q->in_flight[rq_is_sync(rq)]--;
        if (rq->cmd_flags & REQ_SORTED)
            elv_deactivate_rq(q, rq);
    }

    rq->cmd_flags &= ~REQ_STARTED;

    blk_pm_requeue_request(rq);

    __elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE);
}

void elv_drain_elevator(struct request_queue *q)
{
    static int printed;

    lockdep_assert_held(q->queue_lock);

    while (q->elevator->type->ops.elevator_dispatch_fn(q, 1))
        ;
    if (q->nr_sorted && printed++ < 10) {
        printk(KERN_ERR "%s: forced dispatching is broken "
               "(nr_sorted=%u), please report this\n",
               q->elevator->type->elevator_name, q->nr_sorted);
    }
}

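/*
 * Insert rq at the position selected by 'where'. The queue lock must be
 * held; elv_add_request() below is the locking wrapper.
 */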
void __elv_add_request(struct request_queue *q, struct request *rq, int where)
{
    trace_block_rq_insert(q, rq);

    blk_pm_add_request(q, rq);

    rq->q = q;

    if (rq->cmd_flags & REQ_SOFTBARRIER) {
        /* barriers are scheduling boundary, update end_sector */
        if (rq->cmd_type == REQ_TYPE_FS) {
            q->end_sector = rq_end_sector(rq);
            q->boundary_rq = rq;
        }
    } else if (!(rq->cmd_flags & REQ_ELVPRIV) &&
            (where == ELEVATOR_INSERT_SORT ||
             where == ELEVATOR_INSERT_SORT_MERGE))
        where = ELEVATOR_INSERT_BACK;

    switch (where) {
    case ELEVATOR_INSERT_REQUEUE:
    case ELEVATOR_INSERT_FRONT:
        rq->cmd_flags |= REQ_SOFTBARRIER;
        list_add(&rq->queuelist, &q->queue_head);
        break;

    case ELEVATOR_INSERT_BACK:
        rq->cmd_flags |= REQ_SOFTBARRIER;
        elv_drain_elevator(q);
        list_add_tail(&rq->queuelist, &q->queue_head);
        /*
         * We kick the queue here for the following reasons.
         * - The elevator might have returned NULL previously
         * to delay requests and returned them now. As the
         * queue wasn't empty before this request, ll_rw_blk
         * won't run the queue on return, resulting in hang.
         * - Usually, back inserted requests won't be merged
         * with anything. There's no point in delaying queue
         * processing.
         */
        __blk_run_queue(q);
        break;

    case ELEVATOR_INSERT_SORT_MERGE:
        /*
         * If we succeed in merging this request with one in the
         * queue already, we are done - rq has now been freed,
         * so no need to do anything further.
         */
        if (elv_attempt_insert_merge(q, rq))
            break;
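        /* fall through */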
    case ELEVATOR_INSERT_SORT:
        BUG_ON(rq->cmd_type != REQ_TYPE_FS);
        rq->cmd_flags |= REQ_SORTED;
        q->nr_sorted++;
        if (rq_mergeable(rq)) {
            elv_rqhash_add(q, rq);
            if (!q->last_merge)
                q->last_merge = rq;
        }

        /*
         * Some ioscheds (cfq) run q->request_fn directly, so
         * rq cannot be accessed after calling
         * elevator_add_req_fn.
         */
        q->elevator->type->ops.elevator_add_req_fn(q, rq);
        break;

    case ELEVATOR_INSERT_FLUSH:
        rq->cmd_flags |= REQ_SOFTBARRIER;
        blk_insert_flush(rq);
        break;
    default:
        printk(KERN_ERR "%s: bad insertion point %d\n",
               __func__, where);
        BUG();
    }
}
EXPORT_SYMBOL(__elv_add_request);

void elv_add_request(struct request_queue *q, struct request *rq, int where)
{
    unsigned long flags;

    spin_lock_irqsave(q->queue_lock, flags);
    __elv_add_request(q, rq, where);
    spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(elv_add_request);

struct request *elv_latter_request(struct request_queue *q, struct request *rq)
{
    struct elevator_queue *e = q->elevator;

    if (e->type->ops.elevator_latter_req_fn)
        return e->type->ops.elevator_latter_req_fn(q, rq);
    return NULL;
}

struct request *elv_former_request(struct request_queue *q, struct request *rq)
{
    struct elevator_queue *e = q->elevator;

    if (e->type->ops.elevator_former_req_fn)
        return e->type->ops.elevator_former_req_fn(q, rq);
    return NULL;
}

int elv_set_request(struct request_queue *q, struct request *rq,
            struct bio *bio, gfp_t gfp_mask)
{
    struct elevator_queue *e = q->elevator;

    if (e->type->ops.elevator_set_req_fn)
        return e->type->ops.elevator_set_req_fn(q, rq, bio, gfp_mask);
    return 0;
}

void elv_put_request(struct request_queue *q, struct request *rq)
{
    struct elevator_queue *e = q->elevator;

    if (e->type->ops.elevator_put_req_fn)
        e->type->ops.elevator_put_req_fn(rq);
}

int elv_may_queue(struct request_queue *q, int rw)
{
    struct elevator_queue *e = q->elevator;

    if (e->type->ops.elevator_may_queue_fn)
        return e->type->ops.elevator_may_queue_fn(q, rw);

    return ELV_MQUEUE_MAY;
}

void elv_completed_request(struct request_queue *q, struct request *rq)
{
    struct elevator_queue *e = q->elevator;

    /*
     * request is released from the driver, io must be done
     */
    if (blk_account_rq(rq)) {
        q->in_flight[rq_is_sync(rq)]--;
        if ((rq->cmd_flags & REQ_SORTED) &&
            e->type->ops.elevator_completed_req_fn)
            e->type->ops.elevator_completed_req_fn(q, rq);
    }
}

#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)

static ssize_t
elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
    struct elv_fs_entry *entry = to_elv(attr);
    struct elevator_queue *e;
    ssize_t error;

    if (!entry->show)
        return -EIO;

    e = container_of(kobj, struct elevator_queue, kobj);
    mutex_lock(&e->sysfs_lock);
    error = e->type ? entry->show(e, page) : -ENOENT;
    mutex_unlock(&e->sysfs_lock);
    return error;
}

static ssize_t
elv_attr_store(struct kobject *kobj, struct attribute *attr,
           const char *page, size_t length)
{
    struct elv_fs_entry *entry = to_elv(attr);
    struct elevator_queue *e;
    ssize_t error;

    if (!entry->store)
        return -EIO;

    e = container_of(kobj, struct elevator_queue, kobj);
    mutex_lock(&e->sysfs_lock);
    error = e->type ? entry->store(e, page, length) : -ENOENT;
    mutex_unlock(&e->sysfs_lock);
    return error;
}

static const struct sysfs_ops elv_sysfs_ops = {
    .show = elv_attr_show,
    .store = elv_attr_store,
};

static struct kobj_type elv_ktype = {
    .sysfs_ops = &elv_sysfs_ops,
    .release = elevator_release,
};

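/*
 * Hook the active elevator up in sysfs as <queue>/iosched and create its
 * tunable attribute files.
 */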
int elv_register_queue(struct request_queue *q)
{
    struct elevator_queue *e = q->elevator;
    int error;

    error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched");
    if (!error) {
        struct elv_fs_entry *attr = e->type->elevator_attrs;
        if (attr) {
            while (attr->attr.name) {
                if (sysfs_create_file(&e->kobj, &attr->attr))
                    break;
                attr++;
            }
        }
        kobject_uevent(&e->kobj, KOBJ_ADD);
        e->registered = 1;
    }
    return error;
}
EXPORT_SYMBOL(elv_register_queue);

void elv_unregister_queue(struct request_queue *q)
{
    if (q) {
        struct elevator_queue *e = q->elevator;

        kobject_uevent(&e->kobj, KOBJ_REMOVE);
        kobject_del(&e->kobj);
        e->registered = 0;
    }
}
EXPORT_SYMBOL(elv_unregister_queue);

int elv_register(struct elevator_type *e)
{
    char *def = "";

    /* create icq_cache if requested */
    if (e->icq_size) {
        if (WARN_ON(e->icq_size < sizeof(struct io_cq)) ||
            WARN_ON(e->icq_align < __alignof__(struct io_cq)))
            return -EINVAL;

        snprintf(e->icq_cache_name, sizeof(e->icq_cache_name),
             "%s_io_cq", e->elevator_name);
        e->icq_cache = kmem_cache_create(e->icq_cache_name, e->icq_size,
                         e->icq_align, 0, NULL);
        if (!e->icq_cache)
            return -ENOMEM;
    }

    /* register, don't allow duplicate names */
    spin_lock(&elv_list_lock);
    if (elevator_find(e->elevator_name)) {
        spin_unlock(&elv_list_lock);
        if (e->icq_cache)
            kmem_cache_destroy(e->icq_cache);
        return -EBUSY;
    }
    list_add_tail(&e->list, &elv_list);
    spin_unlock(&elv_list_lock);

    /* print pretty message */
    if (!strcmp(e->elevator_name, chosen_elevator) ||
            (!*chosen_elevator &&
             !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
                def = " (default)";

    printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name,
                                def);
    return 0;
}
EXPORT_SYMBOL_GPL(elv_register);

void elv_unregister(struct elevator_type *e)
{
    /* unregister */
    spin_lock(&elv_list_lock);
    list_del_init(&e->list);
    spin_unlock(&elv_list_lock);

    /*
     * Destroy icq_cache if it exists. icq's are RCU managed. Make
     * sure all RCU operations are complete before proceeding.
     */
    if (e->icq_cache) {
        rcu_barrier();
        kmem_cache_destroy(e->icq_cache);
        e->icq_cache = NULL;
    }
}
EXPORT_SYMBOL_GPL(elv_unregister);

/*
 * Switch to the new_e io scheduler. Be careful not to introduce deadlocks:
 * don't free the old io scheduler before everything needed for the new one
 * has been allocated, so that we can fall back to the old one if the new
 * one fails to initialize.
 */
static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
{
    struct elevator_queue *old = q->elevator;
    bool registered = old->registered;
    int err;

    /*
     * Turn on BYPASS and drain all requests w/ elevator private data.
     * Block layer doesn't call into a quiesced elevator - all requests
     * are directly put on the dispatch list without elevator data
     * using INSERT_BACK. All requests have SOFTBARRIER set and no
     * merge happens either.
     */
    blk_queue_bypass_start(q);

    /* unregister and clear all auxiliary data of the old elevator */
    if (registered)
        elv_unregister_queue(q);

    spin_lock_irq(q->queue_lock);
    ioc_clear_queue(q);
    spin_unlock_irq(q->queue_lock);

    /* allocate, init and register new elevator */
    err = new_e->ops.elevator_init_fn(q, new_e);
    if (err)
        goto fail_init;

    if (registered) {
        err = elv_register_queue(q);
        if (err)
            goto fail_register;
    }

    /* done, kill the old one and finish */
    elevator_exit(old);
    blk_queue_bypass_end(q);

    blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);

    return 0;

fail_register:
    elevator_exit(q->elevator);
fail_init:
    /* switch failed, restore and re-register old elevator */
    q->elevator = old;
    elv_register_queue(q);
    blk_queue_bypass_end(q);

    return err;
}

/*
 * Switch this queue to the given IO scheduler.
 */
static int __elevator_change(struct request_queue *q, const char *name)
{
    char elevator_name[ELV_NAME_MAX];
    struct elevator_type *e;

    if (!q->elevator)
        return -ENXIO;

    strlcpy(elevator_name, name, sizeof(elevator_name));
    e = elevator_get(strstrip(elevator_name), true);
    if (!e) {
        printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
        return -EINVAL;
    }

    if (!strcmp(elevator_name, q->elevator->type->elevator_name)) {
        elevator_put(e);
        return 0;
    }

    return elevator_switch(q, e);
}

int elevator_change(struct request_queue *q, const char *name)
{
    int ret;

    /* Protect q->elevator from elevator_init() */
    mutex_lock(&q->sysfs_lock);
    ret = __elevator_change(q, name);
    mutex_unlock(&q->sysfs_lock);

    return ret;
}
EXPORT_SYMBOL(elevator_change);

ssize_t elv_iosched_store(struct request_queue *q, const char *name,
              size_t count)
{
    int ret;

    if (!q->elevator)
        return count;

    ret = __elevator_change(q, name);
    if (!ret)
        return count;

    printk(KERN_ERR "elevator: switch to %s failed\n", name);
    return ret;
}

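/*
 * Show the registered io schedulers for /sys/block/<dev>/queue/scheduler,
 * with the active one in square brackets.
 */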
ssize_t elv_iosched_show(struct request_queue *q, char *name)
{
    struct elevator_queue *e = q->elevator;
    struct elevator_type *elv;
    struct elevator_type *__e;
    int len = 0;

    if (!q->elevator || !blk_queue_stackable(q))
        return sprintf(name, "none\n");

    elv = e->type;

    spin_lock(&elv_list_lock);
    list_for_each_entry(__e, &elv_list, list) {
        if (!strcmp(elv->elevator_name, __e->elevator_name))
            len += sprintf(name+len, "[%s] ", elv->elevator_name);
        else
            len += sprintf(name+len, "%s ", __e->elevator_name);
    }
    spin_unlock(&elv_list_lock);

    len += sprintf(len+name, "\n");
    return len;
}

struct request *elv_rb_former_request(struct request_queue *q,
                      struct request *rq)
{
    struct rb_node *rbprev = rb_prev(&rq->rb_node);

    if (rbprev)
        return rb_entry_rq(rbprev);

    return NULL;
}
EXPORT_SYMBOL(elv_rb_former_request);

struct request *elv_rb_latter_request(struct request_queue *q,
                      struct request *rq)
{
    struct rb_node *rbnext = rb_next(&rq->rb_node);

    if (rbnext)
        return rb_entry_rq(rbnext);

    return NULL;
}
EXPORT_SYMBOL(elv_rb_latter_request);