Root/
1 | /* |
2 | * Copyright (C) 2007-2009 NEC Corporation. All Rights Reserved. |
3 | * |
4 | * Module Author: Kiyoshi Ueda |
5 | * |
6 | * This file is released under the GPL. |
7 | * |
8 | * Throughput oriented path selector. |
9 | */ |
10 | |
11 | #include "dm.h" |
12 | #include "dm-path-selector.h" |
13 | |
14 | #define DM_MSG_PREFIX "multipath service-time" |
15 | #define ST_MIN_IO 1 |
16 | #define ST_MAX_RELATIVE_THROUGHPUT 100 |
17 | #define ST_MAX_RELATIVE_THROUGHPUT_SHIFT 7 |
18 | #define ST_MAX_INFLIGHT_SIZE ((size_t)-1 >> ST_MAX_RELATIVE_THROUGHPUT_SHIFT) |
19 | #define ST_VERSION "0.2.0" |
20 | |
21 | struct selector { |
22 | struct list_head valid_paths; |
23 | struct list_head failed_paths; |
24 | }; |
25 | |
26 | struct path_info { |
27 | struct list_head list; |
28 | struct dm_path *path; |
29 | unsigned repeat_count; |
30 | unsigned relative_throughput; |
31 | atomic_t in_flight_size; /* Total size of in-flight I/Os */ |
32 | }; |
33 | |
34 | static struct selector *alloc_selector(void) |
35 | { |
36 | struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL); |
37 | |
38 | if (s) { |
39 | INIT_LIST_HEAD(&s->valid_paths); |
40 | INIT_LIST_HEAD(&s->failed_paths); |
41 | } |
42 | |
43 | return s; |
44 | } |
45 | |
46 | static int st_create(struct path_selector *ps, unsigned argc, char **argv) |
47 | { |
48 | struct selector *s = alloc_selector(); |
49 | |
50 | if (!s) |
51 | return -ENOMEM; |
52 | |
53 | ps->context = s; |
54 | return 0; |
55 | } |
56 | |
57 | static void free_paths(struct list_head *paths) |
58 | { |
59 | struct path_info *pi, *next; |
60 | |
61 | list_for_each_entry_safe(pi, next, paths, list) { |
62 | list_del(&pi->list); |
63 | kfree(pi); |
64 | } |
65 | } |
66 | |
67 | static void st_destroy(struct path_selector *ps) |
68 | { |
69 | struct selector *s = ps->context; |
70 | |
71 | free_paths(&s->valid_paths); |
72 | free_paths(&s->failed_paths); |
73 | kfree(s); |
74 | ps->context = NULL; |
75 | } |
76 | |
77 | static int st_status(struct path_selector *ps, struct dm_path *path, |
78 | status_type_t type, char *result, unsigned maxlen) |
79 | { |
80 | unsigned sz = 0; |
81 | struct path_info *pi; |
82 | |
83 | if (!path) |
84 | DMEMIT("0 "); |
85 | else { |
86 | pi = path->pscontext; |
87 | |
88 | switch (type) { |
89 | case STATUSTYPE_INFO: |
90 | DMEMIT("%d %u ", atomic_read(&pi->in_flight_size), |
91 | pi->relative_throughput); |
92 | break; |
93 | case STATUSTYPE_TABLE: |
94 | DMEMIT("%u %u ", pi->repeat_count, |
95 | pi->relative_throughput); |
96 | break; |
97 | } |
98 | } |
99 | |
100 | return sz; |
101 | } |
102 | |
103 | static int st_add_path(struct path_selector *ps, struct dm_path *path, |
104 | int argc, char **argv, char **error) |
105 | { |
106 | struct selector *s = ps->context; |
107 | struct path_info *pi; |
108 | unsigned repeat_count = ST_MIN_IO; |
109 | unsigned relative_throughput = 1; |
110 | |
111 | /* |
112 | * Arguments: [<repeat_count> [<relative_throughput>]] |
113 | * <repeat_count>: The number of I/Os before switching path. |
114 | * If not given, default (ST_MIN_IO) is used. |
115 | * <relative_throughput>: The relative throughput value of |
116 | * the path among all paths in the path-group. |
117 | * The valid range: 0-<ST_MAX_RELATIVE_THROUGHPUT> |
118 | * If not given, minimum value '1' is used. |
119 | * If '0' is given, the path isn't selected while |
120 | * other paths having a positive value are |
121 | * available. |
122 | */ |
123 | if (argc > 2) { |
124 | *error = "service-time ps: incorrect number of arguments"; |
125 | return -EINVAL; |
126 | } |
127 | |
128 | if (argc && (sscanf(argv[0], "%u", &repeat_count) != 1)) { |
129 | *error = "service-time ps: invalid repeat count"; |
130 | return -EINVAL; |
131 | } |
132 | |
133 | if ((argc == 2) && |
134 | (sscanf(argv[1], "%u", &relative_throughput) != 1 || |
135 | relative_throughput > ST_MAX_RELATIVE_THROUGHPUT)) { |
136 | *error = "service-time ps: invalid relative_throughput value"; |
137 | return -EINVAL; |
138 | } |
139 | |
140 | /* allocate the path */ |
141 | pi = kmalloc(sizeof(*pi), GFP_KERNEL); |
142 | if (!pi) { |
143 | *error = "service-time ps: Error allocating path context"; |
144 | return -ENOMEM; |
145 | } |
146 | |
147 | pi->path = path; |
148 | pi->repeat_count = repeat_count; |
149 | pi->relative_throughput = relative_throughput; |
150 | atomic_set(&pi->in_flight_size, 0); |
151 | |
152 | path->pscontext = pi; |
153 | |
154 | list_add_tail(&pi->list, &s->valid_paths); |
155 | |
156 | return 0; |
157 | } |
158 | |
159 | static void st_fail_path(struct path_selector *ps, struct dm_path *path) |
160 | { |
161 | struct selector *s = ps->context; |
162 | struct path_info *pi = path->pscontext; |
163 | |
164 | list_move(&pi->list, &s->failed_paths); |
165 | } |
166 | |
167 | static int st_reinstate_path(struct path_selector *ps, struct dm_path *path) |
168 | { |
169 | struct selector *s = ps->context; |
170 | struct path_info *pi = path->pscontext; |
171 | |
172 | list_move_tail(&pi->list, &s->valid_paths); |
173 | |
174 | return 0; |
175 | } |
176 | |
177 | /* |
178 | * Compare the estimated service time of 2 paths, pi1 and pi2, |
179 | * for the incoming I/O. |
180 | * |
181 | * Returns: |
182 | * < 0 : pi1 is better |
183 | * 0 : no difference between pi1 and pi2 |
184 | * > 0 : pi2 is better |
185 | * |
186 | * Description: |
187 | * Basically, the service time is estimated by: |
188 | * ('pi->in-flight-size' + 'incoming') / 'pi->relative_throughput' |
189 | * To reduce the calculation, some optimizations are made. |
190 | * (See comments inline) |
191 | */ |
192 | static int st_compare_load(struct path_info *pi1, struct path_info *pi2, |
193 | size_t incoming) |
194 | { |
195 | size_t sz1, sz2, st1, st2; |
196 | |
197 | sz1 = atomic_read(&pi1->in_flight_size); |
198 | sz2 = atomic_read(&pi2->in_flight_size); |
199 | |
200 | /* |
201 | * Case 1: Both have same throughput value. Choose less loaded path. |
202 | */ |
203 | if (pi1->relative_throughput == pi2->relative_throughput) |
204 | return sz1 - sz2; |
205 | |
206 | /* |
207 | * Case 2a: Both have same load. Choose higher throughput path. |
208 | * Case 2b: One path has no throughput value. Choose the other one. |
209 | */ |
210 | if (sz1 == sz2 || |
211 | !pi1->relative_throughput || !pi2->relative_throughput) |
212 | return pi2->relative_throughput - pi1->relative_throughput; |
213 | |
214 | /* |
215 | * Case 3: Calculate service time. Choose faster path. |
216 | * Service time using pi1: |
217 | * st1 = (sz1 + incoming) / pi1->relative_throughput |
218 | * Service time using pi2: |
219 | * st2 = (sz2 + incoming) / pi2->relative_throughput |
220 | * |
221 | * To avoid the division, transform the expression to use |
222 | * multiplication. |
223 | * Because ->relative_throughput > 0 here, if st1 < st2, |
224 | * the expressions below are the same meaning: |
225 | * (sz1 + incoming) / pi1->relative_throughput < |
226 | * (sz2 + incoming) / pi2->relative_throughput |
227 | * (sz1 + incoming) * pi2->relative_throughput < |
228 | * (sz2 + incoming) * pi1->relative_throughput |
229 | * So use the later one. |
230 | */ |
231 | sz1 += incoming; |
232 | sz2 += incoming; |
233 | if (unlikely(sz1 >= ST_MAX_INFLIGHT_SIZE || |
234 | sz2 >= ST_MAX_INFLIGHT_SIZE)) { |
235 | /* |
236 | * Size may be too big for multiplying pi->relative_throughput |
237 | * and overflow. |
238 | * To avoid the overflow and mis-selection, shift down both. |
239 | */ |
240 | sz1 >>= ST_MAX_RELATIVE_THROUGHPUT_SHIFT; |
241 | sz2 >>= ST_MAX_RELATIVE_THROUGHPUT_SHIFT; |
242 | } |
243 | st1 = sz1 * pi2->relative_throughput; |
244 | st2 = sz2 * pi1->relative_throughput; |
245 | if (st1 != st2) |
246 | return st1 - st2; |
247 | |
248 | /* |
249 | * Case 4: Service time is equal. Choose higher throughput path. |
250 | */ |
251 | return pi2->relative_throughput - pi1->relative_throughput; |
252 | } |
253 | |
254 | static struct dm_path *st_select_path(struct path_selector *ps, |
255 | unsigned *repeat_count, size_t nr_bytes) |
256 | { |
257 | struct selector *s = ps->context; |
258 | struct path_info *pi = NULL, *best = NULL; |
259 | |
260 | if (list_empty(&s->valid_paths)) |
261 | return NULL; |
262 | |
263 | /* Change preferred (first in list) path to evenly balance. */ |
264 | list_move_tail(s->valid_paths.next, &s->valid_paths); |
265 | |
266 | list_for_each_entry(pi, &s->valid_paths, list) |
267 | if (!best || (st_compare_load(pi, best, nr_bytes) < 0)) |
268 | best = pi; |
269 | |
270 | if (!best) |
271 | return NULL; |
272 | |
273 | *repeat_count = best->repeat_count; |
274 | |
275 | return best->path; |
276 | } |
277 | |
278 | static int st_start_io(struct path_selector *ps, struct dm_path *path, |
279 | size_t nr_bytes) |
280 | { |
281 | struct path_info *pi = path->pscontext; |
282 | |
283 | atomic_add(nr_bytes, &pi->in_flight_size); |
284 | |
285 | return 0; |
286 | } |
287 | |
288 | static int st_end_io(struct path_selector *ps, struct dm_path *path, |
289 | size_t nr_bytes) |
290 | { |
291 | struct path_info *pi = path->pscontext; |
292 | |
293 | atomic_sub(nr_bytes, &pi->in_flight_size); |
294 | |
295 | return 0; |
296 | } |
297 | |
298 | static struct path_selector_type st_ps = { |
299 | .name = "service-time", |
300 | .module = THIS_MODULE, |
301 | .table_args = 2, |
302 | .info_args = 2, |
303 | .create = st_create, |
304 | .destroy = st_destroy, |
305 | .status = st_status, |
306 | .add_path = st_add_path, |
307 | .fail_path = st_fail_path, |
308 | .reinstate_path = st_reinstate_path, |
309 | .select_path = st_select_path, |
310 | .start_io = st_start_io, |
311 | .end_io = st_end_io, |
312 | }; |
313 | |
314 | static int __init dm_st_init(void) |
315 | { |
316 | int r = dm_register_path_selector(&st_ps); |
317 | |
318 | if (r < 0) |
319 | DMERR("register failed %d", r); |
320 | |
321 | DMINFO("version " ST_VERSION " loaded"); |
322 | |
323 | return r; |
324 | } |
325 | |
326 | static void __exit dm_st_exit(void) |
327 | { |
328 | int r = dm_unregister_path_selector(&st_ps); |
329 | |
330 | if (r < 0) |
331 | DMERR("unregister failed %d", r); |
332 | } |
333 | |
334 | module_init(dm_st_init); |
335 | module_exit(dm_st_exit); |
336 | |
337 | MODULE_DESCRIPTION(DM_NAME " throughput oriented path selector"); |
338 | MODULE_AUTHOR("Kiyoshi Ueda <k-ueda@ct.jp.nec.com>"); |
339 | MODULE_LICENSE("GPL"); |
340 |
Branches:
ben-wpan
ben-wpan-stefan
javiroman/ks7010
jz-2.6.34
jz-2.6.34-rc5
jz-2.6.34-rc6
jz-2.6.34-rc7
jz-2.6.35
jz-2.6.36
jz-2.6.37
jz-2.6.38
jz-2.6.39
jz-3.0
jz-3.1
jz-3.11
jz-3.12
jz-3.13
jz-3.15
jz-3.16
jz-3.18-dt
jz-3.2
jz-3.3
jz-3.4
jz-3.5
jz-3.6
jz-3.6-rc2-pwm
jz-3.9
jz-3.9-clk
jz-3.9-rc8
jz47xx
jz47xx-2.6.38
master
Tags:
od-2011-09-04
od-2011-09-18
v2.6.34-rc5
v2.6.34-rc6
v2.6.34-rc7
v3.9