Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/drivers/md/dm-service-time.c
15111 views
1
/*
2
* Copyright (C) 2007-2009 NEC Corporation. All Rights Reserved.
3
*
4
* Module Author: Kiyoshi Ueda
5
*
6
* This file is released under the GPL.
7
*
8
* Throughput oriented path selector.
9
*/
10
11
#include "dm.h"
12
#include "dm-path-selector.h"
13
14
#include <linux/slab.h>
15
16
#define DM_MSG_PREFIX "multipath service-time"
17
#define ST_MIN_IO 1
18
#define ST_MAX_RELATIVE_THROUGHPUT 100
19
#define ST_MAX_RELATIVE_THROUGHPUT_SHIFT 7
20
#define ST_MAX_INFLIGHT_SIZE ((size_t)-1 >> ST_MAX_RELATIVE_THROUGHPUT_SHIFT)
21
#define ST_VERSION "0.2.0"
22
23
struct selector {
24
struct list_head valid_paths;
25
struct list_head failed_paths;
26
};
27
28
struct path_info {
29
struct list_head list;
30
struct dm_path *path;
31
unsigned repeat_count;
32
unsigned relative_throughput;
33
atomic_t in_flight_size; /* Total size of in-flight I/Os */
34
};
35
36
static struct selector *alloc_selector(void)
37
{
38
struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL);
39
40
if (s) {
41
INIT_LIST_HEAD(&s->valid_paths);
42
INIT_LIST_HEAD(&s->failed_paths);
43
}
44
45
return s;
46
}
47
48
static int st_create(struct path_selector *ps, unsigned argc, char **argv)
49
{
50
struct selector *s = alloc_selector();
51
52
if (!s)
53
return -ENOMEM;
54
55
ps->context = s;
56
return 0;
57
}
58
59
static void free_paths(struct list_head *paths)
60
{
61
struct path_info *pi, *next;
62
63
list_for_each_entry_safe(pi, next, paths, list) {
64
list_del(&pi->list);
65
kfree(pi);
66
}
67
}
68
69
static void st_destroy(struct path_selector *ps)
70
{
71
struct selector *s = ps->context;
72
73
free_paths(&s->valid_paths);
74
free_paths(&s->failed_paths);
75
kfree(s);
76
ps->context = NULL;
77
}
78
79
static int st_status(struct path_selector *ps, struct dm_path *path,
80
status_type_t type, char *result, unsigned maxlen)
81
{
82
unsigned sz = 0;
83
struct path_info *pi;
84
85
if (!path)
86
DMEMIT("0 ");
87
else {
88
pi = path->pscontext;
89
90
switch (type) {
91
case STATUSTYPE_INFO:
92
DMEMIT("%d %u ", atomic_read(&pi->in_flight_size),
93
pi->relative_throughput);
94
break;
95
case STATUSTYPE_TABLE:
96
DMEMIT("%u %u ", pi->repeat_count,
97
pi->relative_throughput);
98
break;
99
}
100
}
101
102
return sz;
103
}
104
105
static int st_add_path(struct path_selector *ps, struct dm_path *path,
106
int argc, char **argv, char **error)
107
{
108
struct selector *s = ps->context;
109
struct path_info *pi;
110
unsigned repeat_count = ST_MIN_IO;
111
unsigned relative_throughput = 1;
112
113
/*
114
* Arguments: [<repeat_count> [<relative_throughput>]]
115
* <repeat_count>: The number of I/Os before switching path.
116
* If not given, default (ST_MIN_IO) is used.
117
* <relative_throughput>: The relative throughput value of
118
* the path among all paths in the path-group.
119
* The valid range: 0-<ST_MAX_RELATIVE_THROUGHPUT>
120
* If not given, minimum value '1' is used.
121
* If '0' is given, the path isn't selected while
122
* other paths having a positive value are
123
* available.
124
*/
125
if (argc > 2) {
126
*error = "service-time ps: incorrect number of arguments";
127
return -EINVAL;
128
}
129
130
if (argc && (sscanf(argv[0], "%u", &repeat_count) != 1)) {
131
*error = "service-time ps: invalid repeat count";
132
return -EINVAL;
133
}
134
135
if ((argc == 2) &&
136
(sscanf(argv[1], "%u", &relative_throughput) != 1 ||
137
relative_throughput > ST_MAX_RELATIVE_THROUGHPUT)) {
138
*error = "service-time ps: invalid relative_throughput value";
139
return -EINVAL;
140
}
141
142
/* allocate the path */
143
pi = kmalloc(sizeof(*pi), GFP_KERNEL);
144
if (!pi) {
145
*error = "service-time ps: Error allocating path context";
146
return -ENOMEM;
147
}
148
149
pi->path = path;
150
pi->repeat_count = repeat_count;
151
pi->relative_throughput = relative_throughput;
152
atomic_set(&pi->in_flight_size, 0);
153
154
path->pscontext = pi;
155
156
list_add_tail(&pi->list, &s->valid_paths);
157
158
return 0;
159
}
160
161
static void st_fail_path(struct path_selector *ps, struct dm_path *path)
162
{
163
struct selector *s = ps->context;
164
struct path_info *pi = path->pscontext;
165
166
list_move(&pi->list, &s->failed_paths);
167
}
168
169
static int st_reinstate_path(struct path_selector *ps, struct dm_path *path)
170
{
171
struct selector *s = ps->context;
172
struct path_info *pi = path->pscontext;
173
174
list_move_tail(&pi->list, &s->valid_paths);
175
176
return 0;
177
}
178
179
/*
180
* Compare the estimated service time of 2 paths, pi1 and pi2,
181
* for the incoming I/O.
182
*
183
* Returns:
184
* < 0 : pi1 is better
185
* 0 : no difference between pi1 and pi2
186
* > 0 : pi2 is better
187
*
188
* Description:
189
* Basically, the service time is estimated by:
190
* ('pi->in-flight-size' + 'incoming') / 'pi->relative_throughput'
191
* To reduce the calculation, some optimizations are made.
192
* (See comments inline)
193
*/
194
static int st_compare_load(struct path_info *pi1, struct path_info *pi2,
195
size_t incoming)
196
{
197
size_t sz1, sz2, st1, st2;
198
199
sz1 = atomic_read(&pi1->in_flight_size);
200
sz2 = atomic_read(&pi2->in_flight_size);
201
202
/*
203
* Case 1: Both have same throughput value. Choose less loaded path.
204
*/
205
if (pi1->relative_throughput == pi2->relative_throughput)
206
return sz1 - sz2;
207
208
/*
209
* Case 2a: Both have same load. Choose higher throughput path.
210
* Case 2b: One path has no throughput value. Choose the other one.
211
*/
212
if (sz1 == sz2 ||
213
!pi1->relative_throughput || !pi2->relative_throughput)
214
return pi2->relative_throughput - pi1->relative_throughput;
215
216
/*
217
* Case 3: Calculate service time. Choose faster path.
218
* Service time using pi1:
219
* st1 = (sz1 + incoming) / pi1->relative_throughput
220
* Service time using pi2:
221
* st2 = (sz2 + incoming) / pi2->relative_throughput
222
*
223
* To avoid the division, transform the expression to use
224
* multiplication.
225
* Because ->relative_throughput > 0 here, if st1 < st2,
226
* the expressions below are the same meaning:
227
* (sz1 + incoming) / pi1->relative_throughput <
228
* (sz2 + incoming) / pi2->relative_throughput
229
* (sz1 + incoming) * pi2->relative_throughput <
230
* (sz2 + incoming) * pi1->relative_throughput
231
* So use the later one.
232
*/
233
sz1 += incoming;
234
sz2 += incoming;
235
if (unlikely(sz1 >= ST_MAX_INFLIGHT_SIZE ||
236
sz2 >= ST_MAX_INFLIGHT_SIZE)) {
237
/*
238
* Size may be too big for multiplying pi->relative_throughput
239
* and overflow.
240
* To avoid the overflow and mis-selection, shift down both.
241
*/
242
sz1 >>= ST_MAX_RELATIVE_THROUGHPUT_SHIFT;
243
sz2 >>= ST_MAX_RELATIVE_THROUGHPUT_SHIFT;
244
}
245
st1 = sz1 * pi2->relative_throughput;
246
st2 = sz2 * pi1->relative_throughput;
247
if (st1 != st2)
248
return st1 - st2;
249
250
/*
251
* Case 4: Service time is equal. Choose higher throughput path.
252
*/
253
return pi2->relative_throughput - pi1->relative_throughput;
254
}
255
256
static struct dm_path *st_select_path(struct path_selector *ps,
257
unsigned *repeat_count, size_t nr_bytes)
258
{
259
struct selector *s = ps->context;
260
struct path_info *pi = NULL, *best = NULL;
261
262
if (list_empty(&s->valid_paths))
263
return NULL;
264
265
/* Change preferred (first in list) path to evenly balance. */
266
list_move_tail(s->valid_paths.next, &s->valid_paths);
267
268
list_for_each_entry(pi, &s->valid_paths, list)
269
if (!best || (st_compare_load(pi, best, nr_bytes) < 0))
270
best = pi;
271
272
if (!best)
273
return NULL;
274
275
*repeat_count = best->repeat_count;
276
277
return best->path;
278
}
279
280
static int st_start_io(struct path_selector *ps, struct dm_path *path,
281
size_t nr_bytes)
282
{
283
struct path_info *pi = path->pscontext;
284
285
atomic_add(nr_bytes, &pi->in_flight_size);
286
287
return 0;
288
}
289
290
static int st_end_io(struct path_selector *ps, struct dm_path *path,
291
size_t nr_bytes)
292
{
293
struct path_info *pi = path->pscontext;
294
295
atomic_sub(nr_bytes, &pi->in_flight_size);
296
297
return 0;
298
}
299
300
static struct path_selector_type st_ps = {
301
.name = "service-time",
302
.module = THIS_MODULE,
303
.table_args = 2,
304
.info_args = 2,
305
.create = st_create,
306
.destroy = st_destroy,
307
.status = st_status,
308
.add_path = st_add_path,
309
.fail_path = st_fail_path,
310
.reinstate_path = st_reinstate_path,
311
.select_path = st_select_path,
312
.start_io = st_start_io,
313
.end_io = st_end_io,
314
};
315
316
static int __init dm_st_init(void)
317
{
318
int r = dm_register_path_selector(&st_ps);
319
320
if (r < 0)
321
DMERR("register failed %d", r);
322
323
DMINFO("version " ST_VERSION " loaded");
324
325
return r;
326
}
327
328
static void __exit dm_st_exit(void)
329
{
330
int r = dm_unregister_path_selector(&st_ps);
331
332
if (r < 0)
333
DMERR("unregister failed %d", r);
334
}
335
336
module_init(dm_st_init);
337
module_exit(dm_st_exit);
338
339
MODULE_DESCRIPTION(DM_NAME " throughput oriented path selector");
340
MODULE_AUTHOR("Kiyoshi Ueda <[email protected]>");
341
MODULE_LICENSE("GPL");
342
343