GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/sparc/kernel/cpumap.c
/* cpumap.c: used for optimizing CPU assignment
 *
 * Copyright (C) 2009 Hong H. Pham <[email protected]>
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/cpumask.h>
#include <linux/spinlock.h>
#include <asm/cpudata.h>
#include "cpumap.h"


enum {
        CPUINFO_LVL_ROOT = 0,
        CPUINFO_LVL_NODE,
        CPUINFO_LVL_CORE,
        CPUINFO_LVL_PROC,
        CPUINFO_LVL_MAX,
};

enum {
        ROVER_NO_OP              = 0,
        /* Increment rover every time level is visited */
        ROVER_INC_ON_VISIT       = 1 << 0,
        /* Increment parent's rover every time rover wraps around */
        ROVER_INC_PARENT_ON_LOOP = 1 << 1,
};

struct cpuinfo_node {
        int id;
        int level;
        int num_cpus;    /* Number of CPUs in this hierarchy */
        int parent_index;
        int child_start; /* Array index of the first child node */
        int child_end;   /* Array index of the last child node */
        int rover;       /* Child node iterator */
};

struct cpuinfo_level {
        int start_index; /* Index of first node of a level in a cpuinfo tree */
        int end_index;   /* Index of last node of a level in a cpuinfo tree */
        int num_nodes;   /* Number of nodes in a level in a cpuinfo tree */
};

struct cpuinfo_tree {
        int total_nodes;

        /* Offsets into nodes[] for each level of the tree */
        struct cpuinfo_level level[CPUINFO_LVL_MAX];
        struct cpuinfo_node  nodes[0];
};
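/* For illustration only (hypothetical topology, not taken from this file):
 * on a single-node Niagara 2 style box with two cores, two instruction
 * pipelines per core and two strands per pipeline (CPUs 0-7, numbered in
 * topology order), build_cpuinfo_tree() below would lay the tree out in
 * the flat nodes[] array roughly as:
 *
 *   nodes[0]     root        (level ROOT, 8 CPUs)
 *   nodes[1]     NUMA node 0 (level NODE, children nodes[2..3])
 *   nodes[2..3]  cores 0-1   (level CORE, children nodes[4..5] / nodes[6..7])
 *   nodes[4..7]  pipelines   (level PROC, children are CPU numbers)
 *
 * PROC-level nodes are the leaves; their child_start/child_end/rover hold
 * CPU numbers rather than nodes[] indices.
 */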

static struct cpuinfo_tree *cpuinfo_tree;

static u16 cpu_distribution_map[NR_CPUS];
static DEFINE_SPINLOCK(cpu_map_lock);


/* Niagara optimized cpuinfo tree traversal. */
static const int niagara_iterate_method[] = {
        [CPUINFO_LVL_ROOT] = ROVER_NO_OP,

        /* Strands (or virtual CPUs) within a core may not run concurrently
         * on the Niagara, as instruction pipeline(s) are shared.  Distribute
         * work to strands in different cores first for better concurrency.
         * Go to next NUMA node when all cores are used.
         */
        [CPUINFO_LVL_NODE] = ROVER_INC_ON_VISIT|ROVER_INC_PARENT_ON_LOOP,

        /* Strands are grouped together by proc_id in cpuinfo_sparc, i.e.
         * a proc_id represents an instruction pipeline.  Distribute work to
         * strands in different proc_id groups if the core has multiple
         * instruction pipelines (e.g. the Niagara 2/2+ has two).
         */
        [CPUINFO_LVL_CORE] = ROVER_INC_ON_VISIT,

        /* Pick the next strand in the proc_id group. */
        [CPUINFO_LVL_PROC] = ROVER_INC_ON_VISIT,
};

/* Generic cpuinfo tree traversal.  Distribute work round robin across NUMA
 * nodes.
 */
static const int generic_iterate_method[] = {
        [CPUINFO_LVL_ROOT] = ROVER_INC_ON_VISIT,
        [CPUINFO_LVL_NODE] = ROVER_NO_OP,
        [CPUINFO_LVL_CORE] = ROVER_INC_PARENT_ON_LOOP,
        [CPUINFO_LVL_PROC] = ROVER_INC_ON_VISIT|ROVER_INC_PARENT_ON_LOOP,
};
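/* Read together with increment_rover() below, these tables make the
 * per-node rovers behave roughly like a mixed-radix counter:
 * ROVER_INC_ON_VISIT means "advance this level's digit every time the
 * level is traversed", and ROVER_INC_PARENT_ON_LOOP means "carry into the
 * parent's digit when this one wraps around".  The Niagara table thus
 * cycles cores fastest, then pipelines, then sibling strands, spilling
 * into the next NUMA node when a node's cores are exhausted; the generic
 * table advances the root on every lookup (round robin across NUMA nodes)
 * and, within each node, steps through strands, carrying into the core
 * and node levels as each group is used up.
 */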

static int cpuinfo_id(int cpu, int level)
{
        int id;

        switch (level) {
        case CPUINFO_LVL_ROOT:
                id = 0;
                break;
        case CPUINFO_LVL_NODE:
                id = cpu_to_node(cpu);
                break;
        case CPUINFO_LVL_CORE:
                id = cpu_data(cpu).core_id;
                break;
        case CPUINFO_LVL_PROC:
                id = cpu_data(cpu).proc_id;
                break;
        default:
                id = -EINVAL;
        }
        return id;
}

/*
 * Enumerate the CPU information in __cpu_data to determine the start index,
 * end index, and number of nodes for each level in the cpuinfo tree.  The
 * total number of cpuinfo nodes required to build the tree is returned.
 */
static int enumerate_cpuinfo_nodes(struct cpuinfo_level *tree_level)
{
        int prev_id[CPUINFO_LVL_MAX];
        int i, n, num_nodes;

        for (i = CPUINFO_LVL_ROOT; i < CPUINFO_LVL_MAX; i++) {
                struct cpuinfo_level *lv = &tree_level[i];

                prev_id[i] = -1;
                lv->start_index = lv->end_index = lv->num_nodes = 0;
        }

        num_nodes = 1; /* Include the root node */

        for (i = 0; i < num_possible_cpus(); i++) {
                if (!cpu_online(i))
                        continue;

                n = cpuinfo_id(i, CPUINFO_LVL_NODE);
                if (n > prev_id[CPUINFO_LVL_NODE]) {
                        tree_level[CPUINFO_LVL_NODE].num_nodes++;
                        prev_id[CPUINFO_LVL_NODE] = n;
                        num_nodes++;
                }
                n = cpuinfo_id(i, CPUINFO_LVL_CORE);
                if (n > prev_id[CPUINFO_LVL_CORE]) {
                        tree_level[CPUINFO_LVL_CORE].num_nodes++;
                        prev_id[CPUINFO_LVL_CORE] = n;
                        num_nodes++;
                }
                n = cpuinfo_id(i, CPUINFO_LVL_PROC);
                if (n > prev_id[CPUINFO_LVL_PROC]) {
                        tree_level[CPUINFO_LVL_PROC].num_nodes++;
                        prev_id[CPUINFO_LVL_PROC] = n;
                        num_nodes++;
                }
        }

        tree_level[CPUINFO_LVL_ROOT].num_nodes = 1;

        n = tree_level[CPUINFO_LVL_NODE].num_nodes;
        tree_level[CPUINFO_LVL_NODE].start_index = 1;
        tree_level[CPUINFO_LVL_NODE].end_index   = n;

        n++;
        tree_level[CPUINFO_LVL_CORE].start_index = n;
        n += tree_level[CPUINFO_LVL_CORE].num_nodes;
        tree_level[CPUINFO_LVL_CORE].end_index   = n - 1;

        tree_level[CPUINFO_LVL_PROC].start_index = n;
        n += tree_level[CPUINFO_LVL_PROC].num_nodes;
        tree_level[CPUINFO_LVL_PROC].end_index   = n - 1;

        return num_nodes;
}

/* Build a tree representation of the CPU hierarchy using the per CPU
 * information in __cpu_data.  Entries in __cpu_data[0..NR_CPUS] are
 * assumed to be sorted in ascending order based on node, core_id, and
 * proc_id (in order of significance).
 */
static struct cpuinfo_tree *build_cpuinfo_tree(void)
{
        struct cpuinfo_tree *new_tree;
        struct cpuinfo_node *node;
        struct cpuinfo_level tmp_level[CPUINFO_LVL_MAX];
        int num_cpus[CPUINFO_LVL_MAX];
        int level_rover[CPUINFO_LVL_MAX];
        int prev_id[CPUINFO_LVL_MAX];
        int n, id, cpu, prev_cpu, last_cpu, level;

        n = enumerate_cpuinfo_nodes(tmp_level);

        new_tree = kzalloc(sizeof(struct cpuinfo_tree) +
                           (sizeof(struct cpuinfo_node) * n), GFP_ATOMIC);
        if (!new_tree)
                return NULL;

        new_tree->total_nodes = n;
        memcpy(&new_tree->level, tmp_level, sizeof(tmp_level));

        prev_cpu = cpu = cpumask_first(cpu_online_mask);

        /* Initialize all levels in the tree with the first CPU */
        for (level = CPUINFO_LVL_PROC; level >= CPUINFO_LVL_ROOT; level--) {
                n = new_tree->level[level].start_index;

                level_rover[level] = n;
                node = &new_tree->nodes[n];

                id = cpuinfo_id(cpu, level);
                if (unlikely(id < 0)) {
                        kfree(new_tree);
                        return NULL;
                }
                node->id = id;
                node->level = level;
                node->num_cpus = 1;

                node->parent_index = (level > CPUINFO_LVL_ROOT)
                    ? new_tree->level[level - 1].start_index : -1;

                node->child_start = node->child_end = node->rover =
                    (level == CPUINFO_LVL_PROC)
                    ? cpu : new_tree->level[level + 1].start_index;

                prev_id[level] = node->id;
                num_cpus[level] = 1;
        }

        for (last_cpu = (num_possible_cpus() - 1); last_cpu >= 0; last_cpu--) {
                if (cpu_online(last_cpu))
                        break;
        }

        while (++cpu <= last_cpu) {
                if (!cpu_online(cpu))
                        continue;

                for (level = CPUINFO_LVL_PROC; level >= CPUINFO_LVL_ROOT;
                     level--) {
                        id = cpuinfo_id(cpu, level);
                        if (unlikely(id < 0)) {
                                kfree(new_tree);
                                return NULL;
                        }

                        if ((id != prev_id[level]) || (cpu == last_cpu)) {
                                prev_id[level] = id;
                                node = &new_tree->nodes[level_rover[level]];
                                node->num_cpus = num_cpus[level];
                                num_cpus[level] = 1;

                                if (cpu == last_cpu)
                                        node->num_cpus++;

                                /* Connect tree node to parent */
                                if (level == CPUINFO_LVL_ROOT)
                                        node->parent_index = -1;
                                else
                                        node->parent_index =
                                            level_rover[level - 1];

                                if (level == CPUINFO_LVL_PROC) {
                                        node->child_end =
                                            (cpu == last_cpu) ? cpu : prev_cpu;
                                } else {
                                        node->child_end =
                                            level_rover[level + 1] - 1;
                                }

                                /* Initialize the next node in the same level */
                                n = ++level_rover[level];
                                if (n <= new_tree->level[level].end_index) {
                                        node = &new_tree->nodes[n];
                                        node->id = id;
                                        node->level = level;

                                        /* Connect node to child */
                                        node->child_start = node->child_end =
                                            node->rover =
                                            (level == CPUINFO_LVL_PROC)
                                            ? cpu : level_rover[level + 1];
                                }
                        } else
                                num_cpus[level]++;
                }
                prev_cpu = cpu;
        }

        return new_tree;
}

static void increment_rover(struct cpuinfo_tree *t, int node_index,
                            int root_index, const int *rover_inc_table)
{
        struct cpuinfo_node *node = &t->nodes[node_index];
        int top_level, level;

        top_level = t->nodes[root_index].level;
        for (level = node->level; level >= top_level; level--) {
                node->rover++;
                if (node->rover <= node->child_end)
                        return;

                node->rover = node->child_start;
                /* If parent's rover does not need to be adjusted, stop here. */
                if ((level == top_level) ||
                    !(rover_inc_table[level] & ROVER_INC_PARENT_ON_LOOP))
                        return;

                node = &t->nodes[node->parent_index];
        }
}

static int iterate_cpu(struct cpuinfo_tree *t, unsigned int root_index)
{
        const int *rover_inc_table;
        int level, new_index, index = root_index;

        switch (sun4v_chip_type) {
        case SUN4V_CHIP_NIAGARA1:
        case SUN4V_CHIP_NIAGARA2:
                rover_inc_table = niagara_iterate_method;
                break;
        default:
                rover_inc_table = generic_iterate_method;
        }

        for (level = t->nodes[root_index].level; level < CPUINFO_LVL_MAX;
             level++) {
                new_index = t->nodes[index].rover;
                if (rover_inc_table[level] & ROVER_INC_ON_VISIT)
                        increment_rover(t, index, root_index, rover_inc_table);

                index = new_index;
        }
        return index;
}

static void _cpu_map_rebuild(void)
{
        int i;

        if (cpuinfo_tree) {
                kfree(cpuinfo_tree);
                cpuinfo_tree = NULL;
        }

        cpuinfo_tree = build_cpuinfo_tree();
        if (!cpuinfo_tree)
                return;

        /* Build CPU distribution map that spans all online CPUs.  No need
         * to check if the CPU is online, as that is done when the cpuinfo
         * tree is being built.
         */
        for (i = 0; i < cpuinfo_tree->nodes[0].num_cpus; i++)
                cpu_distribution_map[i] = iterate_cpu(cpuinfo_tree, 0);
}
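/* For illustration only: on the hypothetical two-core, two-pipeline,
 * eight-strand box sketched near the top of this file (CPUs 0-7), the
 * Niagara policy would fill cpu_distribution_map[] with roughly
 * 0, 4, 2, 6, 1, 5, 3, 7 -- successive indices land on different cores
 * first, then on different pipelines, and only then on sibling strands.
 */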

/* Fallback if the cpuinfo tree could not be built.  CPU mapping is linear
 * round robin.
 */
static int simple_map_to_cpu(unsigned int index)
{
        int i, end, cpu_rover;

        cpu_rover = 0;
        end = index % num_online_cpus();
        for (i = 0; i < num_possible_cpus(); i++) {
                if (cpu_online(cpu_rover)) {
                        if (cpu_rover >= end)
                                return cpu_rover;

                        cpu_rover++;
                }
        }

        /* Impossible, since num_online_cpus() <= num_possible_cpus() */
        return cpumask_first(cpu_online_mask);
}
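/* For example, with CPUs 0-3 online, simple_map_to_cpu() above maps an
 * index of 5 to CPU 1 (5 % 4 == 1); when no CPUs are offline this is a
 * plain modulo distribution.
 */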

static int _map_to_cpu(unsigned int index)
{
        struct cpuinfo_node *root_node;

        if (unlikely(!cpuinfo_tree)) {
                _cpu_map_rebuild();
                if (!cpuinfo_tree)
                        return simple_map_to_cpu(index);
        }

        root_node = &cpuinfo_tree->nodes[0];
#ifdef CONFIG_HOTPLUG_CPU
        if (unlikely(root_node->num_cpus != num_online_cpus())) {
                _cpu_map_rebuild();
                if (!cpuinfo_tree)
                        return simple_map_to_cpu(index);
        }
#endif
        return cpu_distribution_map[index % root_node->num_cpus];
}
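/* Map an arbitrary index (e.g. a queue or interrupt number) to an online
 * CPU, spreading consecutive indices across the CPU topology according to
 * the distribution map built above.  Safe to call with CPU hotplug
 * enabled; the map is rebuilt if the number of online CPUs has changed.
 */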
int map_to_cpu(unsigned int index)
{
        int mapped_cpu;
        unsigned long flag;

        spin_lock_irqsave(&cpu_map_lock, flag);
        mapped_cpu = _map_to_cpu(index);

#ifdef CONFIG_HOTPLUG_CPU
        while (unlikely(!cpu_online(mapped_cpu)))
                mapped_cpu = _map_to_cpu(index);
#endif
        spin_unlock_irqrestore(&cpu_map_lock, flag);
        return mapped_cpu;
}
EXPORT_SYMBOL(map_to_cpu);
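/* A hypothetical caller (not part of this file) might spread per-queue
 * work across the machine like this:
 *
 *      for (i = 0; i < nr_queues; i++)
 *              queue_to_cpu[i] = map_to_cpu(i);
 *
 * where nr_queues and queue_to_cpu[] are the caller's own; consecutive
 * queues then end up on CPUs chosen by the topology-aware policy above.
 */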

void cpu_map_rebuild(void)
{
        unsigned long flag;

        spin_lock_irqsave(&cpu_map_lock, flag);
        _cpu_map_rebuild();
        spin_unlock_irqrestore(&cpu_map_lock, flag);
}