GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/powerpc/mm/numa.c
/*
 * pSeries NUMA support
 *
 * Copyright (C) 2002 Anton Blanchard <[email protected]>, IBM
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/threads.h>
#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/module.h>
#include <linux/nodemask.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/memblock.h>
#include <linux/of.h>
#include <linux/pfn.h>
#include <linux/cpuset.h>
#include <linux/node.h>
#include <asm/sparsemem.h>
#include <asm/prom.h>
#include <asm/system.h>
#include <asm/smp.h>
#include <asm/firmware.h>
#include <asm/paca.h>
#include <asm/hvcall.h>

static int numa_enabled = 1;

static char *cmdline __initdata;

static int numa_debug;
#define dbg(args...) if (numa_debug) { printk(KERN_INFO args); }

int numa_cpu_lookup_table[NR_CPUS];
cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
struct pglist_data *node_data[MAX_NUMNODES];

EXPORT_SYMBOL(numa_cpu_lookup_table);
EXPORT_SYMBOL(node_to_cpumask_map);
EXPORT_SYMBOL(node_data);

static int min_common_depth;
static int n_mem_addr_cells, n_mem_size_cells;
static int form1_affinity;

#define MAX_DISTANCE_REF_POINTS 4
static int distance_ref_points_depth;
static const unsigned int *distance_ref_points;
static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS];

/*
 * Allocate node_to_cpumask_map based on number of available nodes
 * Requires node_possible_map to be valid.
 *
 * Note: node_to_cpumask() is not valid until after this is done.
 */
static void __init setup_node_to_cpumask_map(void)
{
	unsigned int node, num = 0;

	/* setup nr_node_ids if not done yet */
	if (nr_node_ids == MAX_NUMNODES) {
		for_each_node_mask(node, node_possible_map)
			num = node;
		nr_node_ids = num + 1;
	}

	/* allocate the map */
	for (node = 0; node < nr_node_ids; node++)
		alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);

	/* cpumask_of_node() will now work */
	dbg("Node to cpumask map for %d nodes\n", nr_node_ids);
}

static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
						unsigned int *nid)
{
	unsigned long long mem;
	char *p = cmdline;
	static unsigned int fake_nid;
	static unsigned long long curr_boundary;

	/*
	 * Modify node id, iff we started creating NUMA nodes.
	 * We want to continue from where we left off last time.
	 */
	if (fake_nid)
		*nid = fake_nid;
	/*
	 * In case there are no more arguments to parse, the
	 * node_id should be the same as the last fake node id
	 * (we've handled this above).
	 */
	if (!p)
		return 0;

	mem = memparse(p, &p);
	if (!mem)
		return 0;

	if (mem < curr_boundary)
		return 0;

	curr_boundary = mem;

	if ((end_pfn << PAGE_SHIFT) > mem) {
		/*
		 * Skip commas and spaces
		 */
		while (*p == ',' || *p == ' ' || *p == '\t')
			p++;

		cmdline = p;
		fake_nid++;
		*nid = fake_nid;
		dbg("created new fake_node with id %d\n", fake_nid);
		return 1;
	}
	return 0;
}
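
/*
 * Example (illustrative command line): booting with "numa=fake=512M,1G,2G"
 * leaves cmdline pointing at "512M,1G,2G".  As the memory scan moves
 * upward, regions below 512M keep the caller's node id; once end_pfn
 * crosses 512M a new fake node is created, then another at 1G, and so on,
 * with cmdline advanced past each boundary as it is consumed.
 */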

/*
 * get_active_region_work_fn - A helper function for get_node_active_region
 *	Returns datax set to the start_pfn and end_pfn if they contain
 *	the initial value of datax->start_pfn between them
 * @start_pfn: start page(inclusive) of region to check
 * @end_pfn: end page(exclusive) of region to check
 * @datax: comes in with ->start_pfn set to value to search for and
 *	goes out with active range if it contains it
 * Returns 1 if search value is in range else 0
 */
static int __init get_active_region_work_fn(unsigned long start_pfn,
					unsigned long end_pfn, void *datax)
{
	struct node_active_region *data;
	data = (struct node_active_region *)datax;

	if (start_pfn <= data->start_pfn && end_pfn > data->start_pfn) {
		data->start_pfn = start_pfn;
		data->end_pfn = end_pfn;
		return 1;
	}
	return 0;
}

/*
 * get_node_active_region - Return active region containing start_pfn
 * Active range returned is empty if none found.
 * @start_pfn: The page to return the region for.
 * @node_ar: Returned set to the active region containing start_pfn
 */
static void __init get_node_active_region(unsigned long start_pfn,
				struct node_active_region *node_ar)
{
	int nid = early_pfn_to_nid(start_pfn);

	node_ar->nid = nid;
	node_ar->start_pfn = start_pfn;
	node_ar->end_pfn = start_pfn;
	work_with_active_regions(nid, get_active_region_work_fn, node_ar);
}

static void map_cpu_to_node(int cpu, int node)
{
	numa_cpu_lookup_table[cpu] = node;

	dbg("adding cpu %d to node %d\n", cpu, node);

	if (!(cpumask_test_cpu(cpu, node_to_cpumask_map[node])))
		cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
}

#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PPC_SPLPAR)
static void unmap_cpu_from_node(unsigned long cpu)
{
	int node = numa_cpu_lookup_table[cpu];

	dbg("removing cpu %lu from node %d\n", cpu, node);

	if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) {
		cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
	} else {
		printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n",
		       cpu, node);
	}
}
#endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */

/* must hold reference to node during call */
static const int *of_get_associativity(struct device_node *dev)
{
	return of_get_property(dev, "ibm,associativity", NULL);
}

/*
 * Returns the property linux,drconf-usable-memory if
 * it exists (the property exists only in kexec/kdump kernels,
 * added by kexec-tools)
 */
static const u32 *of_get_usable_memory(struct device_node *memory)
{
	const u32 *prop;
	u32 len;
	prop = of_get_property(memory, "linux,drconf-usable-memory", &len);
	if (!prop || len < sizeof(unsigned int))
		return 0;
	return prop;
}

int __node_distance(int a, int b)
{
	int i;
	int distance = LOCAL_DISTANCE;

	if (!form1_affinity)
		return distance;

	for (i = 0; i < distance_ref_points_depth; i++) {
		if (distance_lookup_table[a][i] == distance_lookup_table[b][i])
			break;

		/* Double the distance for each NUMA level */
		distance *= 2;
	}

	return distance;
}
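
/*
 * Worked example (illustrative): with LOCAL_DISTANCE of 10 and a
 * distance_ref_points_depth of 2, two nodes whose entries match at the
 * first (most significant) reference point report distance 10; nodes
 * that differ there but match at the second report 20; nodes that differ
 * at both report 40, since the distance doubles per mismatched level.
 */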

static void initialize_distance_lookup_table(int nid,
		const unsigned int *associativity)
{
	int i;

	if (!form1_affinity)
		return;

	for (i = 0; i < distance_ref_points_depth; i++) {
		distance_lookup_table[nid][i] =
			associativity[distance_ref_points[i]];
	}
}

/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
 * info is found.
 */
static int associativity_to_nid(const unsigned int *associativity)
{
	int nid = -1;

	if (min_common_depth == -1)
		goto out;

	if (associativity[0] >= min_common_depth)
		nid = associativity[min_common_depth];

	/* POWER4 LPAR uses 0xffff as invalid node */
	if (nid == 0xffff || nid >= MAX_NUMNODES)
		nid = -1;

	if (nid > 0 && associativity[0] >= distance_ref_points_depth)
		initialize_distance_lookup_table(nid, associativity);

out:
	return nid;
}

/* Returns the nid associated with the given device tree node,
 * or -1 if not found.
 */
static int of_node_to_nid_single(struct device_node *device)
{
	int nid = -1;
	const unsigned int *tmp;

	tmp = of_get_associativity(device);
	if (tmp)
		nid = associativity_to_nid(tmp);
	return nid;
}

/* Walk the device tree upwards, looking for an associativity id */
int of_node_to_nid(struct device_node *device)
{
	struct device_node *tmp;
	int nid = -1;

	of_node_get(device);
	while (device) {
		nid = of_node_to_nid_single(device);
		if (nid != -1)
			break;

		tmp = device;
		device = of_get_parent(tmp);
		of_node_put(tmp);
	}
	of_node_put(device);

	return nid;
}
EXPORT_SYMBOL_GPL(of_node_to_nid);

static int __init find_min_common_depth(void)
{
	int depth;
	struct device_node *chosen;
	struct device_node *root;
	const char *vec5;

	root = of_find_node_by_path("/rtas");
	if (!root)
		root = of_find_node_by_path("/");

	/*
	 * This property is a set of 32-bit integers, each representing
	 * an index into the ibm,associativity nodes.
	 *
	 * With form 0 affinity the first integer is for an SMP configuration
	 * (should be all 0's) and the second is for a normal NUMA
	 * configuration. We have only one level of NUMA.
	 *
	 * With form 1 affinity the first integer is the most significant
	 * NUMA boundary and the following are progressively less significant
	 * boundaries. There can be more than one level of NUMA.
	 */
	distance_ref_points = of_get_property(root,
					"ibm,associativity-reference-points",
					&distance_ref_points_depth);

	if (!distance_ref_points) {
		dbg("NUMA: ibm,associativity-reference-points not found.\n");
		goto err;
	}

	distance_ref_points_depth /= sizeof(int);

#define VEC5_AFFINITY_BYTE	5
#define VEC5_AFFINITY		0x80
	chosen = of_find_node_by_path("/chosen");
	if (chosen) {
		vec5 = of_get_property(chosen, "ibm,architecture-vec-5", NULL);
		if (vec5 && (vec5[VEC5_AFFINITY_BYTE] & VEC5_AFFINITY)) {
			dbg("Using form 1 affinity\n");
			form1_affinity = 1;
		}
	}

	if (form1_affinity) {
		depth = distance_ref_points[0];
	} else {
		if (distance_ref_points_depth < 2) {
			printk(KERN_WARNING "NUMA: "
				"short ibm,associativity-reference-points\n");
			goto err;
		}

		depth = distance_ref_points[1];
	}

	/*
	 * Warn and cap if the hardware supports more than
	 * MAX_DISTANCE_REF_POINTS domains.
	 */
	if (distance_ref_points_depth > MAX_DISTANCE_REF_POINTS) {
		printk(KERN_WARNING "NUMA: distance array capped at "
			"%d entries\n", MAX_DISTANCE_REF_POINTS);
		distance_ref_points_depth = MAX_DISTANCE_REF_POINTS;
	}

	of_node_put(root);
	return depth;

err:
	of_node_put(root);
	return -1;
}

static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
{
	struct device_node *memory = NULL;

	memory = of_find_node_by_type(memory, "memory");
	if (!memory)
		panic("numa.c: No memory nodes found!");

	*n_addr_cells = of_n_addr_cells(memory);
	*n_size_cells = of_n_size_cells(memory);
	of_node_put(memory);
}

static unsigned long __devinit read_n_cells(int n, const unsigned int **buf)
{
	unsigned long result = 0;

	while (n--) {
		result = (result << 32) | **buf;
		(*buf)++;
	}
	return result;
}
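
/*
 * Example (illustrative cells): with n == 2 and a buffer holding
 * { 0x00000001, 0x80000000 }, read_n_cells() returns 0x180000000 and
 * advances *buf past both cells, so consecutive address/size pairs can
 * be read back to back.
 */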

struct of_drconf_cell {
	u64	base_addr;
	u32	drc_index;
	u32	reserved;
	u32	aa_index;
	u32	flags;
};

#define DRCONF_MEM_ASSIGNED	0x00000008
#define DRCONF_MEM_AI_INVALID	0x00000040
#define DRCONF_MEM_RESERVED	0x00000080

/*
 * Read the next memblock list entry from the ibm,dynamic-memory property
 * and return the information in the provided of_drconf_cell structure.
 */
static void read_drconf_cell(struct of_drconf_cell *drmem, const u32 **cellp)
{
	const u32 *cp;

	drmem->base_addr = read_n_cells(n_mem_addr_cells, cellp);

	cp = *cellp;
	drmem->drc_index = cp[0];
	drmem->reserved = cp[1];
	drmem->aa_index = cp[2];
	drmem->flags = cp[3];

	*cellp = cp + 4;
}

/*
 * Retrieve and validate the ibm,dynamic-memory property of the device tree.
 *
 * The layout of the ibm,dynamic-memory property is a count N followed by
 * N memblock list entries.  Each memblock list entry contains information
 * as laid out in the of_drconf_cell struct above.
 */
static int of_get_drconf_memory(struct device_node *memory, const u32 **dm)
{
	const u32 *prop;
	u32 len, entries;

	prop = of_get_property(memory, "ibm,dynamic-memory", &len);
	if (!prop || len < sizeof(unsigned int))
		return 0;

	entries = *prop++;

	/* Now that we know the number of entries, revalidate the size
	 * of the property read in to ensure we have everything
	 */
	if (len < (entries * (n_mem_addr_cells + 4) + 1) * sizeof(unsigned int))
		return 0;

	*dm = prop;
	return entries;
}

/*
 * Retrieve and validate the ibm,lmb-size property for drconf memory
 * from the device tree.
 */
static u64 of_get_lmb_size(struct device_node *memory)
{
	const u32 *prop;
	u32 len;

	prop = of_get_property(memory, "ibm,lmb-size", &len);
	if (!prop || len < sizeof(unsigned int))
		return 0;

	return read_n_cells(n_mem_size_cells, &prop);
}

struct assoc_arrays {
	u32	n_arrays;
	u32	array_sz;
	const u32 *arrays;
};

/*
 * Retrieve and validate the list of associativity arrays for drconf
 * memory from the ibm,associativity-lookup-arrays property of the
 * device tree.
 *
 * The layout of the ibm,associativity-lookup-arrays property is a number N
 * indicating the number of associativity arrays, followed by a number M
 * indicating the size of each associativity array, followed by a list
 * of N associativity arrays.
 */
static int of_get_assoc_arrays(struct device_node *memory,
			       struct assoc_arrays *aa)
{
	const u32 *prop;
	u32 len;

	prop = of_get_property(memory, "ibm,associativity-lookup-arrays", &len);
	if (!prop || len < 2 * sizeof(unsigned int))
		return -1;

	aa->n_arrays = *prop++;
	aa->array_sz = *prop++;

	/* Now that we know the number of arrays and size of each array,
	 * revalidate the size of the property read in.
	 */
	if (len < (aa->n_arrays * aa->array_sz + 2) * sizeof(unsigned int))
		return -1;

	aa->arrays = prop;
	return 0;
}

/*
 * This is like of_node_to_nid_single() for memory represented in the
 * ibm,dynamic-reconfiguration-memory node.
 */
static int of_drconf_to_nid_single(struct of_drconf_cell *drmem,
				   struct assoc_arrays *aa)
{
	int default_nid = 0;
	int nid = default_nid;
	int index;

	if (min_common_depth > 0 && min_common_depth <= aa->array_sz &&
	    !(drmem->flags & DRCONF_MEM_AI_INVALID) &&
	    drmem->aa_index < aa->n_arrays) {
		index = drmem->aa_index * aa->array_sz + min_common_depth - 1;
		nid = aa->arrays[index];

		if (nid == 0xffff || nid >= MAX_NUMNODES)
			nid = default_nid;
	}

	return nid;
}
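
/*
 * Example (illustrative values): with min_common_depth == 4, an array_sz
 * of 5 and aa_index == 2, the lookup reads aa->arrays[2 * 5 + 4 - 1],
 * i.e. the fourth element of the third associativity array, which holds
 * the node id for that LMB.
 */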

/*
 * Figure out to which domain a cpu belongs and stick it there.
 * Return the id of the domain used.
 */
static int __cpuinit numa_setup_cpu(unsigned long lcpu)
{
	int nid = 0;
	struct device_node *cpu = of_get_cpu_node(lcpu, NULL);

	if (!cpu) {
		WARN_ON(1);
		goto out;
	}

	nid = of_node_to_nid_single(cpu);

	if (nid < 0 || !node_online(nid))
		nid = first_online_node;
out:
	map_cpu_to_node(lcpu, nid);

	of_node_put(cpu);

	return nid;
}

static int __cpuinit cpu_numa_callback(struct notifier_block *nfb,
			     unsigned long action,
			     void *hcpu)
{
	unsigned long lcpu = (unsigned long)hcpu;
	int ret = NOTIFY_DONE;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		numa_setup_cpu(lcpu);
		ret = NOTIFY_OK;
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
		unmap_cpu_from_node(lcpu);
		ret = NOTIFY_OK;
		break;
#endif
	}
	return ret;
}

/*
 * Check and possibly modify a memory region to enforce the memory limit.
 *
 * Returns the size the region should have to enforce the memory limit.
 * This will either be the original value of size, a truncated value,
 * or zero. If the returned value of size is 0 the region should be
 * discarded as it lies wholly above the memory limit.
 */
static unsigned long __init numa_enforce_memory_limit(unsigned long start,
						      unsigned long size)
{
	/*
	 * We use memblock_end_of_DRAM() in here instead of memory_limit because
	 * we've already adjusted it for the limit and it takes care of
	 * having memory holes below the limit.  Also, in the case of
	 * iommu_is_off, memory_limit is not set but is implicitly enforced.
	 */

	if (start + size <= memblock_end_of_DRAM())
		return size;

	if (start >= memblock_end_of_DRAM())
		return 0;

	return memblock_end_of_DRAM() - start;
}

/*
 * Reads the counter for a given entry in
 * linux,drconf-usable-memory property
 */
static inline int __init read_usm_ranges(const u32 **usm)
{
	/*
	 * For each lmb in ibm,dynamic-memory a corresponding
	 * entry in linux,drconf-usable-memory property contains
	 * a counter followed by that many (base, size) tuples.
	 * Read the counter from linux,drconf-usable-memory.
	 */
	return read_n_cells(n_mem_size_cells, usm);
}

/*
 * Extract NUMA information from the ibm,dynamic-reconfiguration-memory
 * node.  This assumes n_mem_{addr,size}_cells have been set.
 */
static void __init parse_drconf_memory(struct device_node *memory)
{
	const u32 *dm, *usm;
	unsigned int n, rc, ranges, is_kexec_kdump = 0;
	unsigned long lmb_size, base, size, sz;
	int nid;
	struct assoc_arrays aa;

	n = of_get_drconf_memory(memory, &dm);
	if (!n)
		return;

	lmb_size = of_get_lmb_size(memory);
	if (!lmb_size)
		return;

	rc = of_get_assoc_arrays(memory, &aa);
	if (rc)
		return;

	/* check if this is a kexec/kdump kernel */
	usm = of_get_usable_memory(memory);
	if (usm != NULL)
		is_kexec_kdump = 1;

	for (; n != 0; --n) {
		struct of_drconf_cell drmem;

		read_drconf_cell(&drmem, &dm);

		/* skip this block if the reserved bit is set in flags (0x80)
		   or if the block is not assigned to this partition (0x8) */
		if ((drmem.flags & DRCONF_MEM_RESERVED)
		    || !(drmem.flags & DRCONF_MEM_ASSIGNED))
			continue;

		base = drmem.base_addr;
		size = lmb_size;
		ranges = 1;

		if (is_kexec_kdump) {
			ranges = read_usm_ranges(&usm);
			if (!ranges) /* there are no (base, size) tuples */
				continue;
		}
		do {
			if (is_kexec_kdump) {
				base = read_n_cells(n_mem_addr_cells, &usm);
				size = read_n_cells(n_mem_size_cells, &usm);
			}
			nid = of_drconf_to_nid_single(&drmem, &aa);
			fake_numa_create_new_node(
						((base + size) >> PAGE_SHIFT),
						   &nid);
			node_set_online(nid);
			sz = numa_enforce_memory_limit(base, size);
			if (sz)
				add_active_range(nid, base >> PAGE_SHIFT,
						 (base >> PAGE_SHIFT)
						 + (sz >> PAGE_SHIFT));
		} while (--ranges);
	}
}
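
/*
 * In rough terms (illustrative numbers): with an lmb_size of 256MB and an
 * ibm,dynamic-memory list of 16 entries, the loop above visits 16 LMBs.
 * Each LMB that is assigned and not reserved is mapped to a node via its
 * aa_index and registered as a 256MB active range at its base_addr; on
 * kexec/kdump kernels only the usable sub-ranges listed in
 * linux,drconf-usable-memory are registered instead.
 */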

static int __init parse_numa_properties(void)
{
	struct device_node *cpu = NULL;
	struct device_node *memory = NULL;
	int default_nid = 0;
	unsigned long i;

	if (numa_enabled == 0) {
		printk(KERN_WARNING "NUMA disabled by user\n");
		return -1;
	}

	min_common_depth = find_min_common_depth();

	if (min_common_depth < 0)
		return min_common_depth;

	dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);

	/*
	 * Even though we connect cpus to numa domains later in SMP
	 * init, we need to know the node ids now. This is because
	 * each node to be onlined must have NODE_DATA etc backing it.
	 */
	for_each_present_cpu(i) {
		int nid;

		cpu = of_get_cpu_node(i, NULL);
		BUG_ON(!cpu);
		nid = of_node_to_nid_single(cpu);
		of_node_put(cpu);

		/*
		 * Don't fall back to default_nid yet -- we will plug
		 * cpus into nodes once the memory scan has discovered
		 * the topology.
		 */
		if (nid < 0)
			continue;
		node_set_online(nid);
	}

	get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
	memory = NULL;
	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
		unsigned long start;
		unsigned long size;
		int nid;
		int ranges;
		const unsigned int *memcell_buf;
		unsigned int len;

		memcell_buf = of_get_property(memory,
			"linux,usable-memory", &len);
		if (!memcell_buf || len <= 0)
			memcell_buf = of_get_property(memory, "reg", &len);
		if (!memcell_buf || len <= 0)
			continue;

		/* ranges in cell */
		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
new_range:
		/* these are order-sensitive, and modify the buffer pointer */
		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
		size = read_n_cells(n_mem_size_cells, &memcell_buf);

		/*
		 * Assumption: either all memory nodes or none will
		 * have associativity properties.  If none, then
		 * everything goes to default_nid.
		 */
		nid = of_node_to_nid_single(memory);
		if (nid < 0)
			nid = default_nid;

		fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
		node_set_online(nid);

		if (!(size = numa_enforce_memory_limit(start, size))) {
			if (--ranges)
				goto new_range;
			else
				continue;
		}

		add_active_range(nid, start >> PAGE_SHIFT,
				(start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));

		if (--ranges)
			goto new_range;
	}

	/*
	 * Now do the same thing for each MEMBLOCK listed in the ibm,dynamic-memory
	 * property in the ibm,dynamic-reconfiguration-memory node.
	 */
	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
	if (memory)
		parse_drconf_memory(memory);

	return 0;
}

static void __init setup_nonnuma(void)
{
	unsigned long top_of_ram = memblock_end_of_DRAM();
	unsigned long total_ram = memblock_phys_mem_size();
	unsigned long start_pfn, end_pfn;
	unsigned int nid = 0;
	struct memblock_region *reg;

	printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
	       top_of_ram, total_ram);
	printk(KERN_DEBUG "Memory hole size: %ldMB\n",
	       (top_of_ram - total_ram) >> 20);

	for_each_memblock(memory, reg) {
		start_pfn = memblock_region_memory_base_pfn(reg);
		end_pfn = memblock_region_memory_end_pfn(reg);

		fake_numa_create_new_node(end_pfn, &nid);
		add_active_range(nid, start_pfn, end_pfn);
		node_set_online(nid);
	}
}

void __init dump_numa_cpu_topology(void)
{
	unsigned int node;
	unsigned int cpu, count;

	if (min_common_depth == -1 || !numa_enabled)
		return;

	for_each_online_node(node) {
		printk(KERN_DEBUG "Node %d CPUs:", node);

		count = 0;
		/*
		 * If we used a CPU iterator here we would miss printing
		 * the holes in the cpumap.
		 */
		for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
			if (cpumask_test_cpu(cpu,
					node_to_cpumask_map[node])) {
				if (count == 0)
					printk(" %u", cpu);
				++count;
			} else {
				if (count > 1)
					printk("-%u", cpu - 1);
				count = 0;
			}
		}

		if (count > 1)
			printk("-%u", nr_cpu_ids - 1);
		printk("\n");
	}
}

static void __init dump_numa_memory_topology(void)
{
	unsigned int node;
	unsigned int count;

	if (min_common_depth == -1 || !numa_enabled)
		return;

	for_each_online_node(node) {
		unsigned long i;

		printk(KERN_DEBUG "Node %d Memory:", node);

		count = 0;

		for (i = 0; i < memblock_end_of_DRAM();
		     i += (1 << SECTION_SIZE_BITS)) {
			if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) {
				if (count == 0)
					printk(" 0x%lx", i);
				++count;
			} else {
				if (count > 0)
					printk("-0x%lx", i);
				count = 0;
			}
		}

		if (count > 0)
			printk("-0x%lx", i);
		printk("\n");
	}
}

/*
 * Allocate some memory, satisfying the memblock or bootmem allocator where
 * required. nid is the preferred node and end is the physical address of
 * the highest address in the node.
 *
 * Returns the virtual address of the memory.
 */
static void __init *careful_zallocation(int nid, unsigned long size,
				       unsigned long align,
				       unsigned long end_pfn)
{
	void *ret;
	int new_nid;
	unsigned long ret_paddr;

	ret_paddr = __memblock_alloc_base(size, align, end_pfn << PAGE_SHIFT);

	/* retry over all memory */
	if (!ret_paddr)
		ret_paddr = __memblock_alloc_base(size, align, memblock_end_of_DRAM());

	if (!ret_paddr)
		panic("numa.c: cannot allocate %lu bytes for node %d",
		      size, nid);

	ret = __va(ret_paddr);

	/*
	 * We initialize the nodes in numeric order: 0, 1, 2...
	 * and hand over control from the MEMBLOCK allocator to the
	 * bootmem allocator.  If this function is called for
	 * node 5, then we know that all nodes <5 are using the
	 * bootmem allocator instead of the MEMBLOCK allocator.
	 *
	 * So, check the nid from which this allocation came
	 * and double check to see if we need to use bootmem
	 * instead of the MEMBLOCK.  We don't free the MEMBLOCK memory
	 * since it would be useless.
	 */
	new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT);
	if (new_nid < nid) {
		ret = __alloc_bootmem_node(NODE_DATA(new_nid),
				size, align, 0);

		dbg("alloc_bootmem %p %lx\n", ret, size);
	}

	memset(ret, 0, size);
	return ret;
}

static struct notifier_block __cpuinitdata ppc64_numa_nb = {
	.notifier_call = cpu_numa_callback,
	.priority = 1 /* Must run before sched domains notifier. */
};

static void mark_reserved_regions_for_nid(int nid)
{
	struct pglist_data *node = NODE_DATA(nid);
	struct memblock_region *reg;

	for_each_memblock(reserved, reg) {
		unsigned long physbase = reg->base;
		unsigned long size = reg->size;
		unsigned long start_pfn = physbase >> PAGE_SHIFT;
		unsigned long end_pfn = PFN_UP(physbase + size);
		struct node_active_region node_ar;
		unsigned long node_end_pfn = node->node_start_pfn +
					     node->node_spanned_pages;

		/*
		 * Check to make sure that this memblock.reserved area is
		 * within the bounds of the node that we care about.
		 * Checking the nid of the start and end points is not
		 * sufficient because the reserved area could span the
		 * entire node.
		 */
		if (end_pfn <= node->node_start_pfn ||
		    start_pfn >= node_end_pfn)
			continue;

		get_node_active_region(start_pfn, &node_ar);
		while (start_pfn < end_pfn &&
			node_ar.start_pfn < node_ar.end_pfn) {
			unsigned long reserve_size = size;
			/*
			 * if reserved region extends past active region
			 * then trim size to active region
			 */
			if (end_pfn > node_ar.end_pfn)
				reserve_size = (node_ar.end_pfn << PAGE_SHIFT)
					- physbase;
			/*
			 * Only worry about *this* node, others may not
			 * yet have valid NODE_DATA().
			 */
			if (node_ar.nid == nid) {
				dbg("reserve_bootmem %lx %lx nid=%d\n",
					physbase, reserve_size, node_ar.nid);
				reserve_bootmem_node(NODE_DATA(node_ar.nid),
						physbase, reserve_size,
						BOOTMEM_DEFAULT);
			}
			/*
			 * if reserved region is contained in the active region
			 * then done.
			 */
			if (end_pfn <= node_ar.end_pfn)
				break;

			/*
			 * reserved region extends past the active region
			 *   get next active region that contains this
			 *   reserved region
			 */
			start_pfn = node_ar.end_pfn;
			physbase = start_pfn << PAGE_SHIFT;
			size = size - reserve_size;
			get_node_active_region(start_pfn, &node_ar);
		}
	}
}

void __init do_init_bootmem(void)
{
	int nid;

	min_low_pfn = 0;
	max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
	max_pfn = max_low_pfn;

	if (parse_numa_properties())
		setup_nonnuma();
	else
		dump_numa_memory_topology();

	for_each_online_node(nid) {
		unsigned long start_pfn, end_pfn;
		void *bootmem_vaddr;
		unsigned long bootmap_pages;

		get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);

		/*
		 * Allocate the node structure node local if possible
		 *
		 * Be careful moving this around, as it relies on all
		 * previous nodes' bootmem to be initialized and have
		 * all reserved areas marked.
		 */
		NODE_DATA(nid) = careful_zallocation(nid,
					sizeof(struct pglist_data),
					SMP_CACHE_BYTES, end_pfn);

		dbg("node %d\n", nid);
		dbg("NODE_DATA() = %p\n", NODE_DATA(nid));

		NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
		NODE_DATA(nid)->node_start_pfn = start_pfn;
		NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;

		if (NODE_DATA(nid)->node_spanned_pages == 0)
			continue;

		dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT);
		dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT);

		bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
		bootmem_vaddr = careful_zallocation(nid,
					bootmap_pages << PAGE_SHIFT,
					PAGE_SIZE, end_pfn);

		dbg("bootmap_vaddr = %p\n", bootmem_vaddr);

		init_bootmem_node(NODE_DATA(nid),
				  __pa(bootmem_vaddr) >> PAGE_SHIFT,
				  start_pfn, end_pfn);

		free_bootmem_with_active_regions(nid, end_pfn);
		/*
		 * Be very careful about moving this around.  Future
		 * calls to careful_zallocation() depend on this getting
		 * done correctly.
		 */
		mark_reserved_regions_for_nid(nid);
		sparse_memory_present_with_active_regions(nid);
	}

	init_bootmem_done = 1;

	/*
	 * Now bootmem is initialised we can create the node to cpumask
	 * lookup tables and setup the cpu callback to populate them.
	 */
	setup_node_to_cpumask_map();

	register_cpu_notifier(&ppc64_numa_nb);
	cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE,
			  (void *)(unsigned long)boot_cpuid);
}

void __init paging_init(void)
{
	unsigned long max_zone_pfns[MAX_NR_ZONES];
	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
	max_zone_pfns[ZONE_DMA] = memblock_end_of_DRAM() >> PAGE_SHIFT;
	free_area_init_nodes(max_zone_pfns);
}

static int __init early_numa(char *p)
{
	if (!p)
		return 0;

	if (strstr(p, "off"))
		numa_enabled = 0;

	if (strstr(p, "debug"))
		numa_debug = 1;

	p = strstr(p, "fake=");
	if (p)
		cmdline = p + strlen("fake=");

	return 0;
}
early_param("numa", early_numa);
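
/*
 * Boot-time usage (illustrative values): "numa=off" disables the NUMA
 * parsing above, "numa=debug" turns on the dbg() messages, and
 * "numa=fake=1G,2G,4G" hands the comma-separated boundary list to
 * fake_numa_create_new_node().  The options can be combined, e.g.
 * "numa=debug,fake=1G".
 */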

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Find the node associated with a hot added memory section for
 * memory represented in the device tree by the property
 * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory.
 */
static int hot_add_drconf_scn_to_nid(struct device_node *memory,
				     unsigned long scn_addr)
{
	const u32 *dm;
	unsigned int drconf_cell_cnt, rc;
	unsigned long lmb_size;
	struct assoc_arrays aa;
	int nid = -1;

	drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
	if (!drconf_cell_cnt)
		return -1;

	lmb_size = of_get_lmb_size(memory);
	if (!lmb_size)
		return -1;

	rc = of_get_assoc_arrays(memory, &aa);
	if (rc)
		return -1;

	for (; drconf_cell_cnt != 0; --drconf_cell_cnt) {
		struct of_drconf_cell drmem;

		read_drconf_cell(&drmem, &dm);

		/* skip this block if it is reserved or not assigned to
		 * this partition */
		if ((drmem.flags & DRCONF_MEM_RESERVED)
		    || !(drmem.flags & DRCONF_MEM_ASSIGNED))
			continue;

		if ((scn_addr < drmem.base_addr)
		    || (scn_addr >= (drmem.base_addr + lmb_size)))
			continue;

		nid = of_drconf_to_nid_single(&drmem, &aa);
		break;
	}

	return nid;
}

/*
 * Find the node associated with a hot added memory section for memory
 * represented in the device tree as a node (i.e. memory@XXXX) for
 * each memblock.
 */
int hot_add_node_scn_to_nid(unsigned long scn_addr)
{
	struct device_node *memory = NULL;
	int nid = -1;

	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
		unsigned long start, size;
		int ranges;
		const unsigned int *memcell_buf;
		unsigned int len;

		memcell_buf = of_get_property(memory, "reg", &len);
		if (!memcell_buf || len <= 0)
			continue;

		/* ranges in cell */
		ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);

		while (ranges--) {
			start = read_n_cells(n_mem_addr_cells, &memcell_buf);
			size = read_n_cells(n_mem_size_cells, &memcell_buf);

			if ((scn_addr < start) || (scn_addr >= (start + size)))
				continue;

			nid = of_node_to_nid_single(memory);
			break;
		}

		of_node_put(memory);
		if (nid >= 0)
			break;
	}

	return nid;
}

/*
 * Find the node associated with a hot added memory section.  Section
 * corresponds to a SPARSEMEM section, not a MEMBLOCK.  It is assumed that
 * sections are fully contained within a single MEMBLOCK.
 */
int hot_add_scn_to_nid(unsigned long scn_addr)
{
	struct device_node *memory = NULL;
	int nid, found = 0;

	if (!numa_enabled || (min_common_depth < 0))
		return first_online_node;

	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
	if (memory) {
		nid = hot_add_drconf_scn_to_nid(memory, scn_addr);
		of_node_put(memory);
	} else {
		nid = hot_add_node_scn_to_nid(scn_addr);
	}

	if (nid < 0 || !node_online(nid))
		nid = first_online_node;

	if (NODE_DATA(nid)->node_spanned_pages)
		return nid;

	for_each_online_node(nid) {
		if (NODE_DATA(nid)->node_spanned_pages) {
			found = 1;
			break;
		}
	}

	BUG_ON(!found);
	return nid;
}

static u64 hot_add_drconf_memory_max(void)
{
	struct device_node *memory = NULL;
	unsigned int drconf_cell_cnt = 0;
	u64 lmb_size = 0;
	const u32 *dm = 0;

	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
	if (memory) {
		drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
		lmb_size = of_get_lmb_size(memory);
		of_node_put(memory);
	}
	return lmb_size * drconf_cell_cnt;
}

/*
 * memory_hotplug_max - return max address of memory that may be added
 *
 * This is currently only used on systems that support drconfig memory
 * hotplug.
 */
u64 memory_hotplug_max(void)
{
	return max(hot_add_drconf_memory_max(), memblock_end_of_DRAM());
}
#endif /* CONFIG_MEMORY_HOTPLUG */

/* Virtual Processor Home Node (VPHN) support */
#ifdef CONFIG_PPC_SPLPAR
static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
static cpumask_t cpu_associativity_changes_mask;
static int vphn_enabled;
static void set_topology_timer(void);

/*
 * Store the current values of the associativity change counters in the
 * hypervisor.
 */
static void setup_cpu_associativity_change_counters(void)
{
	int cpu;

	/* The VPHN feature supports a maximum of 8 reference points */
	BUILD_BUG_ON(MAX_DISTANCE_REF_POINTS > 8);

	for_each_possible_cpu(cpu) {
		int i;
		u8 *counts = vphn_cpu_change_counts[cpu];
		volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;

		for (i = 0; i < distance_ref_points_depth; i++)
			counts[i] = hypervisor_counts[i];
	}
}

/*
 * The hypervisor maintains a set of 8 associativity change counters in
 * the VPA of each cpu that correspond to the associativity levels in the
 * ibm,associativity-reference-points property.  When an associativity
 * level changes, the corresponding counter is incremented.
 *
 * Set a bit in cpu_associativity_changes_mask for each cpu whose home
 * node associativity levels have changed.
 *
 * Returns the number of cpus with unhandled associativity changes.
 */
static int update_cpu_associativity_changes_mask(void)
{
	int cpu, nr_cpus = 0;
	cpumask_t *changes = &cpu_associativity_changes_mask;

	cpumask_clear(changes);

	for_each_possible_cpu(cpu) {
		int i, changed = 0;
		u8 *counts = vphn_cpu_change_counts[cpu];
		volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;

		for (i = 0; i < distance_ref_points_depth; i++) {
			if (hypervisor_counts[i] != counts[i]) {
				counts[i] = hypervisor_counts[i];
				changed = 1;
			}
		}
		if (changed) {
			cpumask_set_cpu(cpu, changes);
			nr_cpus++;
		}
	}

	return nr_cpus;
}

/*
 * 6 64-bit registers unpacked into 12 32-bit associativity values.  To form
 * the complete property we have to add the length in the first cell.
 */
#define VPHN_ASSOC_BUFSIZE (6*sizeof(u64)/sizeof(u32) + 1)

/*
 * Convert the associativity domain numbers returned from the hypervisor
 * to the sequence they would appear in the ibm,associativity property.
 */
static int vphn_unpack_associativity(const long *packed, unsigned int *unpacked)
{
	int i, nr_assoc_doms = 0;
	const u16 *field = (const u16 *) packed;

#define VPHN_FIELD_UNUSED	(0xffff)
#define VPHN_FIELD_MSB		(0x8000)
#define VPHN_FIELD_MASK		(~VPHN_FIELD_MSB)

	for (i = 1; i < VPHN_ASSOC_BUFSIZE; i++) {
		if (*field == VPHN_FIELD_UNUSED) {
			/* All significant fields processed, and remaining
			 * fields contain the reserved value of all 1's.
			 * Just store them.
			 */
			unpacked[i] = *((u32 *)field);
			field += 2;
		} else if (*field & VPHN_FIELD_MSB) {
			/* Data is in the lower 15 bits of this field */
			unpacked[i] = *field & VPHN_FIELD_MASK;
			field++;
			nr_assoc_doms++;
		} else {
			/* Data is in the lower 15 bits of this field
			 * concatenated with the next 16 bit field
			 */
			unpacked[i] = *((u32 *)field);
			field += 2;
			nr_assoc_doms++;
		}
	}

	/* The first cell contains the length of the property */
	unpacked[0] = nr_assoc_doms;

	return nr_assoc_doms;
}
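
/*
 * Example (illustrative field values, big-endian layout): a 16-bit field
 * of 0x8002 has the MSB set and unpacks to the single domain number 2,
 * while fields 0x0000 0x0005 have the MSB clear and are read together as
 * the 32-bit domain number 5; trailing 0xffff fields are stored as-is as
 * padding.
 */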

/*
 * Retrieve the new associativity information for a virtual processor's
 * home node.
 */
static long hcall_vphn(unsigned long cpu, unsigned int *associativity)
{
	long rc;
	long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
	u64 flags = 1;
	int hwcpu = get_hard_smp_processor_id(cpu);

	rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu);
	vphn_unpack_associativity(retbuf, associativity);

	return rc;
}

static long vphn_get_associativity(unsigned long cpu,
					unsigned int *associativity)
{
	long rc;

	rc = hcall_vphn(cpu, associativity);

	switch (rc) {
	case H_FUNCTION:
		printk(KERN_INFO
			"VPHN is not supported. Disabling polling...\n");
		stop_topology_update();
		break;
	case H_HARDWARE:
		printk(KERN_ERR
			"hcall_vphn() experienced a hardware fault "
			"preventing VPHN. Disabling polling...\n");
		stop_topology_update();
	}

	return rc;
}

/*
 * Update the node maps and sysfs entries for each cpu whose home node
 * has changed.
 */
int arch_update_cpu_topology(void)
{
	int cpu, nid, old_nid;
	unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0};
	struct sys_device *sysdev;

	for_each_cpu(cpu, &cpu_associativity_changes_mask) {
		vphn_get_associativity(cpu, associativity);
		nid = associativity_to_nid(associativity);

		if (nid < 0 || !node_online(nid))
			nid = first_online_node;

		old_nid = numa_cpu_lookup_table[cpu];

		/* Disable hotplug while we update the cpu
		 * masks and sysfs.
		 */
		get_online_cpus();
		unregister_cpu_under_node(cpu, old_nid);
		unmap_cpu_from_node(cpu);
		map_cpu_to_node(cpu, nid);
		register_cpu_under_node(cpu, nid);
		put_online_cpus();

		sysdev = get_cpu_sysdev(cpu);
		if (sysdev)
			kobject_uevent(&sysdev->kobj, KOBJ_CHANGE);
	}

	return 1;
}

static void topology_work_fn(struct work_struct *work)
{
	rebuild_sched_domains();
}
static DECLARE_WORK(topology_work, topology_work_fn);

void topology_schedule_update(void)
{
	schedule_work(&topology_work);
}

static void topology_timer_fn(unsigned long ignored)
{
	if (!vphn_enabled)
		return;
	if (update_cpu_associativity_changes_mask() > 0)
		topology_schedule_update();
	set_topology_timer();
}
static struct timer_list topology_timer =
	TIMER_INITIALIZER(topology_timer_fn, 0, 0);

static void set_topology_timer(void)
{
	topology_timer.data = 0;
	topology_timer.expires = jiffies + 60 * HZ;
	add_timer(&topology_timer);
}

/*
 * Start polling for VPHN associativity changes.
 */
int start_topology_update(void)
{
	int rc = 0;

	/* Disabled until races with load balancing are fixed */
	if (0 && firmware_has_feature(FW_FEATURE_VPHN) &&
	    get_lppaca()->shared_proc) {
		vphn_enabled = 1;
		setup_cpu_associativity_change_counters();
		init_timer_deferrable(&topology_timer);
		set_topology_timer();
		rc = 1;
	}

	return rc;
}
__initcall(start_topology_update);

/*
 * Disable polling for VPHN associativity changes.
 */
int stop_topology_update(void)
{
	vphn_enabled = 0;
	return del_timer_sync(&topology_timer);
}
#endif /* CONFIG_PPC_SPLPAR */