Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/hyperv/irqdomain.c
51289 views
1
// SPDX-License-Identifier: GPL-2.0
2
3
/*
4
* Irqdomain for Linux to run as the root partition on Microsoft Hypervisor.
5
*
6
* Authors:
7
* Sunil Muthuswamy <[email protected]>
8
* Wei Liu <[email protected]>
9
*/
10
11
#include <linux/pci.h>
12
#include <linux/irq.h>
13
#include <linux/export.h>
14
#include <linux/irqchip/irq-msi-lib.h>
15
#include <asm/mshyperv.h>
16
17
/*
 * Issue a MAP_DEVICE_INTERRUPT hypercall for @device_id.
 *
 * @device_id: hypervisor device identifier the interrupt belongs to
 * @level:     true for level triggered, false for edge triggered
 * @cpu:       CPU the interrupt is targeted at
 * @vector:    vector to be delivered
 * @entry:     receives the interrupt entry found in the hypercall output
 *
 * The per-CPU hypercall input and output buffers are used with local
 * interrupts disabled. Note that @entry is written from the output buffer
 * whether or not the hypercall succeeded; callers must look at the return
 * value before trusting it.
 *
 * Return: 0 on success, -EINVAL if no VP set could be built for @cpu,
 * otherwise the errno derived from the hypercall status.
 */
static int hv_map_interrupt(union hv_device_id device_id, bool level,
		int cpu, int vector, struct hv_interrupt_entry *entry)
{
	struct hv_input_map_device_interrupt *in;
	struct hv_output_map_device_interrupt *out;
	struct hv_device_interrupt_descriptor *desc;
	unsigned long irq_flags;
	u64 hv_status;
	int banks;

	local_irq_save(irq_flags);

	in = *this_cpu_ptr(hyperv_pcpu_input_arg);
	out = *this_cpu_ptr(hyperv_pcpu_output_arg);

	memset(in, 0, sizeof(*in));
	in->partition_id = hv_current_partition_id;
	in->device_id = device_id.as_uint64;

	desc = &in->interrupt_descriptor;
	desc->interrupt_type = HV_X64_INTERRUPT_TYPE_FIXED;
	desc->vector_count = 1;
	desc->target.vector = vector;
	desc->trigger_mode = level ? HV_INTERRUPT_TRIGGER_MODE_LEVEL :
				     HV_INTERRUPT_TRIGGER_MODE_EDGE;

	desc->target.vp_set.valid_bank_mask = 0;
	desc->target.vp_set.format = HV_GENERIC_SET_SPARSE_4K;
	banks = cpumask_to_vpset(&desc->target.vp_set, cpumask_of(cpu));
	if (banks < 0) {
		local_irq_restore(irq_flags);
		pr_err("%s: unable to generate VP set\n", __func__);
		return -EINVAL;
	}
	desc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET;

	/*
	 * var-sized hypercall, var-size starts after vp_mask (thus
	 * vp_set.format does not count, but vp_set.valid_bank_mask
	 * does), hence the "+ 1" on top of the number of banks.
	 */
	hv_status = hv_do_rep_hypercall(HVCALL_MAP_DEVICE_INTERRUPT, 0,
					banks + 1, in, out);
	*entry = out->interrupt_entry;

	local_irq_restore(irq_flags);

	if (!hv_result_success(hv_status))
		hv_status_err(hv_status, "\n");

	return hv_result_to_errno(hv_status);
}
73
74
/*
 * Issue an UNMAP_DEVICE_INTERRUPT hypercall.
 *
 * @id:        device identifier, already flattened to its 64-bit form
 * @old_entry: interrupt entry that is to be unmapped
 *
 * The per-CPU hypercall input buffer is filled and handed to the hypervisor
 * with local interrupts disabled.
 *
 * Return: 0 on success, otherwise the errno derived from the hypercall
 * status.
 */
static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry)
{
	struct hv_input_unmap_device_interrupt *in;
	unsigned long irq_flags;
	u64 hv_status;

	local_irq_save(irq_flags);

	in = *this_cpu_ptr(hyperv_pcpu_input_arg);
	memset(in, 0, sizeof(*in));

	in->partition_id = hv_current_partition_id;
	in->device_id = id;
	in->interrupt_entry = *old_entry;

	hv_status = hv_do_hypercall(HVCALL_UNMAP_DEVICE_INTERRUPT, in, NULL);

	local_irq_restore(irq_flags);

	if (!hv_result_success(hv_status))
		hv_status_err(hv_status, "\n");

	return hv_result_to_errno(hv_status);
}
98
99
#ifdef CONFIG_PCI_MSI
100
struct rid_data {
101
struct pci_dev *bridge;
102
u32 rid;
103
};
104
105
/*
 * pci_for_each_dma_alias() callback.
 *
 * Whenever the visited device or its alias is not on the bus of the
 * requester ID collected so far, remember that device as the bridge and
 * take over the alias as the new requester ID.
 *
 * Always returns 0.
 */
static int get_rid_cb(struct pci_dev *pdev, u16 alias, void *data)
{
	struct rid_data *rid_data = data;
	u8 cur_bus = PCI_BUS_NUM(rid_data->rid);

	/* Device and alias both sit on the current bus: nothing to update. */
	if (pdev->bus->number == cur_bus && PCI_BUS_NUM(alias) == cur_bus)
		return 0;

	rid_data->bridge = pdev;
	rid_data->rid = alias;

	return 0;
}
117
118
static union hv_device_id hv_build_pci_dev_id(struct pci_dev *dev)
119
{
120
union hv_device_id dev_id;
121
struct rid_data data = {
122
.bridge = NULL,
123
.rid = PCI_DEVID(dev->bus->number, dev->devfn)
124
};
125
126
pci_for_each_dma_alias(dev, get_rid_cb, &data);
127
128
dev_id.as_uint64 = 0;
129
dev_id.device_type = HV_DEVICE_TYPE_PCI;
130
dev_id.pci.segment = pci_domain_nr(dev->bus);
131
132
dev_id.pci.bdf.bus = PCI_BUS_NUM(data.rid);
133
dev_id.pci.bdf.device = PCI_SLOT(data.rid);
134
dev_id.pci.bdf.function = PCI_FUNC(data.rid);
135
dev_id.pci.source_shadow = HV_SOURCE_SHADOW_NONE;
136
137
if (data.bridge) {
138
int pos;
139
140
/*
141
* Microsoft Hypervisor requires a bus range when the bridge is
142
* running in PCI-X mode.
143
*
144
* To distinguish conventional vs PCI-X bridge, we can check
145
* the bridge's PCI-X Secondary Status Register, Secondary Bus
146
* Mode and Frequency bits. See PCI Express to PCI/PCI-X Bridge
147
* Specification Revision 1.0 5.2.2.1.3.
148
*
149
* Value zero means it is in conventional mode, otherwise it is
150
* in PCI-X mode.
151
*/
152
153
pos = pci_find_capability(data.bridge, PCI_CAP_ID_PCIX);
154
if (pos) {
155
u16 status;
156
157
pci_read_config_word(data.bridge, pos +
158
PCI_X_BRIDGE_SSTATUS, &status);
159
160
if (status & PCI_X_SSTATUS_FREQ) {
161
/* Non-zero, PCI-X mode */
162
u8 sec_bus, sub_bus;
163
164
dev_id.pci.source_shadow = HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE;
165
166
pci_read_config_byte(data.bridge, PCI_SECONDARY_BUS, &sec_bus);
167
dev_id.pci.shadow_bus_range.secondary_bus = sec_bus;
168
pci_read_config_byte(data.bridge, PCI_SUBORDINATE_BUS, &sub_bus);
169
dev_id.pci.shadow_bus_range.subordinate_bus = sub_bus;
170
}
171
}
172
}
173
174
return dev_id;
175
}
176
177
/**
178
* hv_map_msi_interrupt() - "Map" the MSI IRQ in the hypervisor.
179
* @data: Describes the IRQ
180
* @out_entry: Hypervisor (MSI) interrupt entry (can be NULL)
181
*
182
* Map the IRQ in the hypervisor by issuing a MAP_DEVICE_INTERRUPT hypercall.
183
*
184
* Return: 0 on success, -errno on failure
185
*/
186
int hv_map_msi_interrupt(struct irq_data *data,
187
struct hv_interrupt_entry *out_entry)
188
{
189
struct irq_cfg *cfg = irqd_cfg(data);
190
struct hv_interrupt_entry dummy;
191
union hv_device_id device_id;
192
struct msi_desc *msidesc;
193
struct pci_dev *dev;
194
int cpu;
195
196
msidesc = irq_data_get_msi_desc(data);
197
dev = msi_desc_to_pci_dev(msidesc);
198
device_id = hv_build_pci_dev_id(dev);
199
cpu = cpumask_first(irq_data_get_effective_affinity_mask(data));
200
201
return hv_map_interrupt(device_id, false, cpu, cfg->vector,
202
out_entry ? out_entry : &dummy);
203
}
204
EXPORT_SYMBOL_GPL(hv_map_msi_interrupt);
205
206
static inline void entry_to_msi_msg(struct hv_interrupt_entry *entry, struct msi_msg *msg)
207
{
208
/* High address is always 0 */
209
msg->address_hi = 0;
210
msg->address_lo = entry->msi_entry.address.as_uint32;
211
msg->data = entry->msi_entry.data.as_uint32;
212
}
213
214
static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry);
215
static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
216
{
217
struct hv_interrupt_entry *stored_entry;
218
struct irq_cfg *cfg = irqd_cfg(data);
219
struct msi_desc *msidesc;
220
struct pci_dev *dev;
221
int ret;
222
223
msidesc = irq_data_get_msi_desc(data);
224
dev = msi_desc_to_pci_dev(msidesc);
225
226
if (!cfg) {
227
pr_debug("%s: cfg is NULL", __func__);
228
return;
229
}
230
231
if (data->chip_data) {
232
/*
233
* This interrupt is already mapped. Let's unmap first.
234
*
235
* We don't use retarget interrupt hypercalls here because
236
* Microsoft Hypervisor doesn't allow root to change the vector
237
* or specify VPs outside of the set that is initially used
238
* during mapping.
239
*/
240
stored_entry = data->chip_data;
241
data->chip_data = NULL;
242
243
ret = hv_unmap_msi_interrupt(dev, stored_entry);
244
245
kfree(stored_entry);
246
247
if (ret)
248
return;
249
}
250
251
stored_entry = kzalloc(sizeof(*stored_entry), GFP_ATOMIC);
252
if (!stored_entry) {
253
pr_debug("%s: failed to allocate chip data\n", __func__);
254
return;
255
}
256
257
ret = hv_map_msi_interrupt(data, stored_entry);
258
if (ret) {
259
kfree(stored_entry);
260
return;
261
}
262
263
data->chip_data = stored_entry;
264
entry_to_msi_msg(data->chip_data, msg);
265
266
return;
267
}
268
269
static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry)
270
{
271
return hv_unmap_interrupt(hv_build_pci_dev_id(dev).as_uint64, old_entry);
272
}
273
274
static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd)
275
{
276
struct hv_interrupt_entry old_entry;
277
struct msi_msg msg;
278
279
if (!irqd->chip_data) {
280
pr_debug("%s: no chip data\n!", __func__);
281
return;
282
}
283
284
old_entry = *(struct hv_interrupt_entry *)irqd->chip_data;
285
entry_to_msi_msg(&old_entry, &msg);
286
287
kfree(irqd->chip_data);
288
irqd->chip_data = NULL;
289
290
(void)hv_unmap_msi_interrupt(dev, &old_entry);
291
}
292
293
/*
294
* IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
295
* which implement the MSI or MSI-X Capability Structure.
296
*/
297
static struct irq_chip hv_pci_msi_controller = {
298
.name = "HV-PCI-MSI",
299
.irq_ack = irq_chip_ack_parent,
300
.irq_compose_msi_msg = hv_irq_compose_msi_msg,
301
.irq_set_affinity = irq_chip_set_affinity_parent,
302
};
303
304
static bool hv_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
305
struct irq_domain *real_parent, struct msi_domain_info *info)
306
{
307
struct irq_chip *chip = info->chip;
308
309
if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info))
310
return false;
311
312
chip->flags |= IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED;
313
314
info->ops->msi_prepare = pci_msi_prepare;
315
316
return true;
317
}
318
319
#define HV_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | MSI_FLAG_PCI_MSIX)
320
#define HV_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS)
321
322
static struct msi_parent_ops hv_msi_parent_ops = {
323
.supported_flags = HV_MSI_FLAGS_SUPPORTED,
324
.required_flags = HV_MSI_FLAGS_REQUIRED,
325
.bus_select_token = DOMAIN_BUS_NEXUS,
326
.bus_select_mask = MATCH_PCI_MSI,
327
.chip_flags = MSI_CHIP_FLAG_SET_ACK,
328
.prefix = "HV-",
329
.init_dev_msi_info = hv_init_dev_msi_info,
330
};
331
332
static int hv_msi_domain_alloc(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs,
333
void *arg)
334
{
335
/*
336
* TODO: The allocation bits of hv_irq_compose_msi_msg(), i.e. everything except
337
* entry_to_msi_msg() should be in here.
338
*/
339
340
int ret;
341
342
ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, arg);
343
if (ret)
344
return ret;
345
346
for (int i = 0; i < nr_irqs; ++i) {
347
irq_domain_set_info(d, virq + i, 0, &hv_pci_msi_controller, NULL,
348
handle_edge_irq, NULL, "edge");
349
}
350
return 0;
351
}
352
353
static void hv_msi_domain_free(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs)
354
{
355
for (int i = 0; i < nr_irqs; ++i) {
356
struct irq_data *irqd = irq_domain_get_irq_data(d, virq);
357
struct msi_desc *desc;
358
359
desc = irq_data_get_msi_desc(irqd);
360
if (!desc || !desc->irq || WARN_ON_ONCE(!dev_is_pci(desc->dev)))
361
continue;
362
363
hv_teardown_msi_irq(to_pci_dev(desc->dev), irqd);
364
}
365
irq_domain_free_irqs_top(d, virq, nr_irqs);
366
}
367
368
static const struct irq_domain_ops hv_msi_domain_ops = {
369
.select = msi_lib_irq_domain_select,
370
.alloc = hv_msi_domain_alloc,
371
.free = hv_msi_domain_free,
372
};
373
374
/*
 * Create the Hyper-V PCI/MSI parent irq domain on top of the x86 vector
 * domain.
 *
 * Return: the new domain. The function BUG()s rather than returning NULL
 * when either the fwnode or the domain cannot be created.
 */
struct irq_domain * __init hv_create_pci_msi_domain(void)
{
	struct irq_domain_info info = {
		.fwnode	= irq_domain_alloc_named_fwnode("HV-PCI-MSI"),
		.ops	= &hv_msi_domain_ops,
		.parent	= x86_vector_domain,
	};
	struct irq_domain *domain;

	domain = info.fwnode ?
		 msi_create_parent_irq_domain(&info, &hv_msi_parent_ops) : NULL;

	/* No point in going further if we can't get an irq domain */
	BUG_ON(!domain);

	return domain;
}
392
393
#endif /* CONFIG_PCI_MSI */
394
395
int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry)
396
{
397
union hv_device_id device_id;
398
399
device_id.as_uint64 = 0;
400
device_id.device_type = HV_DEVICE_TYPE_IOAPIC;
401
device_id.ioapic.ioapic_id = (u8)ioapic_id;
402
403
return hv_unmap_interrupt(device_id.as_uint64, entry);
404
}
405
EXPORT_SYMBOL_GPL(hv_unmap_ioapic_interrupt);
406
407
/*
 * Map an IO-APIC interrupt in the hypervisor.
 *
 * @ioapic_id: IO-APIC identifier; only the low 8 bits are used
 * @level:     true for level triggered, false for edge triggered
 * @cpu:       CPU the interrupt is targeted at
 * @vector:    vector to be delivered
 * @entry:     receives the hypervisor interrupt entry
 *
 * Return: 0 on success, -errno on failure.
 */
int hv_map_ioapic_interrupt(int ioapic_id, bool level, int cpu, int vector,
			    struct hv_interrupt_entry *entry)
{
	union hv_device_id id = { .as_uint64 = 0 };

	id.device_type = HV_DEVICE_TYPE_IOAPIC;
	id.ioapic.ioapic_id = (u8)ioapic_id;

	return hv_map_interrupt(id, level, cpu, vector, entry);
}
EXPORT_SYMBOL_GPL(hv_map_ioapic_interrupt);
419
420