GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/hyperv/irqdomain.c
// SPDX-License-Identifier: GPL-2.0

/*
 * Irqdomain for Linux to run as the root partition on Microsoft Hypervisor.
 *
 * Authors:
 *  Sunil Muthuswamy <sunilmut@microsoft.com>
 *  Wei Liu <wei.liu@kernel.org>
 */

#include <linux/pci.h>
#include <linux/irq.h>
#include <linux/export.h>
#include <asm/mshyperv.h>

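/*
 * Map @device_id's interrupt to @vector on @cpu by issuing a
 * HVCALL_MAP_DEVICE_INTERRUPT rep hypercall. Interrupts are disabled
 * around the call because the per-CPU hypercall input/output pages are
 * used to pass the arguments.
 */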
static int hv_map_interrupt(union hv_device_id device_id, bool level,
		int cpu, int vector, struct hv_interrupt_entry *entry)
{
	struct hv_input_map_device_interrupt *input;
	struct hv_output_map_device_interrupt *output;
	struct hv_device_interrupt_descriptor *intr_desc;
	unsigned long flags;
	u64 status;
	int nr_bank, var_size;

	local_irq_save(flags);

	input = *this_cpu_ptr(hyperv_pcpu_input_arg);
	output = *this_cpu_ptr(hyperv_pcpu_output_arg);

	intr_desc = &input->interrupt_descriptor;
	memset(input, 0, sizeof(*input));
	input->partition_id = hv_current_partition_id;
	input->device_id = device_id.as_uint64;
	intr_desc->interrupt_type = HV_X64_INTERRUPT_TYPE_FIXED;
	intr_desc->vector_count = 1;
	intr_desc->target.vector = vector;

	if (level)
		intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_LEVEL;
	else
		intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_EDGE;

	intr_desc->target.vp_set.valid_bank_mask = 0;
	intr_desc->target.vp_set.format = HV_GENERIC_SET_SPARSE_4K;
	nr_bank = cpumask_to_vpset(&(intr_desc->target.vp_set), cpumask_of(cpu));
	if (nr_bank < 0) {
		local_irq_restore(flags);
		pr_err("%s: unable to generate VP set\n", __func__);
		return -EINVAL;
	}
	intr_desc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET;

	/*
	 * var-sized hypercall, var-size starts after vp_mask (thus
	 * vp_set.format does not count, but vp_set.valid_bank_mask
	 * does).
	 */
	var_size = nr_bank + 1;

	status = hv_do_rep_hypercall(HVCALL_MAP_DEVICE_INTERRUPT, 0, var_size,
			input, output);
	*entry = output->interrupt_entry;

	local_irq_restore(flags);

	if (!hv_result_success(status))
		hv_status_err(status, "\n");

	return hv_result_to_errno(status);
}
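
/*
 * Undo a hv_map_interrupt() mapping via HVCALL_UNMAP_DEVICE_INTERRUPT,
 * handing back the interrupt entry that the map hypercall returned.
 */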
static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry)
{
	unsigned long flags;
	struct hv_input_unmap_device_interrupt *input;
	struct hv_interrupt_entry *intr_entry;
	u64 status;

	local_irq_save(flags);
	input = *this_cpu_ptr(hyperv_pcpu_input_arg);

	memset(input, 0, sizeof(*input));
	intr_entry = &input->interrupt_entry;
	input->partition_id = hv_current_partition_id;
	input->device_id = id;
	*intr_entry = *old_entry;

	status = hv_do_hypercall(HVCALL_UNMAP_DEVICE_INTERRUPT, input, NULL);
	local_irq_restore(flags);

	if (!hv_result_success(status))
		hv_status_err(status, "\n");

	return hv_result_to_errno(status);
}

#ifdef CONFIG_PCI_MSI
struct rid_data {
	struct pci_dev *bridge;
	u32 rid;
};

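/*
 * pci_for_each_dma_alias() callback: walk the DMA alias chain and
 * remember the topmost bridge and the requester ID (RID) it exposes,
 * since that is the ID the hypervisor sees as the interrupt source.
 */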
static int get_rid_cb(struct pci_dev *pdev, u16 alias, void *data)
{
	struct rid_data *rd = data;
	u8 bus = PCI_BUS_NUM(rd->rid);

	if (pdev->bus->number != bus || PCI_BUS_NUM(alias) != bus) {
		rd->bridge = pdev;
		rd->rid = alias;
	}

	return 0;
}
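
/*
 * Build the hv_device_id that the MAP/UNMAP hypercalls expect for a PCI
 * device: PCI segment plus the (possibly aliased) bus/device/function,
 * with bridge shadow information when the device sits behind a bridge
 * running in PCI-X mode.
 */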
static union hv_device_id hv_build_pci_dev_id(struct pci_dev *dev)
{
	union hv_device_id dev_id;
	struct rid_data data = {
		.bridge = NULL,
		.rid = PCI_DEVID(dev->bus->number, dev->devfn)
	};

	pci_for_each_dma_alias(dev, get_rid_cb, &data);

	dev_id.as_uint64 = 0;
	dev_id.device_type = HV_DEVICE_TYPE_PCI;
	dev_id.pci.segment = pci_domain_nr(dev->bus);

	dev_id.pci.bdf.bus = PCI_BUS_NUM(data.rid);
	dev_id.pci.bdf.device = PCI_SLOT(data.rid);
	dev_id.pci.bdf.function = PCI_FUNC(data.rid);
	dev_id.pci.source_shadow = HV_SOURCE_SHADOW_NONE;

	if (data.bridge) {
		int pos;

		/*
		 * Microsoft Hypervisor requires a bus range when the bridge is
		 * running in PCI-X mode.
		 *
		 * To distinguish conventional vs PCI-X bridge, we can check
		 * the bridge's PCI-X Secondary Status Register, Secondary Bus
		 * Mode and Frequency bits. See PCI Express to PCI/PCI-X Bridge
		 * Specification Revision 1.0 5.2.2.1.3.
		 *
		 * Value zero means it is in conventional mode, otherwise it is
		 * in PCI-X mode.
		 */

		pos = pci_find_capability(data.bridge, PCI_CAP_ID_PCIX);
		if (pos) {
			u16 status;

			pci_read_config_word(data.bridge, pos +
					PCI_X_BRIDGE_SSTATUS, &status);

			if (status & PCI_X_SSTATUS_FREQ) {
				/* Non-zero, PCI-X mode */
				u8 sec_bus, sub_bus;

				dev_id.pci.source_shadow = HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE;

				pci_read_config_byte(data.bridge, PCI_SECONDARY_BUS, &sec_bus);
				dev_id.pci.shadow_bus_range.secondary_bus = sec_bus;
				pci_read_config_byte(data.bridge, PCI_SUBORDINATE_BUS, &sub_bus);
				dev_id.pci.shadow_bus_range.subordinate_bus = sub_bus;
			}
		}
	}

	return dev_id;
}

/**
 * hv_map_msi_interrupt() - "Map" the MSI IRQ in the hypervisor.
 * @data:      Describes the IRQ
 * @out_entry: Hypervisor (MSI) interrupt entry (can be NULL)
 *
 * Map the IRQ in the hypervisor by issuing a MAP_DEVICE_INTERRUPT hypercall.
 *
 * Return: 0 on success, -errno on failure
 */
int hv_map_msi_interrupt(struct irq_data *data,
			 struct hv_interrupt_entry *out_entry)
{
	struct irq_cfg *cfg = irqd_cfg(data);
	struct hv_interrupt_entry dummy;
	union hv_device_id device_id;
	struct msi_desc *msidesc;
	struct pci_dev *dev;
	int cpu;

	msidesc = irq_data_get_msi_desc(data);
	dev = msi_desc_to_pci_dev(msidesc);
	device_id = hv_build_pci_dev_id(dev);
	cpu = cpumask_first(irq_data_get_effective_affinity_mask(data));

	return hv_map_interrupt(device_id, false, cpu, cfg->vector,
				out_entry ? out_entry : &dummy);
}
EXPORT_SYMBOL_GPL(hv_map_msi_interrupt);

static inline void entry_to_msi_msg(struct hv_interrupt_entry *entry, struct msi_msg *msg)
{
	/* High address is always 0 */
	msg->address_hi = 0;
	msg->address_lo = entry->msi_entry.address.as_uint32;
	msg->data = entry->msi_entry.data.as_uint32;
}
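
/*
 * irq_chip ->irq_compose_msi_msg() callback: (re)map the interrupt in
 * the hypervisor and compose the MSI message from the entry it returns.
 * The entry is cached in irq_data::chip_data so that teardown (or a
 * later remap) can unmap it again.
 */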
static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry);
static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
{
	struct hv_interrupt_entry *stored_entry;
	struct irq_cfg *cfg = irqd_cfg(data);
	struct msi_desc *msidesc;
	struct pci_dev *dev;
	int ret;

	msidesc = irq_data_get_msi_desc(data);
	dev = msi_desc_to_pci_dev(msidesc);

	if (!cfg) {
		pr_debug("%s: cfg is NULL\n", __func__);
		return;
	}

	if (data->chip_data) {
		/*
		 * This interrupt is already mapped. Let's unmap first.
		 *
		 * We don't use retarget interrupt hypercalls here because
		 * Microsoft Hypervisor doesn't allow root to change the vector
		 * or specify VPs outside of the set that is initially used
		 * during mapping.
		 */
		stored_entry = data->chip_data;
		data->chip_data = NULL;

		ret = hv_unmap_msi_interrupt(dev, stored_entry);

		kfree(stored_entry);

		if (ret)
			return;
	}

	stored_entry = kzalloc(sizeof(*stored_entry), GFP_ATOMIC);
	if (!stored_entry) {
		pr_debug("%s: failed to allocate chip data\n", __func__);
		return;
	}

	ret = hv_map_msi_interrupt(data, stored_entry);
	if (ret) {
		kfree(stored_entry);
		return;
	}

	data->chip_data = stored_entry;
	entry_to_msi_msg(data->chip_data, msg);
}

static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry)
{
	return hv_unmap_interrupt(hv_build_pci_dev_id(dev).as_uint64, old_entry);
}
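
/*
 * Undo hv_irq_compose_msi_msg(): recover the cached interrupt entry,
 * release it and unmap the interrupt in the hypervisor.
 */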
static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd)
{
	struct hv_interrupt_entry old_entry;
	struct msi_msg msg;

	if (!irqd->chip_data) {
		pr_debug("%s: no chip data!\n", __func__);
		return;
	}

	old_entry = *(struct hv_interrupt_entry *)irqd->chip_data;
	entry_to_msi_msg(&old_entry, &msg);

	kfree(irqd->chip_data);
	irqd->chip_data = NULL;

	(void)hv_unmap_msi_interrupt(dev, &old_entry);
}
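
/* msi_domain_ops ->msi_free() callback: tear down the hypervisor mapping. */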
static void hv_msi_free_irq(struct irq_domain *domain,
			    struct msi_domain_info *info, unsigned int virq)
{
	struct irq_data *irqd = irq_get_irq_data(virq);
	struct msi_desc *desc;

	if (!irqd)
		return;

	desc = irq_data_get_msi_desc(irqd);
	if (!desc || !desc->irq || WARN_ON_ONCE(!dev_is_pci(desc->dev)))
		return;

	hv_teardown_msi_irq(to_pci_dev(desc->dev), irqd);
}

/*
 * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
 * which implement the MSI or MSI-X Capability Structure.
 */
static struct irq_chip hv_pci_msi_controller = {
	.name			= "HV-PCI-MSI",
	.irq_unmask		= pci_msi_unmask_irq,
	.irq_mask		= pci_msi_mask_irq,
	.irq_ack		= irq_chip_ack_parent,
	.irq_retrigger		= irq_chip_retrigger_hierarchy,
	.irq_compose_msi_msg	= hv_irq_compose_msi_msg,
	.irq_set_affinity	= msi_domain_set_affinity,
	.flags			= IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED,
};

static struct msi_domain_ops pci_msi_domain_ops = {
	.msi_free	= hv_msi_free_irq,
	.msi_prepare	= pci_msi_prepare,
};

static struct msi_domain_info hv_pci_msi_domain_info = {
	.flags		= MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
			  MSI_FLAG_PCI_MSIX,
	.ops		= &pci_msi_domain_ops,
	.chip		= &hv_pci_msi_controller,
	.handler	= handle_edge_irq,
	.handler_name	= "edge",
};

struct irq_domain * __init hv_create_pci_msi_domain(void)
{
	struct irq_domain *d = NULL;
	struct fwnode_handle *fn;

	fn = irq_domain_alloc_named_fwnode("HV-PCI-MSI");
	if (fn)
		d = pci_msi_create_irq_domain(fn, &hv_pci_msi_domain_info, x86_vector_domain);

	/* No point in going further if we can't get an irq domain */
	BUG_ON(!d);

	return d;
}

#endif /* CONFIG_PCI_MSI */
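
/*
 * IO-APIC interrupts go through the same MAP/UNMAP device interrupt
 * hypercalls as MSI; the device ID simply names an IO-APIC instead of
 * a PCI function.
 */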
int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry)
{
	union hv_device_id device_id;

	device_id.as_uint64 = 0;
	device_id.device_type = HV_DEVICE_TYPE_IOAPIC;
	device_id.ioapic.ioapic_id = (u8)ioapic_id;

	return hv_unmap_interrupt(device_id.as_uint64, entry);
}
EXPORT_SYMBOL_GPL(hv_unmap_ioapic_interrupt);

int hv_map_ioapic_interrupt(int ioapic_id, bool level, int cpu, int vector,
		struct hv_interrupt_entry *entry)
{
	union hv_device_id device_id;

	device_id.as_uint64 = 0;
	device_id.device_type = HV_DEVICE_TYPE_IOAPIC;
	device_id.ioapic.ioapic_id = (u8)ioapic_id;

	return hv_map_interrupt(device_id, level, cpu, vector, entry);
}
EXPORT_SYMBOL_GPL(hv_map_ioapic_interrupt);