#include <linux/string.h>
#include <linux/perf_event.h>
#include <asm/reg.h>
#include <asm/cputable.h>
#include "internal.h"
#define PM_PMC_SH 12
#define PM_PMC_MSK 0xf
#define PM_UNIT_SH 8
#define PM_UNIT_MSK 0xf
#define PM_SPCSEL_SH 6
#define PM_SPCSEL_MSK 3
#define PM_BYTE_SH 4
#define PM_BYTE_MSK 3
#define PM_PMCSEL_MSK 0xf
#define PM_NONE 0
#define PM_FPU 1
#define PM_VPU 2
#define PM_ISU 3
#define PM_IFU 4
#define PM_IDU 5
#define PM_STS 6
#define PM_LSU0 7
#define PM_LSU1U 8
#define PM_LSU1L 9
#define PM_LASTUNIT 9
#define MMCR0_PMC1SEL_SH 8
#define MMCR0_PMC2SEL_SH 1
#define MMCR_PMCSEL_MSK 0x1f
#define MMCR1_TTM0SEL_SH 62
#define MMCR1_TTM1SEL_SH 59
#define MMCR1_TTM3SEL_SH 53
#define MMCR1_TTMSEL_MSK 3
#define MMCR1_TD_CP_DBG0SEL_SH 50
#define MMCR1_TD_CP_DBG1SEL_SH 48
#define MMCR1_TD_CP_DBG2SEL_SH 46
#define MMCR1_TD_CP_DBG3SEL_SH 44
#define MMCR1_PMC1_ADDER_SEL_SH 39
#define MMCR1_PMC2_ADDER_SEL_SH 38
#define MMCR1_PMC6_ADDER_SEL_SH 37
#define MMCR1_PMC5_ADDER_SEL_SH 36
#define MMCR1_PMC8_ADDER_SEL_SH 35
#define MMCR1_PMC7_ADDER_SEL_SH 34
#define MMCR1_PMC3_ADDER_SEL_SH 33
#define MMCR1_PMC4_ADDER_SEL_SH 32
#define MMCR1_PMC3SEL_SH 27
#define MMCR1_PMC4SEL_SH 22
#define MMCR1_PMC5SEL_SH 17
#define MMCR1_PMC6SEL_SH 12
#define MMCR1_PMC7SEL_SH 7
#define MMCR1_PMC8SEL_SH 2
static short mmcr1_adder_bits[8] = {
MMCR1_PMC1_ADDER_SEL_SH,
MMCR1_PMC2_ADDER_SEL_SH,
MMCR1_PMC3_ADDER_SEL_SH,
MMCR1_PMC4_ADDER_SEL_SH,
MMCR1_PMC5_ADDER_SEL_SH,
MMCR1_PMC6_ADDER_SEL_SH,
MMCR1_PMC7_ADDER_SEL_SH,
MMCR1_PMC8_ADDER_SEL_SH
};
static unsigned char direct_marked_event[8] = {
(1<<2) | (1<<3),
(1<<3) | (1<<5),
(1<<3) | (1<<5),
(1<<4) | (1<<5),
(1<<4) | (1<<5),
(1<<3) | (1<<4) | (1<<5),
(1<<4) | (1<<5),
(1<<4)
};
static int p970_marked_instr_event(u64 event)
{
int pmc, psel, unit, byte, bit;
unsigned int mask;
pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
psel = event & PM_PMCSEL_MSK;
if (pmc) {
if (direct_marked_event[pmc - 1] & (1 << psel))
return 1;
if (psel == 0)
bit = (pmc <= 4)? pmc - 1: 8 - pmc;
else if (psel == 7 || psel == 13)
bit = 4;
else
return 0;
} else
bit = psel;
byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
mask = 0;
switch (unit) {
case PM_VPU:
mask = 0x4c;
break;
case PM_LSU0:
mask = 0x085dff00;
break;
case PM_LSU1L:
mask = 0x50 << 24;
break;
}
return (mask >> (byte * 8 + bit)) & 1;
}
static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
[PM_FPU] = { 0xc80000000000ull, 0x040000000000ull },
[PM_VPU] = { 0xc80000000000ull, 0xc40000000000ull },
[PM_ISU] = { 0x080000000000ull, 0x020000000000ull },
[PM_IFU] = { 0xc80000000000ull, 0x840000000000ull },
[PM_IDU] = { 0x380000000000ull, 0x010000000000ull },
[PM_STS] = { 0x380000000000ull, 0x310000000000ull },
};
static int p970_get_constraint(u64 event, unsigned long *maskp,
unsigned long *valp, u64 event_config1 __maybe_unused)
{
int pmc, byte, unit, sh, spcsel;
unsigned long mask = 0, value = 0;
int grp = -1;
pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
if (pmc) {
if (pmc > 8)
return -1;
sh = (pmc - 1) * 2;
mask |= 2 << sh;
value |= 1 << sh;
grp = ((pmc - 1) >> 1) & 1;
}
unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
if (unit) {
if (unit > PM_LASTUNIT)
return -1;
mask |= unit_cons[unit][0];
value |= unit_cons[unit][1];
byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
if (!pmc)
grp = byte & 1;
mask |= 0xfULL << (28 - 4 * byte);
value |= (unsigned long)unit << (28 - 4 * byte);
}
if (grp == 0) {
mask |= 0x8000000000ull;
value |= 0x1000000000ull;
} else if (grp == 1) {
mask |= 0x800000000ull;
value |= 0x100000000ull;
}
spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
if (spcsel) {
mask |= 3ull << 48;
value |= (unsigned long)spcsel << 48;
}
*maskp = mask;
*valp = value;
return 0;
}
static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[])
{
alt[0] = event;
if (event == 0x2002 || event == 0x3002) {
alt[1] = event ^ 0x1000;
return 2;
}
return 1;
}
static int p970_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], struct mmcr_regs *mmcr,
struct perf_event *pevents[],
u32 flags __maybe_unused)
{
unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
unsigned int pmc, unit, byte, psel;
unsigned int ttm, grp;
unsigned int pmc_inuse = 0;
unsigned int pmc_grp_use[2];
unsigned char busbyte[4];
unsigned char unituse[16];
unsigned char unitmap[] = { 0, 0<<3, 3<<3, 1<<3, 2<<3, 0|4, 3|4 };
unsigned char ttmuse[2];
unsigned char pmcsel[8];
int i;
int spcsel;
if (n_ev > 8)
return -1;
pmc_grp_use[0] = pmc_grp_use[1] = 0;
memset(busbyte, 0, sizeof(busbyte));
memset(unituse, 0, sizeof(unituse));
for (i = 0; i < n_ev; ++i) {
pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
if (pmc) {
if (pmc_inuse & (1 << (pmc - 1)))
return -1;
pmc_inuse |= 1 << (pmc - 1);
++pmc_grp_use[((pmc - 1) >> 1) & 1];
}
unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
if (unit) {
if (unit > PM_LASTUNIT)
return -1;
if (!pmc)
++pmc_grp_use[byte & 1];
if (busbyte[byte] && busbyte[byte] != unit)
return -1;
busbyte[byte] = unit;
unituse[unit] = 1;
}
}
if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
return -1;
if (unituse[PM_ISU] &
(unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_VPU]))
unitmap[PM_ISU] = 2 | 4;
ttmuse[0] = ttmuse[1] = 0;
for (i = PM_FPU; i <= PM_STS; ++i) {
if (!unituse[i])
continue;
ttm = unitmap[i];
++ttmuse[(ttm >> 2) & 1];
mmcr1 |= (unsigned long)(ttm & ~4) << MMCR1_TTM1SEL_SH;
}
if (ttmuse[0] > 1 || ttmuse[1] > 1)
return -1;
for (byte = 0; byte < 4; ++byte) {
unit = busbyte[byte];
if (!unit)
continue;
if (unit <= PM_STS)
ttm = (unitmap[unit] >> 2) & 1;
else if (unit == PM_LSU0)
ttm = 2;
else {
ttm = 3;
if (unit == PM_LSU1L && byte >= 2)
mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
}
mmcr1 |= (unsigned long)ttm
<< (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
}
memset(pmcsel, 0x8, sizeof(pmcsel));
for (i = 0; i < n_ev; ++i) {
pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
psel = event[i] & PM_PMCSEL_MSK;
if (!pmc) {
if (unit)
psel |= 0x10 | ((byte & 2) << 2);
else
psel |= 8;
for (pmc = 0; pmc < 8; ++pmc) {
if (pmc_inuse & (1 << pmc))
continue;
grp = (pmc >> 1) & 1;
if (unit) {
if (grp == (byte & 1))
break;
} else if (pmc_grp_use[grp] < 4) {
++pmc_grp_use[grp];
break;
}
}
pmc_inuse |= 1 << pmc;
} else {
--pmc;
if (psel == 0 && (byte & 2))
mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
}
pmcsel[pmc] = psel;
hwc[i] = pmc;
spcsel = (event[i] >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
mmcr1 |= spcsel;
if (p970_marked_instr_event(event[i]))
mmcra |= MMCRA_SAMPLE_ENABLE;
}
for (pmc = 0; pmc < 2; ++pmc)
mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc);
for (; pmc < 8; ++pmc)
mmcr1 |= (unsigned long)pmcsel[pmc]
<< (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
if (pmc_inuse & 1)
mmcr0 |= MMCR0_PMC1CE;
if (pmc_inuse & 0xfe)
mmcr0 |= MMCR0_PMCjCE;
mmcra |= 0x2000;
mmcr->mmcr0 = mmcr0;
mmcr->mmcr1 = mmcr1;
mmcr->mmcra = mmcra;
return 0;
}
static void p970_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
{
int shift;
if (pmc <= 1) {
shift = MMCR0_PMC1SEL_SH - 7 * pmc;
mmcr->mmcr0 = (mmcr->mmcr0 & ~(0x1fUL << shift)) | (0x08UL << shift);
} else {
shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2);
mmcr->mmcr1 = (mmcr->mmcr1 & ~(0x1fUL << shift)) | (0x08UL << shift);
}
}
static int ppc970_generic_events[] = {
[PERF_COUNT_HW_CPU_CYCLES] = 7,
[PERF_COUNT_HW_INSTRUCTIONS] = 1,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x8810,
[PERF_COUNT_HW_CACHE_MISSES] = 0x3810,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x431,
[PERF_COUNT_HW_BRANCH_MISSES] = 0x327,
};
#define C(x) PERF_COUNT_HW_CACHE_##x
static u64 ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = {
[C(OP_READ)] = { 0x8810, 0x3810 },
[C(OP_WRITE)] = { 0x7810, 0x813 },
[C(OP_PREFETCH)] = { 0x731, 0 },
},
[C(L1I)] = {
[C(OP_READ)] = { 0, 0 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { 0, 0 },
},
[C(LL)] = {
[C(OP_READ)] = { 0, 0 },
[C(OP_WRITE)] = { 0, 0 },
[C(OP_PREFETCH)] = { 0x733, 0 },
},
[C(DTLB)] = {
[C(OP_READ)] = { 0, 0x704 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
[C(ITLB)] = {
[C(OP_READ)] = { 0, 0x700 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
[C(BPU)] = {
[C(OP_READ)] = { 0x431, 0x327 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
[C(NODE)] = {
[C(OP_READ)] = { -1, -1 },
[C(OP_WRITE)] = { -1, -1 },
[C(OP_PREFETCH)] = { -1, -1 },
},
};
static struct power_pmu ppc970_pmu = {
.name = "PPC970/FX/MP",
.n_counter = 8,
.max_alternatives = 2,
.add_fields = 0x001100005555ull,
.test_adder = 0x013300000000ull,
.compute_mmcr = p970_compute_mmcr,
.get_constraint = p970_get_constraint,
.get_alternatives = p970_get_alternatives,
.disable_pmc = p970_disable_pmc,
.n_generic = ARRAY_SIZE(ppc970_generic_events),
.generic_events = ppc970_generic_events,
.cache_events = &ppc970_cache_events,
.flags = PPMU_NO_SIPR | PPMU_NO_CONT_SAMPLING,
};
int __init init_ppc970_pmu(void)
{
unsigned int pvr = mfspr(SPRN_PVR);
if (PVR_VER(pvr) != PVR_970 && PVR_VER(pvr) != PVR_970MP &&
PVR_VER(pvr) != PVR_970FX && PVR_VER(pvr) != PVR_970GX)
return -ENODEV;
return register_power_pmu(&ppc970_pmu);
}