Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/r600/r600_gpu_load.c
4570 views
1
/*
2
* Copyright 2015 Advanced Micro Devices, Inc.
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
* SOFTWARE.
22
*
23
* Authors: Marek Olšák <[email protected]>
24
*
25
*/
26
27
/* The GPU load is measured as follows.
28
*
29
* There is a thread which samples the GRBM_STATUS register at a certain
30
* frequency and the "busy" or "idle" counter is incremented based on
31
* whether the GUI_ACTIVE bit is set or not.
32
*
33
* Then, the user can sample the counters twice and calculate the average
34
* GPU load between the two samples.
35
*/
36
37
#include "r600_pipe_common.h"
38
#include "r600_query.h"
39
#include "util/os_time.h"
40
41
/* For good accuracy at 1000 fps or lower. This will be inaccurate for higher
42
* fps (there are too few samples per frame). */
43
/* Target number of register samples per second taken by the GPU load thread.
 * The thread derives its nominal sleep period from it as
 * 1000000 / SAMPLES_PER_SEC microseconds. */
#define SAMPLES_PER_SEC 10000
44
45
/* Extract bit number "bit" of a sampled status register value as 0 or 1. */
#define R600_STATUS_BIT(x, bit) (((x) >> (bit)) & 0x1)

/* GRBM_STATUS register offset and the per-block busy bits decoded from the
 * value read from it. */
#define GRBM_STATUS 0x8010
#define TA_BUSY(x) R600_STATUS_BIT(x, 14)
#define GDS_BUSY(x) R600_STATUS_BIT(x, 15)
#define VGT_BUSY(x) R600_STATUS_BIT(x, 17)
#define IA_BUSY(x) R600_STATUS_BIT(x, 19)
#define SX_BUSY(x) R600_STATUS_BIT(x, 20)
#define WD_BUSY(x) R600_STATUS_BIT(x, 21)
#define SPI_BUSY(x) R600_STATUS_BIT(x, 22)
#define BCI_BUSY(x) R600_STATUS_BIT(x, 23)
#define SC_BUSY(x) R600_STATUS_BIT(x, 24)
#define PA_BUSY(x) R600_STATUS_BIT(x, 25)
#define DB_BUSY(x) R600_STATUS_BIT(x, 26)
#define CP_BUSY(x) R600_STATUS_BIT(x, 29)
#define CB_BUSY(x) R600_STATUS_BIT(x, 30)
#define GUI_ACTIVE(x) R600_STATUS_BIT(x, 31)

/* SRBM_STATUS2 register offset and its bit extractor.
 * NOTE(review): not referenced by the code visible in this file. */
#define SRBM_STATUS2 0x0e4c
#define SDMA_BUSY(x) R600_STATUS_BIT(x, 5)

/* CP_STAT register offset and its bit extractors.
 * NOTE(review): not referenced by the code visible in this file. */
#define CP_STAT 0x8680
#define PFP_BUSY(x) R600_STATUS_BIT(x, 15)
#define MEQ_BUSY(x) R600_STATUS_BIT(x, 16)
#define ME_BUSY(x) R600_STATUS_BIT(x, 17)
#define SURFACE_SYNC_BUSY(x) R600_STATUS_BIT(x, 21)
#define DMA_BUSY(x) R600_STATUS_BIT(x, 22)
#define SCRATCH_RAM_BUSY(x) R600_STATUS_BIT(x, 24)
71
72
/* Pass-through "mask" for UPDATE_COUNTER: yields its argument unchanged.
 * The expansion is parenthesized so that an argument containing operators
 * keeps its grouping when the macro is used inside a larger expression. */
#define IDENTITY(x) (x)
73
74
#define UPDATE_COUNTER(field, mask) \
75
do { \
76
if (mask(value)) \
77
p_atomic_inc(&counters->named.field.busy); \
78
else \
79
p_atomic_inc(&counters->named.field.idle); \
80
} while (0)
81
82
static void r600_update_mmio_counters(struct r600_common_screen *rscreen,
83
union r600_mmio_counters *counters)
84
{
85
uint32_t value = 0;
86
bool gui_busy, sdma_busy = false;
87
88
/* GRBM_STATUS */
89
rscreen->ws->read_registers(rscreen->ws, GRBM_STATUS, 1, &value);
90
91
UPDATE_COUNTER(ta, TA_BUSY);
92
UPDATE_COUNTER(gds, GDS_BUSY);
93
UPDATE_COUNTER(vgt, VGT_BUSY);
94
UPDATE_COUNTER(ia, IA_BUSY);
95
UPDATE_COUNTER(sx, SX_BUSY);
96
UPDATE_COUNTER(wd, WD_BUSY);
97
UPDATE_COUNTER(spi, SPI_BUSY);
98
UPDATE_COUNTER(bci, BCI_BUSY);
99
UPDATE_COUNTER(sc, SC_BUSY);
100
UPDATE_COUNTER(pa, PA_BUSY);
101
UPDATE_COUNTER(db, DB_BUSY);
102
UPDATE_COUNTER(cp, CP_BUSY);
103
UPDATE_COUNTER(cb, CB_BUSY);
104
UPDATE_COUNTER(gui, GUI_ACTIVE);
105
gui_busy = GUI_ACTIVE(value);
106
107
value = gui_busy || sdma_busy;
108
UPDATE_COUNTER(gpu, IDENTITY);
109
}
110
111
#undef UPDATE_COUNTER
112
113
static int
114
r600_gpu_load_thread(void *param)
115
{
116
struct r600_common_screen *rscreen = (struct r600_common_screen*)param;
117
const int period_us = 1000000 / SAMPLES_PER_SEC;
118
int sleep_us = period_us;
119
int64_t cur_time, last_time = os_time_get();
120
121
while (!p_atomic_read(&rscreen->gpu_load_stop_thread)) {
122
if (sleep_us)
123
os_time_sleep(sleep_us);
124
125
/* Make sure we sleep the ideal amount of time to match
126
* the expected frequency. */
127
cur_time = os_time_get();
128
129
if (os_time_timeout(last_time, last_time + period_us,
130
cur_time))
131
sleep_us = MAX2(sleep_us - 1, 1);
132
else
133
sleep_us += 1;
134
135
/*printf("Hz: %.1f\n", 1000000.0 / (cur_time - last_time));*/
136
last_time = cur_time;
137
138
/* Update the counters. */
139
r600_update_mmio_counters(rscreen, &rscreen->mmio_counters);
140
}
141
p_atomic_dec(&rscreen->gpu_load_stop_thread);
142
return 0;
143
}
144
145
void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen)
146
{
147
if (!rscreen->gpu_load_thread)
148
return;
149
150
p_atomic_inc(&rscreen->gpu_load_stop_thread);
151
thrd_join(rscreen->gpu_load_thread, NULL);
152
rscreen->gpu_load_thread = 0;
153
}
154
155
static uint64_t r600_read_mmio_counter(struct r600_common_screen *rscreen,
156
unsigned busy_index)
157
{
158
/* Start the thread if needed. */
159
if (!rscreen->gpu_load_thread) {
160
mtx_lock(&rscreen->gpu_load_mutex);
161
/* Check again inside the mutex. */
162
if (!rscreen->gpu_load_thread)
163
rscreen->gpu_load_thread =
164
u_thread_create(r600_gpu_load_thread, rscreen);
165
mtx_unlock(&rscreen->gpu_load_mutex);
166
}
167
168
unsigned busy = p_atomic_read(&rscreen->mmio_counters.array[busy_index]);
169
unsigned idle = p_atomic_read(&rscreen->mmio_counters.array[busy_index + 1]);
170
171
return busy | ((uint64_t)idle << 32);
172
}
173
174
static unsigned r600_end_mmio_counter(struct r600_common_screen *rscreen,
175
uint64_t begin, unsigned busy_index)
176
{
177
uint64_t end = r600_read_mmio_counter(rscreen, busy_index);
178
unsigned busy = (end & 0xffffffff) - (begin & 0xffffffff);
179
unsigned idle = (end >> 32) - (begin >> 32);
180
181
/* Calculate the % of time the busy counter was being incremented.
182
*
183
* If no counters were incremented, return the current counter status.
184
* It's for the case when the load is queried faster than
185
* the counters are updated.
186
*/
187
if (idle || busy) {
188
return busy*100 / (busy + idle);
189
} else {
190
union r600_mmio_counters counters;
191
192
memset(&counters, 0, sizeof(counters));
193
r600_update_mmio_counters(rscreen, &counters);
194
return counters.array[busy_index] ? 100 : 0;
195
}
196
}
197
198
/* Index of named.<field>.busy within mmio_counters.array, computed as the
 * element distance between the two addresses. "rscreen" is parenthesized so
 * that any pointer expression (e.g. &screen or p + 1) can be passed; "field"
 * is a member name and therefore cannot be parenthesized. */
#define BUSY_INDEX(rscreen, field) \
	(&(rscreen)->mmio_counters.named.field.busy - \
	 (rscreen)->mmio_counters.array)
200
201
static unsigned busy_index_from_type(struct r600_common_screen *rscreen,
202
unsigned type)
203
{
204
switch (type) {
205
case R600_QUERY_GPU_LOAD:
206
return BUSY_INDEX(rscreen, gpu);
207
case R600_QUERY_GPU_SHADERS_BUSY:
208
return BUSY_INDEX(rscreen, spi);
209
case R600_QUERY_GPU_TA_BUSY:
210
return BUSY_INDEX(rscreen, ta);
211
case R600_QUERY_GPU_GDS_BUSY:
212
return BUSY_INDEX(rscreen, gds);
213
case R600_QUERY_GPU_VGT_BUSY:
214
return BUSY_INDEX(rscreen, vgt);
215
case R600_QUERY_GPU_IA_BUSY:
216
return BUSY_INDEX(rscreen, ia);
217
case R600_QUERY_GPU_SX_BUSY:
218
return BUSY_INDEX(rscreen, sx);
219
case R600_QUERY_GPU_WD_BUSY:
220
return BUSY_INDEX(rscreen, wd);
221
case R600_QUERY_GPU_BCI_BUSY:
222
return BUSY_INDEX(rscreen, bci);
223
case R600_QUERY_GPU_SC_BUSY:
224
return BUSY_INDEX(rscreen, sc);
225
case R600_QUERY_GPU_PA_BUSY:
226
return BUSY_INDEX(rscreen, pa);
227
case R600_QUERY_GPU_DB_BUSY:
228
return BUSY_INDEX(rscreen, db);
229
case R600_QUERY_GPU_CP_BUSY:
230
return BUSY_INDEX(rscreen, cp);
231
case R600_QUERY_GPU_CB_BUSY:
232
return BUSY_INDEX(rscreen, cb);
233
case R600_QUERY_GPU_SDMA_BUSY:
234
return BUSY_INDEX(rscreen, sdma);
235
case R600_QUERY_GPU_PFP_BUSY:
236
return BUSY_INDEX(rscreen, pfp);
237
case R600_QUERY_GPU_MEQ_BUSY:
238
return BUSY_INDEX(rscreen, meq);
239
case R600_QUERY_GPU_ME_BUSY:
240
return BUSY_INDEX(rscreen, me);
241
case R600_QUERY_GPU_SURF_SYNC_BUSY:
242
return BUSY_INDEX(rscreen, surf_sync);
243
case R600_QUERY_GPU_CP_DMA_BUSY:
244
return BUSY_INDEX(rscreen, cp_dma);
245
case R600_QUERY_GPU_SCRATCH_RAM_BUSY:
246
return BUSY_INDEX(rscreen, scratch_ram);
247
default:
248
unreachable("invalid query type");
249
}
250
}
251
252
/**
 * Take the starting snapshot for a GPU busy query.
 *
 * \param rscreen  screen owning the counters
 * \param type     R600_QUERY_GPU_* query type selecting the counter pair
 * \return packed busy/idle snapshot to pass to r600_end_counter() later
 */
uint64_t r600_begin_counter(struct r600_common_screen *rscreen, unsigned type)
{
	return r600_read_mmio_counter(rscreen,
				      busy_index_from_type(rscreen, type));
}
257
258
/**
 * Finish a GPU busy query and report the result.
 *
 * \param rscreen  screen owning the counters
 * \param type     R600_QUERY_GPU_* query type selecting the counter pair
 * \param begin    snapshot returned by r600_begin_counter() for this type
 * \return value computed by r600_end_mmio_counter() for that counter pair
 */
unsigned r600_end_counter(struct r600_common_screen *rscreen, unsigned type,
			  uint64_t begin)
{
	return r600_end_mmio_counter(rscreen, begin,
				     busy_index_from_type(rscreen, type));
}
264
265