Path: blob/21.2-virgl/src/gallium/drivers/freedreno/freedreno_autotune.c
4570 views
/*
 * Copyright © 2021 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "freedreno_autotune.h"
#include "freedreno_batch.h"
#include "freedreno_util.h"

/**
 * Tracks, for a given batch key (which maps to a FBO/framebuffer state),
 * recent sample-passed query results used to decide between GMEM and
 * bypass (sysmem) rendering.
 *
 * ralloc parent is fd_autotune::ht
 */
struct fd_batch_history {
   struct fd_batch_key *key;

   /* Entry in fd_autotune::lru: */
   struct list_head node;

   /* Number of entries currently on the 'results' list (capped at
    * MAX_RESULTS; see process_results()):
    */
   unsigned num_results;

   /**
    * List of recent fd_batch_result's
    */
   struct list_head results;
#define MAX_RESULTS 5
};

/**
 * Look up (or create) the history entry for this batch's key, moving it to
 * the head of the LRU.  Returns NULL if the batch has no key (i.e. no
 * framebuffer state to track against).
 *
 * The table is capped: once it holds 40 entries, the least-recently-used
 * history is evicted before inserting a new one.
 */
static struct fd_batch_history *
get_history(struct fd_autotune *at, struct fd_batch *batch)
{
   struct fd_batch_history *history;

   if (!batch->key)
      return NULL;

   /* batch->hash is the pre-computed hash of batch->key, so we can use the
    * pre-hashed search/insert variants:
    */
   struct hash_entry *entry =
      _mesa_hash_table_search_pre_hashed(at->ht, batch->hash, batch->key);

   if (entry) {
      history = entry->data;
      goto found;
   }

   /* ralloc parent is the hash table, so destroying the table frees all
    * histories (and, transitively, their cloned keys and results):
    */
   history = rzalloc_size(at->ht, sizeof(*history));

   history->key = fd_batch_key_clone(history, batch->key);
   list_inithead(&history->node);
   list_inithead(&history->results);

   /* Note: We cap # of cached GMEM states at 20.. so assuming double-
    * buffering, 40 should be a good place to cap cached autotune state
    */
   if (at->ht->entries >= 40) {
      struct fd_batch_history *last =
         list_last_entry(&at->lru, struct fd_batch_history, node);
      _mesa_hash_table_remove_key(at->ht, last->key);
      list_del(&last->node);
      /* Frees last->key and any attached results too (ralloc children): */
      ralloc_free(last);
   }

   _mesa_hash_table_insert_pre_hashed(at->ht, batch->hash, history->key,
                                      history);

found:
   /* Move to the head of the LRU: */
   list_delinit(&history->node);
   list_add(&history->node, &at->lru);

   return history;
}

/* ralloc destructor for fd_batch_result: */
static void
result_destructor(void *r)
{
   struct fd_batch_result *result = r;

   /* Just in case we manage to somehow still be on the pending_results list: */
   list_del(&result->node);
}

/**
 * Allocate a new result for this batch, assign it the next fence value and
 * the next slot index in the shared results buffer, and queue it on the
 * pending_results list until process_results() sees its fence signaled.
 *
 * NOTE(review): idx_counter wraps at ARRAY_SIZE(at->results->result), so if
 * more than that many results are in flight at once, an in-flight slot gets
 * reused — presumably the buffer is sized so this doesn't happen in
 * practice; confirm against fd_autotune_results.
 */
static struct fd_batch_result *
get_result(struct fd_autotune *at, struct fd_batch_history *history)
{
   /* ralloc parent is the history, so results die with their history: */
   struct fd_batch_result *result = rzalloc_size(history, sizeof(*result));

   result->fence =
      ++at->fence_counter; /* pre-increment so zero isn't valid fence */
   result->idx = at->idx_counter++;

   if (at->idx_counter >= ARRAY_SIZE(at->results->result))
      at->idx_counter = 0;

   result->history = history;
   list_addtail(&result->node, &at->pending_results);

   ralloc_set_destructor(result, result_destructor);

   return result;
}

/**
 * Drain pending results whose fence has been reached (at->results->fence is
 * read from the mapped results BO — presumably written back by the GPU;
 * results with a larger fence are not ready yet, and the list is in fence
 * order so we stop at the first unready one).
 *
 * Each ready result's samples-passed delta is computed from its slot in the
 * results buffer, and the result is moved onto its history's list, evicting
 * the oldest once the history holds MAX_RESULTS.
 */
static void
process_results(struct fd_autotune *at)
{
   uint32_t current_fence = at->results->fence;

   list_for_each_entry_safe (struct fd_batch_result, result,
                             &at->pending_results, node) {
      if (result->fence > current_fence)
         break;

      struct fd_batch_history *history = result->history;

      result->samples_passed = at->results->result[result->idx].samples_end -
                               at->results->result[result->idx].samples_start;

      list_delinit(&result->node);
      list_add(&result->node, &history->results);

      if (history->num_results < MAX_RESULTS) {
         history->num_results++;
      } else {
         /* Once above a limit, start popping old results off the
          * tail of the list:
          */
         struct fd_batch_result *old_result =
            list_last_entry(&history->results, struct fd_batch_result, node);
         list_delinit(&old_result->node);
         ralloc_free(old_result);
      }
   }
}

/**
 * Heuristic used when we have no sample-passed history for this render
 * target: prefer GMEM whenever there were clears, a recorded gmem_reason,
 * more than a handful of draws, or MSAA.
 */
static bool
fallback_use_bypass(struct fd_batch *batch)
{
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   /* Fallback logic if we have no historical data about the rendertarget: */
   if (batch->cleared || batch->gmem_reason ||
       (batch->num_draws > 5) || (pfb->samples > 1)) {
      return false;
   }

   return true;
}

/**
 * A magic 8-ball that tells the gmem code whether we should do bypass mode
 * for moar fps.
 *
 * Returns true to render in bypass (sysmem) mode, false to use GMEM.  Also
 * attaches a new autotune result to the batch (batch->autotune_result) so
 * this batch's sample counts feed future decisions.
 */
bool
fd_autotune_use_bypass(struct fd_autotune *at, struct fd_batch *batch)
{
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   process_results(at);

   /* Only enable on gen's that opt-in (and actually have sample-passed
    * collection wired up:
    */
   if (!batch->ctx->screen->gmem_reason_mask)
      return fallback_use_bypass(batch);

   /* Any gmem_reason bit outside the mask forces the fallback path: */
   if (batch->gmem_reason & ~batch->ctx->screen->gmem_reason_mask)
      return fallback_use_bypass(batch);

   for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
      /* If ms-rtt is involved, force GMEM, as we don't currently
       * implement a temporary render target that we can MSAA resolve
       * from
       */
      if (pfb->cbufs[i] && pfb->cbufs[i]->nr_samples)
         return fallback_use_bypass(batch);
   }

   struct fd_batch_history *history = get_history(at, batch);
   if (!history)
      return fallback_use_bypass(batch);

   batch->autotune_result = get_result(at, history);
   batch->autotune_result->cost = batch->cost;

   bool use_bypass = fallback_use_bypass(batch);

   if (use_bypass)
      return true;

   if (history->num_results > 0) {
      uint32_t total_samples = 0;

      // TODO we should account for clears somehow
      // TODO should we try to notice if there is a drastic change from
      // frame to frame?
      list_for_each_entry (struct fd_batch_result, result, &history->results,
                           node) {
         total_samples += result->samples_passed;
      }

      float avg_samples = (float)total_samples / (float)history->num_results;

      /* Low sample count could mean there was only a clear.. or there was
       * a clear plus draws that touch no or few samples
       */
      if (avg_samples < 500.0)
         return true;

      /* Cost-per-sample is an estimate for the average number of reads+
       * writes for a given passed sample.
       *
       * NOTE(review): this actually divides by num_draws (not by samples),
       * and num_draws divides total_draw_cost again below — presumably an
       * intentional weighting, but worth confirming against the upstream
       * tuning commits.
       */
      float sample_cost = batch->cost;
      sample_cost /= batch->num_draws;

      float total_draw_cost = (avg_samples * sample_cost) / batch->num_draws;
      DBG("%08x:%u\ttotal_samples=%u, avg_samples=%f, sample_cost=%f, "
          "total_draw_cost=%f\n",
          batch->hash, batch->num_draws, total_samples, avg_samples,
          sample_cost, total_draw_cost);

      /* Cheap-enough batches go bypass; expensive ones benefit from GMEM: */
      if (total_draw_cost < 3000.0)
         return true;
   }

   return use_bypass;
}

/**
 * One-time setup: create the history hash table / LRU, and allocate + map
 * the BO that the GPU writes sample-passed results (and the fence) into.
 */
void
fd_autotune_init(struct fd_autotune *at, struct fd_device *dev)
{
   at->ht =
      _mesa_hash_table_create(NULL, fd_batch_key_hash, fd_batch_key_equals);
   list_inithead(&at->lru);

   at->results_mem = fd_bo_new(dev, sizeof(struct fd_autotune_results),
                               0, "autotune");
   at->results = fd_bo_map(at->results_mem);

   list_inithead(&at->pending_results);
}

/**
 * Teardown: destroying the hash table frees all histories (and their
 * results, via ralloc parentage); the result destructors unlink anything
 * still on pending_results.
 */
void
fd_autotune_fini(struct fd_autotune *at)
{
   _mesa_hash_table_destroy(at->ht, NULL);
   fd_bo_del(at->results_mem);
}