/* Path: blob/21.2-virgl/src/broadcom/simulator/v3dx_simulator.c */
/*
 * Copyright © 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * @file v3dx_simulator.c
 *
 * Implements the actual HW interaction between the GL driver's V3D simulator
 * and the simulator.
 *
 * The register headers between V3D versions will have conflicting defines, so
 * all register interactions appear in this file and are compiled per V3D
 * version we support.
 */

#ifdef USE_V3D_SIMULATOR

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#include "v3d_simulator.h"
#include "v3d_simulator_wrapper.h"

#include "util/macros.h"
#include "util/bitscan.h"
#include "drm-uapi/v3d_drm.h"

#define HW_REGISTER_RO(x) (x)
#define HW_REGISTER_RW(x) (x)
#if V3D_VERSION >= 41
#include "libs/core/v3d/registers/4.1.35.0/v3d.h"
#else
#include "libs/core/v3d/registers/3.3.0.0/v3d.h"
#endif

/* Convenience wrappers: every function here takes a "v3d" handle, so the
 * macros can reference it directly.
 */
#define V3D_WRITE(reg, val) v3d_hw_write_reg(v3d, reg, val)
#define V3D_READ(reg) v3d_hw_read_reg(v3d, reg)

/* Invalidates the L3 cache (V3D < 4.0 only; later gens have no GCA L3). */
static void
v3d_invalidate_l3(struct v3d_hw *v3d)
{
#if V3D_VERSION < 40
        uint32_t gca_ctrl = V3D_READ(V3D_GCA_CACHE_CTRL);

        /* Pulse the flush bit: set it, then clear it again. */
        V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH_SET);
        V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH_SET);
#endif
}

/* Invalidates the L2C cache.  This is a read-only cache for uniforms and
 * instructions, and is not present on V3D 3.3+.
 */
static void
v3d_invalidate_l2c(struct v3d_hw *v3d)
{
        if (V3D_VERSION >= 33)
                return;

        V3D_WRITE(V3D_CTL_0_L2CACTL,
                  V3D_CTL_0_L2CACTL_L2CCLR_SET |
                  V3D_CTL_0_L2CACTL_L2CENA_SET);
}

enum v3d_l2t_cache_flush_mode {
        V3D_CACHE_FLUSH_MODE_FLUSH,
        V3D_CACHE_FLUSH_MODE_CLEAR,
        V3D_CACHE_FLUSH_MODE_CLEAN,
};

/* Invalidates texture L2 cachelines over the whole address range. */
static void
v3d_invalidate_l2t(struct v3d_hw *v3d)
{
        V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
        V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
        V3D_WRITE(V3D_CTL_0_L2TCACTL,
                  V3D_CTL_0_L2TCACTL_L2TFLS_SET |
                  (V3D_CACHE_FLUSH_MODE_FLUSH << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));
}

/*
 * Wait for l2tcactl, used for flushes.
 *
 * FIXME: for a multicore scenario we should pass here the core. All wrapper
 * assumes just one core, so would be better to handle that on that case.
 */
static UNUSED void v3d_core_wait_l2tcactl(struct v3d_hw *v3d,
                                          uint32_t ctrl)
{
        assert(!(ctrl & ~(V3D_CTL_0_L2TCACTL_TMUWCF_SET |
                          V3D_CTL_0_L2TCACTL_L2TFLS_SET)));

        /* Tick the simulator until the HW clears the requested bits. */
        while (V3D_READ(V3D_CTL_0_L2TCACTL) & ctrl) {
                v3d_hw_tick(v3d);
        }
}

/* Flushes dirty texture cachelines from the L1 write combiner */
static void
v3d_flush_l1td(struct v3d_hw *v3d)
{
        V3D_WRITE(V3D_CTL_0_L2TCACTL,
                  V3D_CTL_0_L2TCACTL_TMUWCF_SET);

        /* Note: here the kernel (and previous versions of the simulator
         * wrapper) is using V3D_CTL_0_L2TCACTL_L2TFLS_SET, as with l2t. We
         * understand that it makes more sense to do like this. We need to
         * confirm which one is doing it correctly. So far things work fine on
         * the simulator this way.
         */
        v3d_core_wait_l2tcactl(v3d, V3D_CTL_0_L2TCACTL_TMUWCF_SET);
}

/* Flushes dirty texture L2 cachelines */
static void
v3d_flush_l2t(struct v3d_hw *v3d)
{
        V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
        V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
        V3D_WRITE(V3D_CTL_0_L2TCACTL,
                  V3D_CTL_0_L2TCACTL_L2TFLS_SET |
                  (V3D_CACHE_FLUSH_MODE_CLEAN << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));

        v3d_core_wait_l2tcactl(v3d, V3D_CTL_0_L2TCACTL_L2TFLS_SET);
}

/* Invalidates the slice caches. These are read-only caches. */
static void
v3d_invalidate_slices(struct v3d_hw *v3d)
{
        V3D_WRITE(V3D_CTL_0_SLCACTL, ~0);
}

/* Invalidates every read-side cache before kicking a job. */
static void
v3d_invalidate_caches(struct v3d_hw *v3d)
{
        v3d_invalidate_l3(v3d);
        v3d_invalidate_l2c(v3d);
        v3d_invalidate_l2t(v3d);
        v3d_invalidate_slices(v3d);
}

/* GMP table offset, remembered so the out-of-memory ISR can re-program the
 * GMP after allocating spill memory.
 */
static uint32_t g_gmp_ofs;

static void
v3d_reload_gmp(struct v3d_hw *v3d)
{
        /* Completely reset the GMP. */
        V3D_WRITE(V3D_GMP_CFG,
                  V3D_GMP_CFG_PROTENABLE_SET);
        V3D_WRITE(V3D_GMP_TABLE_ADDR, g_gmp_ofs);
        V3D_WRITE(V3D_GMP_CLEAR_LOAD, ~0);
        while (V3D_READ(V3D_GMP_STATUS) &
               V3D_GMP_STATUS_CFG_BUSY_SET) {
                ;
        }
}

/* Flushes every write-side cache after a job completes. */
static UNUSED void
v3d_flush_caches(struct v3d_hw *v3d)
{
        v3d_flush_l1td(v3d);
        v3d_flush_l2t(v3d);
}

/* Submits a TFU (texture formatting unit) job and busy-waits for its
 * completion by watching the conversion count toggle.
 */
int
v3dX(simulator_submit_tfu_ioctl)(struct v3d_hw *v3d,
                                 struct drm_v3d_submit_tfu *args)
{
        int last_vtct = V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET;

        V3D_WRITE(V3D_TFU_IIA, args->iia);
        V3D_WRITE(V3D_TFU_IIS, args->iis);
        V3D_WRITE(V3D_TFU_ICA, args->ica);
        V3D_WRITE(V3D_TFU_IUA, args->iua);
        V3D_WRITE(V3D_TFU_IOA, args->ioa);
        V3D_WRITE(V3D_TFU_IOS, args->ios);
        V3D_WRITE(V3D_TFU_COEF0, args->coef[0]);
        V3D_WRITE(V3D_TFU_COEF1, args->coef[1]);
        V3D_WRITE(V3D_TFU_COEF2, args->coef[2]);
        V3D_WRITE(V3D_TFU_COEF3, args->coef[3]);

        /* Writing ICFG kicks off the conversion. */
        V3D_WRITE(V3D_TFU_ICFG, args->icfg);

        while ((V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET) == last_vtct) {
                v3d_hw_tick(v3d);
        }

        return 0;
}

#if V3D_VERSION >= 41
/* Submits a CSD (compute shader dispatch) job and busy-waits for completion. */
int
v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d,
                                 struct drm_v3d_submit_csd *args,
                                 uint32_t gmp_ofs)
{
        int last_completed_jobs = (V3D_READ(V3D_CSD_0_STATUS) &
                                   V3D_CSD_0_STATUS_NUM_COMPLETED_JOBS_SET);
        g_gmp_ofs = gmp_ofs;
        v3d_reload_gmp(v3d);

        v3d_invalidate_caches(v3d);

        V3D_WRITE(V3D_CSD_0_QUEUED_CFG1, args->cfg[1]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG2, args->cfg[2]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG3, args->cfg[3]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG4, args->cfg[4]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG5, args->cfg[5]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG6, args->cfg[6]);
        /* CFG0 kicks off the job */
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG0, args->cfg[0]);

        /* Now we wait for the dispatch to finish. The safest way is to check
         * if NUM_COMPLETED_JOBS has increased. Note that in spite of that
         * name that register field is about the number of completed
         * dispatches.
         */
        while ((V3D_READ(V3D_CSD_0_STATUS) &
                V3D_CSD_0_STATUS_NUM_COMPLETED_JOBS_SET) == last_completed_jobs) {
                v3d_hw_tick(v3d);
        }

        v3d_flush_caches(v3d);

        return 0;
}
#endif

/* Implements DRM_IOCTL_V3D_GET_PARAM: answers driver capability queries and
 * identity-register reads against the simulated HW.
 */
int
v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d,
                                struct drm_v3d_get_param *args)
{
        static const uint32_t reg_map[] = {
                [DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_CTL_UIFCFG,
                [DRM_V3D_PARAM_V3D_HUB_IDENT1] = V3D_HUB_CTL_IDENT1,
                [DRM_V3D_PARAM_V3D_HUB_IDENT2] = V3D_HUB_CTL_IDENT2,
                [DRM_V3D_PARAM_V3D_HUB_IDENT3] = V3D_HUB_CTL_IDENT3,
                [DRM_V3D_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_0_IDENT0,
                [DRM_V3D_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_0_IDENT1,
                [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_0_IDENT2,
        };

        switch (args->param) {
        case DRM_V3D_PARAM_SUPPORTS_TFU:
                args->value = 1;
                return 0;
        case DRM_V3D_PARAM_SUPPORTS_CSD:
                args->value = V3D_VERSION >= 41;
                return 0;
        case DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH:
                args->value = 1;
                return 0;
        }

        if (args->param < ARRAY_SIZE(reg_map) && reg_map[args->param]) {
                args->value = V3D_READ(reg_map[args->param]);
                return 0;
        }

        /* Fix: report the unknown *param*, not args->value (which is an
         * output field and is uninitialized at this point).
         */
        fprintf(stderr, "Unknown DRM_IOCTL_V3D_GET_PARAM(%lld)\n",
                (long long)args->param);
        abort();
}

/* Handle used by the ISR callback, which receives no user pointer. */
static struct v3d_hw *v3d_isr_hw;

static void
v3d_isr_core(struct v3d_hw *v3d,
             unsigned core)
{
        /* FIXME: so far we are assuming just one core, and using only the _0_
         * registers. If we add multiple-core on the simulator, we would need
         * to pass core as a parameter, and chose the proper registers.
         */
        assert(core == 0);
        uint32_t core_status = V3D_READ(V3D_CTL_0_INT_STS);
        V3D_WRITE(V3D_CTL_0_INT_CLR, core_status);

        if (core_status & V3D_CTL_0_INT_STS_INT_OUTOMEM_SET) {
                /* Binner ran out of memory: hand it a fresh spill buffer. */
                uint32_t size = 256 * 1024;
                uint32_t offset = v3d_simulator_get_spill(size);

                v3d_reload_gmp(v3d);

                V3D_WRITE(V3D_PTB_0_BPOA, offset);
                V3D_WRITE(V3D_PTB_0_BPOS, size);
                return;
        }

        if (core_status & V3D_CTL_0_INT_STS_INT_GMPV_SET) {
                fprintf(stderr, "GMP violation at 0x%08x\n",
                        V3D_READ(V3D_GMP_VIO_ADDR));
                abort();
        } else {
                fprintf(stderr,
                        "Unexpected ISR with core status 0x%08x\n",
                        core_status);
        }
        abort();
}

static void
handle_mmu_interruptions(struct v3d_hw *v3d,
                         uint32_t hub_status)
{
        bool wrv = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_WRV_SET;
        bool pti = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_PTI_SET;
        bool cap = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_CAP_SET;

        if (!(pti || cap || wrv))
                return;

        const char *client = "?";
        uint32_t axi_id = V3D_READ(V3D_MMU_VIO_ID);
        uint32_t va_width = 30;

#if V3D_VERSION >= 41
        static const char *const v3d41_axi_ids[] = {
                "L2T",
                "PTB",
                "PSE",
                "TLB",
                "CLE",
                "TFU",
                "MMU",
                "GMP",
        };

        axi_id = axi_id >> 5;
        if (axi_id < ARRAY_SIZE(v3d41_axi_ids))
                client = v3d41_axi_ids[axi_id];

        uint32_t mmu_debug = V3D_READ(V3D_MMU_DEBUG_INFO);

        va_width += ((mmu_debug & V3D_MMU_DEBUG_INFO_VA_WIDTH_SET)
                     >> V3D_MMU_DEBUG_INFO_VA_WIDTH_LSB);
#endif
        /* Only the top bits (final number depends on the gen) of the virtual
         * address are reported in the MMU VIO_ADDR register.
         */
        uint64_t vio_addr = ((uint64_t)V3D_READ(V3D_MMU_VIO_ADDR) <<
                             (va_width - 32));

        /* Difference with the kernal: here were are going to abort after
         * logging, so we don't bother with some stuff that the kernel does,
         * like restoring the MMU ctrl bits
         */

        fprintf(stderr, "MMU error from client %s (%d) at 0x%llx%s%s%s\n",
                client, axi_id, (long long) vio_addr,
                wrv ? ", write violation" : "",
                pti ? ", pte invalid" : "",
                cap ? ", cap exceeded" : "");

        abort();
}

static void
v3d_isr_hub(struct v3d_hw *v3d)
{
        uint32_t hub_status = V3D_READ(V3D_HUB_CTL_INT_STS);

        /* Acknowledge the interrupts we're handling here */
        V3D_WRITE(V3D_HUB_CTL_INT_CLR, hub_status);

        if (hub_status & V3D_HUB_CTL_INT_STS_INT_TFUC_SET) {
                /* FIXME: we were not able to raise this exception. We let the
                 * unreachable here, so we could get one if it is raised on
                 * the future. In any case, note that for this case we would
                 * only be doing debugging log.
                 */
                unreachable("TFU Conversion Complete interrupt not handled");
        }

        handle_mmu_interruptions(v3d, hub_status);
}

/* Top-level ISR: fans each pending bit out to either the hub handler or the
 * per-core handler.
 */
static void
v3d_isr(uint32_t hub_status)
{
        struct v3d_hw *v3d = v3d_isr_hw;
        uint32_t mask = hub_status;

        /* Check the hub_status bits */
        while (mask) {
                unsigned core = u_bit_scan(&mask);

                if (core == v3d_hw_get_hub_core())
                        v3d_isr_hub(v3d);
                else
                        v3d_isr_core(v3d, core);
        }

        return;
}

/* One-time register setup: interrupt masks and the ISR hook. */
void
v3dX(simulator_init_regs)(struct v3d_hw *v3d)
{
#if V3D_VERSION == 33
        /* Set OVRTMUOUT to match kernel behavior.
         *
         * This means that the texture sampler uniform configuration's tmu
         * output type field is used, instead of using the hardware default
         * behavior based on the texture type. If you want the default
         * behavior, you can still put "2" in the indirect texture state's
         * output_type field.
         */
        V3D_WRITE(V3D_CTL_0_MISCCFG, V3D_CTL_1_MISCCFG_OVRTMUOUT_SET);
#endif

        /* FIXME: the kernel captures some additional core interrupts here,
         * for tracing. Perhaps we should evaluate to do the same here and add
         * some debug options.
         */
        uint32_t core_interrupts = (V3D_CTL_0_INT_STS_INT_GMPV_SET |
                                    V3D_CTL_0_INT_STS_INT_OUTOMEM_SET);
        V3D_WRITE(V3D_CTL_0_INT_MSK_SET, ~core_interrupts);
        V3D_WRITE(V3D_CTL_0_INT_MSK_CLR, core_interrupts);

        uint32_t hub_interrupts =
                (V3D_HUB_CTL_INT_STS_INT_MMU_WRV_SET |  /* write violation */
                 V3D_HUB_CTL_INT_STS_INT_MMU_PTI_SET |  /* page table invalid */
                 V3D_HUB_CTL_INT_STS_INT_MMU_CAP_SET |  /* CAP exceeded */
                 V3D_HUB_CTL_INT_STS_INT_TFUC_SET);     /* TFU conversion */

        V3D_WRITE(V3D_HUB_CTL_INT_MSK_SET, ~hub_interrupts);
        V3D_WRITE(V3D_HUB_CTL_INT_MSK_CLR, hub_interrupts);

        v3d_isr_hw = v3d;
        v3d_hw_set_isr(v3d, v3d_isr);
}

/* Submits a bin+render CL job, waiting for bin to finish before kicking
 * render, mirroring what the kernel scheduler does with fences.
 */
void
v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d,
                                struct drm_v3d_submit_cl *submit,
                                uint32_t gmp_ofs)
{
        int last_bfc = (V3D_READ(V3D_CLE_0_BFC) &
                        V3D_CLE_0_BFC_BMFCT_SET);

        int last_rfc = (V3D_READ(V3D_CLE_0_RFC) &
                        V3D_CLE_0_RFC_RMFCT_SET);

        g_gmp_ofs = gmp_ofs;
        v3d_reload_gmp(v3d);

        v3d_invalidate_caches(v3d);

        if (submit->qma) {
                V3D_WRITE(V3D_CLE_0_CT0QMA, submit->qma);
                V3D_WRITE(V3D_CLE_0_CT0QMS, submit->qms);
        }
#if V3D_VERSION >= 41
        if (submit->qts) {
                V3D_WRITE(V3D_CLE_0_CT0QTS,
                          V3D_CLE_0_CT0QTS_CTQTSEN_SET |
                          submit->qts);
        }
#endif
        V3D_WRITE(V3D_CLE_0_CT0QBA, submit->bcl_start);
        V3D_WRITE(V3D_CLE_0_CT0QEA, submit->bcl_end);

        /* Wait for bin to complete before firing render. The kernel's
         * scheduler implements this using the GPU scheduler blocking on the
         * bin fence completing. (We don't use HW semaphores).
         */
        while ((V3D_READ(V3D_CLE_0_BFC) &
                V3D_CLE_0_BFC_BMFCT_SET) == last_bfc) {
                v3d_hw_tick(v3d);
        }

        v3d_invalidate_caches(v3d);

        V3D_WRITE(V3D_CLE_0_CT1QBA, submit->rcl_start);
        V3D_WRITE(V3D_CLE_0_CT1QEA, submit->rcl_end);

        while ((V3D_READ(V3D_CLE_0_RFC) &
                V3D_CLE_0_RFC_RMFCT_SET) == last_rfc) {
                v3d_hw_tick(v3d);
        }
}

#endif /* USE_V3D_SIMULATOR */