Path: blob/21.2-virgl/src/intel/dev/intel_device_info.c
7080 views
/*1* Copyright © 2013 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include <assert.h>24#include <stdbool.h>25#include <stdio.h>26#include <stdlib.h>27#include <string.h>28#include <unistd.h>29#include "intel_device_info.h"30#include "compiler/shader_enums.h"31#include "intel/common/intel_gem.h"32#include "util/bitscan.h"33#include "util/log.h"34#include "util/macros.h"3536#include "drm-uapi/i915_drm.h"3738static const struct {39const char *name;40int pci_id;41} name_map[] = {42{ "lpt", 0x27a2 },43{ "brw", 0x2a02 },44{ "g4x", 0x2a42 },45{ "ilk", 0x0042 },46{ "snb", 0x0126 },47{ "ivb", 0x016a },48{ "hsw", 0x0d2e },49{ "byt", 0x0f33 },50{ "bdw", 0x162e },51{ "chv", 0x22B3 },52{ "skl", 0x1912 },53{ "bxt", 0x5A85 },54{ "kbl", 0x5912 },55{ "aml", 0x591C },56{ "glk", 0x3185 },57{ "cfl", 0x3E9B },58{ "whl", 0x3EA1 },59{ "cml", 0x9b41 },60{ "icl", 0x8a52 },61{ "ehl", 0x4500 },62{ "jsl", 0x4E71 },63{ "tgl", 0x9a49 },64{ "rkl", 0x4c8a },65{ "dg1", 0x4905 },66{ "adl", 0x4680 },67};6869/**70* Get the PCI ID for the device name.71*72* Returns -1 if the device is not known.73*/74int75intel_device_name_to_pci_device_id(const char *name)76{77for (unsigned i = 0; i < ARRAY_SIZE(name_map); i++) {78if (!strcmp(name_map[i].name, name))79return name_map[i].pci_id;80}8182return -1;83}8485static const struct intel_device_info intel_device_info_gfx3 = {86.ver = 3,87.simulator_id = -1,88.cs_prefetch_size = 512,89};9091static const struct intel_device_info intel_device_info_i965 = {92.ver = 4,93.has_negative_rhw_bug = true,94.num_slices = 1,95.num_subslices = { 1, },96.num_eu_per_subslice = 8,97.num_thread_per_eu = 4,98.max_vs_threads = 16,99.max_gs_threads = 2,100.max_wm_threads = 8 * 4,101.urb = {102.size = 256,103},104.timestamp_frequency = 12500000,105.simulator_id = -1,106.cs_prefetch_size = 512,107};108109static const struct intel_device_info intel_device_info_g4x = {110.ver = 4,111.verx10 = 45,112.has_pln = true,113.has_compr4 = true,114.has_surface_tile_offset = true,115.is_g4x = true,116.num_slices = 1,117.num_subslices = { 1, },118.num_eu_per_subslice = 10,119.num_thread_per_eu = 5,120.max_vs_threads = 32,121.max_gs_threads = 2,122.max_wm_threads = 10 * 5,123.urb = {124.size = 384,125},126.timestamp_frequency = 12500000,127.simulator_id = -1,128.cs_prefetch_size = 512,129};130131static const struct intel_device_info intel_device_info_ilk = {132.ver = 5,133.has_pln = true,134.has_compr4 = true,135.has_surface_tile_offset = true,136.num_slices = 1,137.num_subslices = { 1, },138.num_eu_per_subslice = 12,139.num_thread_per_eu = 6,140.max_vs_threads = 72,141.max_gs_threads = 32,142.max_wm_threads = 12 * 6,143.urb = {144.size = 1024,145},146.timestamp_frequency = 12500000,147.simulator_id = -1,148.cs_prefetch_size = 512,149};150151static const struct intel_device_info intel_device_info_snb_gt1 = {152.ver = 6,153.gt = 1,154.has_hiz_and_separate_stencil = true,155.has_llc = true,156.has_pln = true,157.has_surface_tile_offset = true,158.needs_unlit_centroid_workaround = true,159.num_slices = 1,160.num_subslices = { 1, },161.num_eu_per_subslice = 6,162.num_thread_per_eu = 6, /* Not confirmed */163.max_vs_threads = 24,164.max_gs_threads = 21, /* conservative; 24 if rendering disabled. */165.max_wm_threads = 40,166.urb = {167.size = 32,168.min_entries = {169[MESA_SHADER_VERTEX] = 24,170},171.max_entries = {172[MESA_SHADER_VERTEX] = 256,173[MESA_SHADER_GEOMETRY] = 256,174},175},176.timestamp_frequency = 12500000,177.simulator_id = -1,178.cs_prefetch_size = 512,179};180181static const struct intel_device_info intel_device_info_snb_gt2 = {182.ver = 6,183.gt = 2,184.has_hiz_and_separate_stencil = true,185.has_llc = true,186.has_pln = true,187.has_surface_tile_offset = true,188.needs_unlit_centroid_workaround = true,189.num_slices = 1,190.num_subslices = { 1, },191.num_eu_per_subslice = 12,192.num_thread_per_eu = 6, /* Not confirmed */193.max_vs_threads = 60,194.max_gs_threads = 60,195.max_wm_threads = 80,196.urb = {197.size = 64,198.min_entries = {199[MESA_SHADER_VERTEX] = 24,200},201.max_entries = {202[MESA_SHADER_VERTEX] = 256,203[MESA_SHADER_GEOMETRY] = 256,204},205},206.timestamp_frequency = 12500000,207.simulator_id = -1,208.cs_prefetch_size = 512,209};210211#define GFX7_FEATURES \212.ver = 7, \213.has_hiz_and_separate_stencil = true, \214.must_use_separate_stencil = true, \215.has_llc = true, \216.has_pln = true, \217.has_64bit_float = true, \218.has_surface_tile_offset = true, \219.timestamp_frequency = 12500000, \220.cs_prefetch_size = 512221222static const struct intel_device_info intel_device_info_ivb_gt1 = {223GFX7_FEATURES, .is_ivybridge = true, .gt = 1,224.num_slices = 1,225.num_subslices = { 1, },226.num_eu_per_subslice = 6,227.num_thread_per_eu = 6,228.l3_banks = 2,229.max_vs_threads = 36,230.max_tcs_threads = 36,231.max_tes_threads = 36,232.max_gs_threads = 36,233.max_wm_threads = 48,234.max_cs_threads = 36,235.urb = {236.min_entries = {237[MESA_SHADER_VERTEX] = 32,238[MESA_SHADER_TESS_EVAL] = 10,239},240.max_entries = {241[MESA_SHADER_VERTEX] = 512,242[MESA_SHADER_TESS_CTRL] = 32,243[MESA_SHADER_TESS_EVAL] = 288,244[MESA_SHADER_GEOMETRY] = 192,245},246},247.simulator_id = 7,248};249250static const struct intel_device_info intel_device_info_ivb_gt2 = {251GFX7_FEATURES, .is_ivybridge = true, .gt = 2,252.num_slices = 1,253.num_subslices = { 1, },254.num_eu_per_subslice = 12,255.num_thread_per_eu = 8, /* Not sure why this isn't a multiple of256* @max_wm_threads ... */257.l3_banks = 4,258.max_vs_threads = 128,259.max_tcs_threads = 128,260.max_tes_threads = 128,261.max_gs_threads = 128,262.max_wm_threads = 172,263.max_cs_threads = 64,264.urb = {265.min_entries = {266[MESA_SHADER_VERTEX] = 32,267[MESA_SHADER_TESS_EVAL] = 10,268},269.max_entries = {270[MESA_SHADER_VERTEX] = 704,271[MESA_SHADER_TESS_CTRL] = 64,272[MESA_SHADER_TESS_EVAL] = 448,273[MESA_SHADER_GEOMETRY] = 320,274},275},276.simulator_id = 7,277};278279static const struct intel_device_info intel_device_info_byt = {280GFX7_FEATURES, .is_baytrail = true, .gt = 1,281.num_slices = 1,282.num_subslices = { 1, },283.num_eu_per_subslice = 4,284.num_thread_per_eu = 8,285.l3_banks = 1,286.has_llc = false,287.max_vs_threads = 36,288.max_tcs_threads = 36,289.max_tes_threads = 36,290.max_gs_threads = 36,291.max_wm_threads = 48,292.max_cs_threads = 32,293.urb = {294.min_entries = {295[MESA_SHADER_VERTEX] = 32,296[MESA_SHADER_TESS_EVAL] = 10,297},298.max_entries = {299[MESA_SHADER_VERTEX] = 512,300[MESA_SHADER_TESS_CTRL] = 32,301[MESA_SHADER_TESS_EVAL] = 288,302[MESA_SHADER_GEOMETRY] = 192,303},304},305.simulator_id = 10,306};307308#define HSW_FEATURES \309GFX7_FEATURES, \310.is_haswell = true, \311.verx10 = 75, \312.supports_simd16_3src = true313314static const struct intel_device_info intel_device_info_hsw_gt1 = {315HSW_FEATURES, .gt = 1,316.num_slices = 1,317.num_subslices = { 1, },318.num_eu_per_subslice = 10,319.num_thread_per_eu = 7,320.l3_banks = 2,321.max_vs_threads = 70,322.max_tcs_threads = 70,323.max_tes_threads = 70,324.max_gs_threads = 70,325.max_wm_threads = 102,326.max_cs_threads = 70,327.urb = {328.min_entries = {329[MESA_SHADER_VERTEX] = 32,330[MESA_SHADER_TESS_EVAL] = 10,331},332.max_entries = {333[MESA_SHADER_VERTEX] = 640,334[MESA_SHADER_TESS_CTRL] = 64,335[MESA_SHADER_TESS_EVAL] = 384,336[MESA_SHADER_GEOMETRY] = 256,337},338},339.simulator_id = 9,340};341342static const struct intel_device_info intel_device_info_hsw_gt2 = {343HSW_FEATURES, .gt = 2,344.num_slices = 1,345.num_subslices = { 2, },346.num_eu_per_subslice = 10,347.num_thread_per_eu = 7,348.l3_banks = 4,349.max_vs_threads = 280,350.max_tcs_threads = 256,351.max_tes_threads = 280,352.max_gs_threads = 256,353.max_wm_threads = 204,354.max_cs_threads = 70,355.urb = {356.min_entries = {357[MESA_SHADER_VERTEX] = 64,358[MESA_SHADER_TESS_EVAL] = 10,359},360.max_entries = {361[MESA_SHADER_VERTEX] = 1664,362[MESA_SHADER_TESS_CTRL] = 128,363[MESA_SHADER_TESS_EVAL] = 960,364[MESA_SHADER_GEOMETRY] = 640,365},366},367.simulator_id = 9,368};369370static const struct intel_device_info intel_device_info_hsw_gt3 = {371HSW_FEATURES, .gt = 3,372.num_slices = 2,373.num_subslices = { 2, },374.num_eu_per_subslice = 10,375.num_thread_per_eu = 7,376.l3_banks = 8,377.max_vs_threads = 280,378.max_tcs_threads = 256,379.max_tes_threads = 280,380.max_gs_threads = 256,381.max_wm_threads = 408,382.max_cs_threads = 70,383.urb = {384.min_entries = {385[MESA_SHADER_VERTEX] = 64,386[MESA_SHADER_TESS_EVAL] = 10,387},388.max_entries = {389[MESA_SHADER_VERTEX] = 1664,390[MESA_SHADER_TESS_CTRL] = 128,391[MESA_SHADER_TESS_EVAL] = 960,392[MESA_SHADER_GEOMETRY] = 640,393},394},395.simulator_id = 9,396};397398/* It's unclear how well supported sampling from the hiz buffer is on GFX8,399* so keep things conservative for now and set has_sample_with_hiz = false.400*/401#define GFX8_FEATURES \402.ver = 8, \403.has_hiz_and_separate_stencil = true, \404.must_use_separate_stencil = true, \405.has_llc = true, \406.has_sample_with_hiz = false, \407.has_pln = true, \408.has_integer_dword_mul = true, \409.has_64bit_float = true, \410.has_64bit_int = true, \411.supports_simd16_3src = true, \412.has_surface_tile_offset = true, \413.num_thread_per_eu = 7, \414.max_vs_threads = 504, \415.max_tcs_threads = 504, \416.max_tes_threads = 504, \417.max_gs_threads = 504, \418.max_wm_threads = 384, \419.timestamp_frequency = 12500000, \420.cs_prefetch_size = 512421422static const struct intel_device_info intel_device_info_bdw_gt1 = {423GFX8_FEATURES, .gt = 1,424.is_broadwell = true,425.num_slices = 1,426.num_subslices = { 2, },427.num_eu_per_subslice = 6,428.l3_banks = 2,429.max_cs_threads = 42,430.urb = {431.min_entries = {432[MESA_SHADER_VERTEX] = 64,433[MESA_SHADER_TESS_EVAL] = 34,434},435.max_entries = {436[MESA_SHADER_VERTEX] = 2560,437[MESA_SHADER_TESS_CTRL] = 504,438[MESA_SHADER_TESS_EVAL] = 1536,439/* Reduced from 960, seems to be similar to the bug on Gfx9 GT1. */440[MESA_SHADER_GEOMETRY] = 690,441},442},443.simulator_id = 11,444};445446static const struct intel_device_info intel_device_info_bdw_gt2 = {447GFX8_FEATURES, .gt = 2,448.is_broadwell = true,449.num_slices = 1,450.num_subslices = { 3, },451.num_eu_per_subslice = 8,452.l3_banks = 4,453.max_cs_threads = 56,454.urb = {455.min_entries = {456[MESA_SHADER_VERTEX] = 64,457[MESA_SHADER_TESS_EVAL] = 34,458},459.max_entries = {460[MESA_SHADER_VERTEX] = 2560,461[MESA_SHADER_TESS_CTRL] = 504,462[MESA_SHADER_TESS_EVAL] = 1536,463[MESA_SHADER_GEOMETRY] = 960,464},465},466.simulator_id = 11,467};468469static const struct intel_device_info intel_device_info_bdw_gt3 = {470GFX8_FEATURES, .gt = 3,471.is_broadwell = true,472.num_slices = 2,473.num_subslices = { 3, 3, },474.num_eu_per_subslice = 8,475.l3_banks = 8,476.max_cs_threads = 56,477.urb = {478.min_entries = {479[MESA_SHADER_VERTEX] = 64,480[MESA_SHADER_TESS_EVAL] = 34,481},482.max_entries = {483[MESA_SHADER_VERTEX] = 2560,484[MESA_SHADER_TESS_CTRL] = 504,485[MESA_SHADER_TESS_EVAL] = 1536,486[MESA_SHADER_GEOMETRY] = 960,487},488},489.simulator_id = 11,490};491492static const struct intel_device_info intel_device_info_chv = {493GFX8_FEATURES, .is_cherryview = 1, .gt = 1,494.has_llc = false,495.has_integer_dword_mul = false,496.num_slices = 1,497.num_subslices = { 2, },498.num_eu_per_subslice = 8,499.l3_banks = 2,500.max_vs_threads = 80,501.max_tcs_threads = 80,502.max_tes_threads = 80,503.max_gs_threads = 80,504.max_wm_threads = 128,505.max_cs_threads = 6 * 7,506.urb = {507.min_entries = {508[MESA_SHADER_VERTEX] = 34,509[MESA_SHADER_TESS_EVAL] = 34,510},511.max_entries = {512[MESA_SHADER_VERTEX] = 640,513[MESA_SHADER_TESS_CTRL] = 80,514[MESA_SHADER_TESS_EVAL] = 384,515[MESA_SHADER_GEOMETRY] = 256,516},517},518.simulator_id = 13,519};520521#define GFX9_HW_INFO \522.ver = 9, \523.max_vs_threads = 336, \524.max_gs_threads = 336, \525.max_tcs_threads = 336, \526.max_tes_threads = 336, \527.max_cs_threads = 56, \528.timestamp_frequency = 12000000, \529.cs_prefetch_size = 512, \530.urb = { \531.min_entries = { \532[MESA_SHADER_VERTEX] = 64, \533[MESA_SHADER_TESS_EVAL] = 34, \534}, \535.max_entries = { \536[MESA_SHADER_VERTEX] = 1856, \537[MESA_SHADER_TESS_CTRL] = 672, \538[MESA_SHADER_TESS_EVAL] = 1120, \539[MESA_SHADER_GEOMETRY] = 640, \540}, \541}542543#define GFX9_LP_FEATURES \544GFX8_FEATURES, \545GFX9_HW_INFO, \546.has_integer_dword_mul = false, \547.gt = 1, \548.has_llc = false, \549.has_sample_with_hiz = true, \550.num_slices = 1, \551.num_thread_per_eu = 6, \552.max_vs_threads = 112, \553.max_tcs_threads = 112, \554.max_tes_threads = 112, \555.max_gs_threads = 112, \556.max_cs_threads = 6 * 6, \557.timestamp_frequency = 19200000, \558.urb = { \559.min_entries = { \560[MESA_SHADER_VERTEX] = 34, \561[MESA_SHADER_TESS_EVAL] = 34, \562}, \563.max_entries = { \564[MESA_SHADER_VERTEX] = 704, \565[MESA_SHADER_TESS_CTRL] = 256, \566[MESA_SHADER_TESS_EVAL] = 416, \567[MESA_SHADER_GEOMETRY] = 256, \568}, \569}570571#define GFX9_LP_FEATURES_3X6 \572GFX9_LP_FEATURES, \573.num_subslices = { 3, }, \574.num_eu_per_subslice = 6575576#define GFX9_LP_FEATURES_2X6 \577GFX9_LP_FEATURES, \578.num_subslices = { 2, }, \579.num_eu_per_subslice = 6, \580.max_vs_threads = 56, \581.max_tcs_threads = 56, \582.max_tes_threads = 56, \583.max_gs_threads = 56, \584.max_cs_threads = 6 * 6, \585.urb = { \586.min_entries = { \587[MESA_SHADER_VERTEX] = 34, \588[MESA_SHADER_TESS_EVAL] = 34, \589}, \590.max_entries = { \591[MESA_SHADER_VERTEX] = 352, \592[MESA_SHADER_TESS_CTRL] = 128, \593[MESA_SHADER_TESS_EVAL] = 208, \594[MESA_SHADER_GEOMETRY] = 128, \595}, \596}597598#define GFX9_FEATURES \599GFX8_FEATURES, \600GFX9_HW_INFO, \601.has_sample_with_hiz = true602603static const struct intel_device_info intel_device_info_skl_gt1 = {604GFX9_FEATURES, .gt = 1,605.is_skylake = true,606.num_slices = 1,607.num_subslices = { 2, },608.num_eu_per_subslice = 6,609.l3_banks = 2,610/* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions611* leading to some vertices to go missing if we use too much URB.612*/613.urb.max_entries[MESA_SHADER_VERTEX] = 928,614.simulator_id = 12,615};616617static const struct intel_device_info intel_device_info_skl_gt2 = {618GFX9_FEATURES, .gt = 2,619.is_skylake = true,620.num_slices = 1,621.num_subslices = { 3, },622.num_eu_per_subslice = 8,623.l3_banks = 4,624.simulator_id = 12,625};626627static const struct intel_device_info intel_device_info_skl_gt3 = {628GFX9_FEATURES, .gt = 3,629.is_skylake = true,630.num_slices = 2,631.num_subslices = { 3, 3, },632.num_eu_per_subslice = 8,633.l3_banks = 8,634.simulator_id = 12,635};636637static const struct intel_device_info intel_device_info_skl_gt4 = {638GFX9_FEATURES, .gt = 4,639.is_skylake = true,640.num_slices = 3,641.num_subslices = { 3, 3, 3, },642.num_eu_per_subslice = 8,643.l3_banks = 12,644/* From the "L3 Allocation and Programming" documentation:645*646* "URB is limited to 1008KB due to programming restrictions. This is not a647* restriction of the L3 implementation, but of the FF and other clients.648* Therefore, in a GT4 implementation it is possible for the programmed649* allocation of the L3 data array to provide 3*384KB=1152KB for URB, but650* only 1008KB of this will be used."651*/652.simulator_id = 12,653};654655static const struct intel_device_info intel_device_info_bxt = {656GFX9_LP_FEATURES_3X6,657.is_broxton = true,658.l3_banks = 2,659.simulator_id = 14,660};661662static const struct intel_device_info intel_device_info_bxt_2x6 = {663GFX9_LP_FEATURES_2X6,664.is_broxton = true,665.l3_banks = 1,666.simulator_id = 14,667};668/*669* Note: for all KBL SKUs, the PRM says SKL for GS entries, not SKL+.670* There's no KBL entry. Using the default SKL (GFX9) GS entries value.671*/672673static const struct intel_device_info intel_device_info_kbl_gt1 = {674GFX9_FEATURES,675.is_kabylake = true,676.gt = 1,677678.max_cs_threads = 7 * 6,679.num_slices = 1,680.num_subslices = { 2, },681.num_eu_per_subslice = 6,682.l3_banks = 2,683/* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions684* leading to some vertices to go missing if we use too much URB.685*/686.urb.max_entries[MESA_SHADER_VERTEX] = 928,687.urb.max_entries[MESA_SHADER_GEOMETRY] = 256,688.simulator_id = 16,689};690691static const struct intel_device_info intel_device_info_kbl_gt1_5 = {692GFX9_FEATURES,693.is_kabylake = true,694.gt = 1,695696.max_cs_threads = 7 * 6,697.num_slices = 1,698.num_subslices = { 3, },699.num_eu_per_subslice = 6,700.l3_banks = 4,701.simulator_id = 16,702};703704static const struct intel_device_info intel_device_info_kbl_gt2 = {705GFX9_FEATURES,706.is_kabylake = true,707.gt = 2,708709.num_slices = 1,710.num_subslices = { 3, },711.num_eu_per_subslice = 8,712.l3_banks = 4,713.simulator_id = 16,714};715716static const struct intel_device_info intel_device_info_kbl_gt3 = {717GFX9_FEATURES,718.is_kabylake = true,719.gt = 3,720721.num_slices = 2,722.num_subslices = { 3, 3, },723.num_eu_per_subslice = 8,724.l3_banks = 8,725.simulator_id = 16,726};727728static const struct intel_device_info intel_device_info_kbl_gt4 = {729GFX9_FEATURES,730.is_kabylake = true,731.gt = 4,732733/*734* From the "L3 Allocation and Programming" documentation:735*736* "URB is limited to 1008KB due to programming restrictions. This737* is not a restriction of the L3 implementation, but of the FF and738* other clients. Therefore, in a GT4 implementation it is739* possible for the programmed allocation of the L3 data array to740* provide 3*384KB=1152KB for URB, but only 1008KB of this741* will be used."742*/743.num_slices = 3,744.num_subslices = { 3, 3, 3, },745.num_eu_per_subslice = 8,746.l3_banks = 12,747.simulator_id = 16,748};749750static const struct intel_device_info intel_device_info_glk = {751GFX9_LP_FEATURES_3X6,752.is_geminilake = true,753.l3_banks = 2,754.simulator_id = 17,755};756757static const struct intel_device_info intel_device_info_glk_2x6 = {758GFX9_LP_FEATURES_2X6,759.is_geminilake = true,760.l3_banks = 2,761.simulator_id = 17,762};763764static const struct intel_device_info intel_device_info_cfl_gt1 = {765GFX9_FEATURES,766.is_coffeelake = true,767.gt = 1,768769.num_slices = 1,770.num_subslices = { 2, },771.num_eu_per_subslice = 6,772.l3_banks = 2,773/* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions774* leading to some vertices to go missing if we use too much URB.775*/776.urb.max_entries[MESA_SHADER_VERTEX] = 928,777.urb.max_entries[MESA_SHADER_GEOMETRY] = 256,778.simulator_id = 24,779};780static const struct intel_device_info intel_device_info_cfl_gt2 = {781GFX9_FEATURES,782.is_coffeelake = true,783.gt = 2,784785.num_slices = 1,786.num_subslices = { 3, },787.num_eu_per_subslice = 8,788.l3_banks = 4,789.simulator_id = 24,790};791792static const struct intel_device_info intel_device_info_cfl_gt3 = {793GFX9_FEATURES,794.is_coffeelake = true,795.gt = 3,796797.num_slices = 2,798.num_subslices = { 3, 3, },799.num_eu_per_subslice = 8,800.l3_banks = 8,801.simulator_id = 24,802};803804#define subslices(args...) { args, }805806#define GFX11_HW_INFO \807.ver = 11, \808.has_pln = false, \809.max_vs_threads = 364, \810.max_gs_threads = 224, \811.max_tcs_threads = 224, \812.max_tes_threads = 364, \813.max_cs_threads = 56, \814.cs_prefetch_size = 512815816#define GFX11_FEATURES(_gt, _slices, _subslices, _l3) \817GFX8_FEATURES, \818GFX11_HW_INFO, \819.has_64bit_float = false, \820.has_64bit_int = false, \821.has_integer_dword_mul = false, \822.has_sample_with_hiz = false, \823.gt = _gt, .num_slices = _slices, .l3_banks = _l3, \824.num_subslices = _subslices, \825.num_eu_per_subslice = 8826827#define GFX11_URB_MIN_MAX_ENTRIES \828.min_entries = { \829[MESA_SHADER_VERTEX] = 64, \830[MESA_SHADER_TESS_EVAL] = 34, \831}, \832.max_entries = { \833[MESA_SHADER_VERTEX] = 2384, \834[MESA_SHADER_TESS_CTRL] = 1032, \835[MESA_SHADER_TESS_EVAL] = 2384, \836[MESA_SHADER_GEOMETRY] = 1032, \837}838839static const struct intel_device_info intel_device_info_icl_gt2 = {840GFX11_FEATURES(2, 1, subslices(8), 8),841.urb = {842GFX11_URB_MIN_MAX_ENTRIES,843},844.simulator_id = 19,845};846847static const struct intel_device_info intel_device_info_icl_gt1_5 = {848GFX11_FEATURES(1, 1, subslices(6), 6),849.urb = {850GFX11_URB_MIN_MAX_ENTRIES,851},852.simulator_id = 19,853};854855static const struct intel_device_info intel_device_info_icl_gt1 = {856GFX11_FEATURES(1, 1, subslices(4), 6),857.urb = {858GFX11_URB_MIN_MAX_ENTRIES,859},860.simulator_id = 19,861};862863static const struct intel_device_info intel_device_info_icl_gt0_5 = {864GFX11_FEATURES(1, 1, subslices(1), 6),865.urb = {866GFX11_URB_MIN_MAX_ENTRIES,867},868.simulator_id = 19,869};870871#define GFX11_LP_FEATURES \872.is_elkhartlake = true, \873.urb = { \874GFX11_URB_MIN_MAX_ENTRIES, \875}, \876.disable_ccs_repack = true, \877.simulator_id = 28878879static const struct intel_device_info intel_device_info_ehl_4x8 = {880GFX11_FEATURES(1, 1, subslices(4), 4),881GFX11_LP_FEATURES,882};883884static const struct intel_device_info intel_device_info_ehl_4x6 = {885GFX11_FEATURES(1, 1, subslices(4), 4),886GFX11_LP_FEATURES,887.num_eu_per_subslice = 6,888};889890static const struct intel_device_info intel_device_info_ehl_4x5 = {891GFX11_FEATURES(1, 1, subslices(4), 4),892GFX11_LP_FEATURES,893.num_eu_per_subslice = 5,894};895896static const struct intel_device_info intel_device_info_ehl_4x4 = {897GFX11_FEATURES(1, 1, subslices(4), 4),898GFX11_LP_FEATURES,899.num_eu_per_subslice = 4,900};901902static const struct intel_device_info intel_device_info_ehl_2x8 = {903GFX11_FEATURES(1, 1, subslices(2), 4),904GFX11_LP_FEATURES,905};906907static const struct intel_device_info intel_device_info_ehl_2x4 = {908GFX11_FEATURES(1, 1, subslices(2), 4),909GFX11_LP_FEATURES,910.num_eu_per_subslice =4,911};912913#define GFX12_URB_MIN_MAX_ENTRIES \914.min_entries = { \915[MESA_SHADER_VERTEX] = 64, \916[MESA_SHADER_TESS_EVAL] = 34, \917}, \918.max_entries = { \919[MESA_SHADER_VERTEX] = 3576, \920[MESA_SHADER_TESS_CTRL] = 1548, \921[MESA_SHADER_TESS_EVAL] = 3576, \922/* Wa_14013840143 */ \923[MESA_SHADER_GEOMETRY] = 1536, \924}925926#define GFX12_HW_INFO \927.ver = 12, \928.has_pln = false, \929.has_sample_with_hiz = false, \930.has_aux_map = true, \931.max_vs_threads = 546, \932.max_gs_threads = 336, \933.max_tcs_threads = 336, \934.max_tes_threads = 546, \935.max_cs_threads = 112, /* threads per DSS */ \936.urb = { \937GFX12_URB_MIN_MAX_ENTRIES, \938}939940#define GFX12_FEATURES(_gt, _slices, _l3) \941GFX8_FEATURES, \942GFX12_HW_INFO, \943.has_64bit_float = false, \944.has_64bit_int = false, \945.has_integer_dword_mul = false, \946.gt = _gt, .num_slices = _slices, .l3_banks = _l3, \947.simulator_id = 22, \948.num_eu_per_subslice = 16, \949.cs_prefetch_size = 512950951#define dual_subslices(args...) { args, }952953#define GFX12_GT05_FEATURES \954GFX12_FEATURES(1, 1, 4), \955.num_subslices = dual_subslices(1)956957#define GFX12_GT_FEATURES(_gt) \958GFX12_FEATURES(_gt, 1, _gt == 1 ? 4 : 8), \959.num_subslices = dual_subslices(_gt == 1 ? 2 : 6)960961static const struct intel_device_info intel_device_info_tgl_gt1 = {962GFX12_GT_FEATURES(1),963.is_tigerlake = true,964};965966static const struct intel_device_info intel_device_info_tgl_gt2 = {967GFX12_GT_FEATURES(2),968.is_tigerlake = true,969};970971static const struct intel_device_info intel_device_info_rkl_gt05 = {972GFX12_GT05_FEATURES,973.is_rocketlake = true,974};975976static const struct intel_device_info intel_device_info_rkl_gt1 = {977GFX12_GT_FEATURES(1),978.is_rocketlake = true,979};980981static const struct intel_device_info intel_device_info_adl_gt05 = {982GFX12_GT05_FEATURES,983.is_alderlake = true,984};985986static const struct intel_device_info intel_device_info_adl_gt1 = {987GFX12_GT_FEATURES(1),988.is_alderlake = true,989};990991static const struct intel_device_info intel_device_info_adl_gt2 = {992GFX12_GT_FEATURES(2),993.is_alderlake = true,994};995996#define GFX12_DG1_FEATURES \997GFX12_GT_FEATURES(2), \998.is_dg1 = true, \999.has_llc = false, \1000.has_local_mem = true, \1001.urb.size = 768, \1002.simulator_id = 3010031004UNUSED static const struct intel_device_info intel_device_info_dg1 = {1005GFX12_DG1_FEATURES,1006};10071008static void1009intel_device_info_set_eu_mask(struct intel_device_info *devinfo,1010unsigned slice,1011unsigned subslice,1012unsigned eu_mask)1013{1014unsigned subslice_offset = slice * devinfo->eu_slice_stride +1015subslice * devinfo->eu_subslice_stride;10161017for (unsigned b_eu = 0; b_eu < devinfo->eu_subslice_stride; b_eu++) {1018devinfo->eu_masks[subslice_offset + b_eu] =1019(((1U << devinfo->num_eu_per_subslice) - 1) >> (b_eu * 8)) & 0xff;1020}1021}10221023/* Generate slice/subslice/eu masks from number of1024* slices/subslices/eu_per_subslices in the per generation/gt intel_device_info1025* structure.1026*1027* These can be overridden with values reported by the kernel either from1028* getparam SLICE_MASK/SUBSLICE_MASK values or from the kernel version 4.17+1029* through the i915 query uapi.1030*/1031static void1032fill_masks(struct intel_device_info *devinfo)1033{1034devinfo->slice_masks = (1U << devinfo->num_slices) - 1;10351036/* Subslice masks */1037unsigned max_subslices = 0;1038for (int s = 0; s < devinfo->num_slices; s++)1039max_subslices = MAX2(devinfo->num_subslices[s], max_subslices);1040devinfo->subslice_slice_stride = DIV_ROUND_UP(max_subslices, 8);10411042for (int s = 0; s < devinfo->num_slices; s++) {1043devinfo->subslice_masks[s * devinfo->subslice_slice_stride] =1044(1U << devinfo->num_subslices[s]) - 1;1045}10461047/* EU masks */1048devinfo->eu_subslice_stride = DIV_ROUND_UP(devinfo->num_eu_per_subslice, 8);1049devinfo->eu_slice_stride = max_subslices * devinfo->eu_subslice_stride;10501051for (int s = 0; s < devinfo->num_slices; s++) {1052for (int ss = 0; ss < devinfo->num_subslices[s]; ss++) {1053intel_device_info_set_eu_mask(devinfo, s, ss,1054(1U << devinfo->num_eu_per_subslice) - 1);1055}1056}1057}10581059static void1060reset_masks(struct intel_device_info *devinfo)1061{1062devinfo->subslice_slice_stride = 0;1063devinfo->eu_subslice_stride = 0;1064devinfo->eu_slice_stride = 0;10651066devinfo->num_slices = 0;1067devinfo->num_eu_per_subslice = 0;1068memset(devinfo->num_subslices, 0, sizeof(devinfo->num_subslices));10691070memset(&devinfo->slice_masks, 0, sizeof(devinfo->slice_masks));1071memset(devinfo->subslice_masks, 0, sizeof(devinfo->subslice_masks));1072memset(devinfo->eu_masks, 0, sizeof(devinfo->eu_masks));1073memset(devinfo->ppipe_subslices, 0, sizeof(devinfo->ppipe_subslices));1074}10751076static void1077update_from_topology(struct intel_device_info *devinfo,1078const struct drm_i915_query_topology_info *topology)1079{1080reset_masks(devinfo);10811082devinfo->subslice_slice_stride = topology->subslice_stride;10831084devinfo->eu_subslice_stride = DIV_ROUND_UP(topology->max_eus_per_subslice, 8);1085devinfo->eu_slice_stride = topology->max_subslices * devinfo->eu_subslice_stride;10861087assert(sizeof(devinfo->slice_masks) >= DIV_ROUND_UP(topology->max_slices, 8));1088memcpy(&devinfo->slice_masks, topology->data, DIV_ROUND_UP(topology->max_slices, 8));1089devinfo->num_slices = __builtin_popcount(devinfo->slice_masks);10901091uint32_t subslice_mask_len =1092topology->max_slices * topology->subslice_stride;1093assert(sizeof(devinfo->subslice_masks) >= subslice_mask_len);1094memcpy(devinfo->subslice_masks, &topology->data[topology->subslice_offset],1095subslice_mask_len);10961097uint32_t n_subslices = 0;1098for (int s = 0; s < topology->max_slices; s++) {1099if ((devinfo->slice_masks & (1 << s)) == 0)1100continue;11011102for (int b = 0; b < devinfo->subslice_slice_stride; b++) {1103devinfo->num_subslices[s] +=1104__builtin_popcount(devinfo->subslice_masks[s * devinfo->subslice_slice_stride + b]);1105}1106n_subslices += devinfo->num_subslices[s];1107}1108assert(n_subslices > 0);11091110if (devinfo->ver >= 11) {1111/* On current ICL+ hardware we only have one slice. */1112assert(devinfo->slice_masks == 1);11131114/* Count the number of subslices on each pixel pipe. Assume that every1115* contiguous group of 4 subslices in the mask belong to the same pixel1116* pipe. However note that on TGL the kernel returns a mask of enabled1117* *dual* subslices instead of actual subslices somewhat confusingly, so1118* each pixel pipe only takes 2 bits in the mask even though it's still1119* 4 subslices.1120*/1121const unsigned ppipe_bits = devinfo->ver >= 12 ? 2 : 4;1122for (unsigned p = 0; p < INTEL_DEVICE_MAX_PIXEL_PIPES; p++) {1123const unsigned ppipe_mask = BITFIELD_RANGE(p * ppipe_bits, ppipe_bits);1124devinfo->ppipe_subslices[p] =1125__builtin_popcount(devinfo->subslice_masks[0] & ppipe_mask);1126}1127}11281129if (devinfo->ver == 12 && devinfo->num_slices == 1) {1130if (n_subslices >= 6) {1131assert(n_subslices == 6);1132devinfo->l3_banks = 8;1133} else if (n_subslices > 2) {1134devinfo->l3_banks = 6;1135} else {1136devinfo->l3_banks = 4;1137}1138}11391140uint32_t eu_mask_len =1141topology->eu_stride * topology->max_subslices * topology->max_slices;1142assert(sizeof(devinfo->eu_masks) >= eu_mask_len);1143memcpy(devinfo->eu_masks, &topology->data[topology->eu_offset], eu_mask_len);11441145uint32_t n_eus = 0;1146for (int b = 0; b < eu_mask_len; b++)1147n_eus += __builtin_popcount(devinfo->eu_masks[b]);11481149devinfo->num_eu_per_subslice = DIV_ROUND_UP(n_eus, n_subslices);1150}11511152static bool1153update_from_masks(struct intel_device_info *devinfo, uint32_t slice_mask,1154uint32_t subslice_mask, uint32_t n_eus)1155{1156struct drm_i915_query_topology_info *topology;11571158assert((slice_mask & 0xff) == slice_mask);11591160size_t data_length = 100;11611162topology = calloc(1, sizeof(*topology) + data_length);1163if (!topology)1164return false;11651166topology->max_slices = util_last_bit(slice_mask);1167topology->max_subslices = util_last_bit(subslice_mask);11681169topology->subslice_offset = DIV_ROUND_UP(topology->max_slices, 8);1170topology->subslice_stride = DIV_ROUND_UP(topology->max_subslices, 8);11711172uint32_t n_subslices = __builtin_popcount(slice_mask) *1173__builtin_popcount(subslice_mask);1174uint32_t num_eu_per_subslice = DIV_ROUND_UP(n_eus, n_subslices);1175uint32_t eu_mask = (1U << num_eu_per_subslice) - 1;11761177topology->eu_offset = topology->subslice_offset +1178DIV_ROUND_UP(topology->max_subslices, 8);1179topology->eu_stride = DIV_ROUND_UP(num_eu_per_subslice, 8);11801181/* Set slice mask in topology */1182for (int b = 0; b < topology->subslice_offset; b++)1183topology->data[b] = (slice_mask >> (b * 8)) & 0xff;11841185for (int s = 0; s < topology->max_slices; s++) {11861187/* Set subslice mask in topology */1188for (int b = 0; b < topology->subslice_stride; b++) {1189int subslice_offset = topology->subslice_offset +1190s * topology->subslice_stride + b;11911192topology->data[subslice_offset] = (subslice_mask >> (b * 8)) & 0xff;1193}11941195/* Set eu mask in topology */1196for (int ss = 0; ss < topology->max_subslices; ss++) {1197for (int b = 0; b < topology->eu_stride; b++) {1198int eu_offset = topology->eu_offset +1199(s * topology->max_subslices + ss) * topology->eu_stride + b;12001201topology->data[eu_offset] = (eu_mask >> (b * 8)) & 0xff;1202}1203}1204}12051206update_from_topology(devinfo, topology);1207free(topology);12081209return true;1210}12111212static bool1213getparam(int fd, uint32_t param, int *value)1214{1215int tmp;12161217struct drm_i915_getparam gp = {1218.param = param,1219.value = &tmp,1220};12211222int ret = intel_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);1223if (ret != 0)1224return false;12251226*value = tmp;1227return true;1228}12291230bool1231intel_get_device_info_from_pci_id(int pci_id,1232struct intel_device_info *devinfo)1233{1234switch (pci_id) {1235#undef CHIPSET1236#define CHIPSET(id, family, fam_str, name) \1237case id: *devinfo = intel_device_info_##family; break;1238#include "pci_ids/i965_pci_ids.h"1239#include "pci_ids/iris_pci_ids.h"12401241#undef CHIPSET1242#define CHIPSET(id, fam_str, name) \1243case id: *devinfo = intel_device_info_gfx3; break;1244#include "pci_ids/i915_pci_ids.h"12451246default:1247mesa_logw("Driver does not support the 0x%x PCI ID.", pci_id);1248return false;1249}12501251fill_masks(devinfo);12521253/* From the Skylake PRM, 3DSTATE_PS::Scratch Space Base Pointer:1254*1255* "Scratch Space per slice is computed based on 4 sub-slices. SW must1256* allocate scratch space enough so that each slice has 4 slices allowed."1257*1258* The equivalent internal documentation says that this programming note1259* applies to all Gfx9+ platforms.1260*1261* The hardware typically calculates the scratch space pointer by taking1262* the base address, and adding per-thread-scratch-space * thread ID.1263* Extra padding can be necessary depending how the thread IDs are1264* calculated for a particular shader stage.1265*/12661267switch(devinfo->ver) {1268case 9:1269devinfo->max_wm_threads = 64 /* threads-per-PSD */1270* devinfo->num_slices1271* 4; /* effective subslices per slice */1272break;1273case 11:1274case 12:1275devinfo->max_wm_threads = 128 /* threads-per-PSD */1276* devinfo->num_slices1277* 8; /* subslices per slice */1278break;1279default:1280assert(devinfo->ver < 9);1281break;1282}12831284assert(devinfo->num_slices <= ARRAY_SIZE(devinfo->num_subslices));12851286if (devinfo->verx10 == 0)1287devinfo->verx10 = devinfo->ver * 10;12881289devinfo->chipset_id = pci_id;1290return true;1291}12921293const char *1294intel_get_device_name(int devid)1295{1296switch (devid) {1297#undef CHIPSET1298#define CHIPSET(id, family, fam_str, name) case id: return name " (" fam_str ")"; break;1299#include "pci_ids/i965_pci_ids.h"1300#include "pci_ids/iris_pci_ids.h"1301default:1302return NULL;1303}1304}13051306/**1307* for gfx8/gfx9, SLICE_MASK/SUBSLICE_MASK can be used to compute the topology1308* (kernel 4.13+)1309*/1310static bool1311getparam_topology(struct intel_device_info *devinfo, int fd)1312{1313int slice_mask = 0;1314if (!getparam(fd, I915_PARAM_SLICE_MASK, &slice_mask))1315goto maybe_warn;13161317int n_eus;1318if (!getparam(fd, I915_PARAM_EU_TOTAL, &n_eus))1319goto maybe_warn;13201321int subslice_mask = 0;1322if (!getparam(fd, I915_PARAM_SUBSLICE_MASK, &subslice_mask))1323goto maybe_warn;13241325return update_from_masks(devinfo, slice_mask, subslice_mask, n_eus);13261327maybe_warn:1328/* Only with Gfx8+ are we starting to see devices with fusing that can only1329* be detected at runtime.1330*/1331if (devinfo->ver >= 8)1332mesa_logw("Kernel 4.1 required to properly query GPU properties.");13331334return false;1335}13361337/**1338* preferred API for updating the topology in devinfo (kernel 4.17+)1339*/1340static bool1341query_topology(struct intel_device_info *devinfo, int fd)1342{1343struct drm_i915_query_item item = {1344.query_id = DRM_I915_QUERY_TOPOLOGY_INFO,1345};1346struct drm_i915_query query = {1347.num_items = 1,1348.items_ptr = (uintptr_t) &item,1349};13501351if (intel_ioctl(fd, DRM_IOCTL_I915_QUERY, &query))1352return false;13531354if (item.length < 0)1355return false;13561357struct drm_i915_query_topology_info *topo_info =1358(struct drm_i915_query_topology_info *) calloc(1, item.length);1359item.data_ptr = (uintptr_t) topo_info;13601361if (intel_ioctl(fd, DRM_IOCTL_I915_QUERY, &query) ||1362item.length <= 0)1363return false;13641365update_from_topology(devinfo, topo_info);13661367free(topo_info);13681369return true;13701371}13721373int1374intel_get_aperture_size(int fd, uint64_t *size)1375{1376struct drm_i915_gem_get_aperture aperture = { 0 };13771378int ret = intel_ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);1379if (ret == 0 && size)1380*size = aperture.aper_size;13811382return ret;1383}13841385static bool1386has_get_tiling(int fd)1387{1388int ret;13891390struct drm_i915_gem_create gem_create = {1391.size = 4096,1392};13931394if (intel_ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create)) {1395unreachable("Failed to create GEM BO");1396return false;1397}13981399struct drm_i915_gem_get_tiling get_tiling = {1400.handle = gem_create.handle,1401};1402ret = intel_ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &get_tiling);14031404struct drm_gem_close close = {1405.handle = gem_create.handle,1406};1407intel_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close);14081409return ret == 0;1410}14111412bool1413intel_get_device_info_from_fd(int fd, struct intel_device_info *devinfo)1414{1415int devid = 0;14161417const char *devid_override = getenv("INTEL_DEVID_OVERRIDE");1418if (devid_override && strlen(devid_override) > 0) {1419if (geteuid() == getuid()) {1420devid = intel_device_name_to_pci_device_id(devid_override);1421/* Fallback to PCI ID. */1422if (devid <= 0)1423devid = strtol(devid_override, NULL, 0);1424if (devid <= 0) {1425mesa_loge("Invalid INTEL_DEVID_OVERRIDE=\"%s\". "1426"Use a valid numeric PCI ID or one of the supported "1427"platform names:", devid_override);1428for (unsigned i = 0; i < ARRAY_SIZE(name_map); i++)1429mesa_loge(" %s", name_map[i].name);1430return false;1431}1432} else {1433mesa_logi("Ignoring INTEL_DEVID_OVERRIDE=\"%s\" because "1434"real and effective user ID don't match.", devid_override);1435}1436}14371438if (devid > 0) {1439if (!intel_get_device_info_from_pci_id(devid, devinfo))1440return false;1441devinfo->no_hw = true;1442} else {1443/* query the device id */1444if (!getparam(fd, I915_PARAM_CHIPSET_ID, &devid))1445return false;1446if (!intel_get_device_info_from_pci_id(devid, devinfo))1447return false;1448devinfo->no_hw = false;1449}14501451if (devinfo->ver == 10) {1452mesa_loge("Gfx10 support is redacted.");1453return false;1454}14551456/* remaining initializion queries the kernel for device info */1457if (devinfo->no_hw)1458return true;14591460int timestamp_frequency;1461if (getparam(fd, I915_PARAM_CS_TIMESTAMP_FREQUENCY,1462×tamp_frequency))1463devinfo->timestamp_frequency = timestamp_frequency;1464else if (devinfo->ver >= 10) {1465mesa_loge("Kernel 4.15 required to read the CS timestamp frequency.");1466return false;1467}14681469if (!getparam(fd, I915_PARAM_REVISION, &devinfo->revision))1470devinfo->revision = 0;14711472if (!query_topology(devinfo, fd)) {1473if (devinfo->ver >= 10) {1474/* topology uAPI required for CNL+ (kernel 4.17+) */1475return false;1476}14771478/* else use the kernel 4.13+ api for gfx8+. For older kernels, topology1479* will be wrong, affecting GPU metrics. In this case, fail silently.1480*/1481getparam_topology(devinfo, fd);1482}14831484intel_get_aperture_size(fd, &devinfo->aperture_bytes);1485devinfo->has_tiling_uapi = has_get_tiling(fd);14861487return true;1488}148914901491