Path: blob/21.2-virgl/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
4574 views
1#include "util/format/u_format.h"23#include "nv50/nv50_context.h"45#include "nv50/g80_defs.xml.h"67struct nv50_transfer {8struct pipe_transfer base;9struct nv50_m2mf_rect rect[2];10uint32_t nblocksx;11uint32_t nblocksy;12};1314void15nv50_m2mf_rect_setup(struct nv50_m2mf_rect *rect,16struct pipe_resource *restrict res, unsigned l,17unsigned x, unsigned y, unsigned z)18{19struct nv50_miptree *mt = nv50_miptree(res);20const unsigned w = u_minify(res->width0, l);21const unsigned h = u_minify(res->height0, l);2223rect->bo = mt->base.bo;24rect->domain = mt->base.domain;25rect->base = mt->level[l].offset;26if (mt->base.bo->offset != mt->base.address)27rect->base += mt->base.address - mt->base.bo->offset;28rect->pitch = mt->level[l].pitch;29if (util_format_is_plain(res->format)) {30rect->width = w << mt->ms_x;31rect->height = h << mt->ms_y;32rect->x = x << mt->ms_x;33rect->y = y << mt->ms_y;34} else {35rect->width = util_format_get_nblocksx(res->format, w);36rect->height = util_format_get_nblocksy(res->format, h);37rect->x = util_format_get_nblocksx(res->format, x);38rect->y = util_format_get_nblocksy(res->format, y);39}40rect->tile_mode = mt->level[l].tile_mode;41rect->cpp = util_format_get_blocksize(res->format);4243if (mt->layout_3d) {44rect->z = z;45rect->depth = u_minify(res->depth0, l);46} else {47rect->base += z * mt->layer_stride;48rect->z = 0;49rect->depth = 1;50}51}5253/* This is very similar to nv50_2d_texture_do_copy, but doesn't require54* miptree objects. Maybe refactor? Although it's not straightforward.55*/56static void57nv50_2d_transfer_rect(struct nv50_context *nv50,58const struct nv50_m2mf_rect *dst,59const struct nv50_m2mf_rect *src,60uint32_t nblocksx, uint32_t nblocksy)61{62struct nouveau_pushbuf *push = nv50->base.pushbuf;63struct nouveau_bufctx *bctx = nv50->bufctx;64const int cpp = dst->cpp;6566nouveau_bufctx_refn(bctx, 0, src->bo, src->domain | NOUVEAU_BO_RD);67nouveau_bufctx_refn(bctx, 0, dst->bo, dst->domain | NOUVEAU_BO_WR);68nouveau_pushbuf_bufctx(push, bctx);69nouveau_pushbuf_validate(push);7071uint32_t format;72switch (cpp) {73case 1:74format = G80_SURFACE_FORMAT_R8_UNORM;75break;76case 2:77format = G80_SURFACE_FORMAT_R16_UNORM;78break;79case 4:80format = G80_SURFACE_FORMAT_BGRA8_UNORM;81break;82case 8:83format = G80_SURFACE_FORMAT_RGBA16_FLOAT;84break;85case 16:86format = G80_SURFACE_FORMAT_RGBA32_FLOAT;87break;88default:89assert(!"Unexpected cpp");90format = G80_SURFACE_FORMAT_R8_UNORM;91}9293if (nouveau_bo_memtype(src->bo)) {94BEGIN_NV04(push, NV50_2D(SRC_FORMAT), 5);95PUSH_DATA (push, format);96PUSH_DATA (push, 0);97PUSH_DATA (push, src->tile_mode);98PUSH_DATA (push, src->depth);99PUSH_DATA (push, src->z);100BEGIN_NV04(push, NV50_2D(SRC_WIDTH), 4);101PUSH_DATA (push, src->width);102PUSH_DATA (push, src->height);103PUSH_DATAh(push, src->bo->offset + src->base);104PUSH_DATA (push, src->bo->offset + src->base);105} else {106BEGIN_NV04(push, NV50_2D(SRC_FORMAT), 2);107PUSH_DATA (push, format);108PUSH_DATA (push, 1);109BEGIN_NV04(push, NV50_2D(SRC_PITCH), 5);110PUSH_DATA (push, src->pitch);111PUSH_DATA (push, src->width);112PUSH_DATA (push, src->height);113PUSH_DATAh(push, src->bo->offset + src->base);114PUSH_DATA (push, src->bo->offset + src->base);115}116117if (nouveau_bo_memtype(dst->bo)) {118BEGIN_NV04(push, NV50_2D(DST_FORMAT), 5);119PUSH_DATA (push, format);120PUSH_DATA (push, 0);121PUSH_DATA (push, dst->tile_mode);122PUSH_DATA (push, dst->depth);123PUSH_DATA (push, dst->z);124BEGIN_NV04(push, NV50_2D(DST_WIDTH), 4);125PUSH_DATA (push, dst->width);126PUSH_DATA (push, dst->height);127PUSH_DATAh(push, dst->bo->offset + dst->base);128PUSH_DATA (push, dst->bo->offset + dst->base);129} else {130BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2);131PUSH_DATA (push, format);132PUSH_DATA (push, 1);133BEGIN_NV04(push, NV50_2D(DST_PITCH), 5);134PUSH_DATA (push, dst->pitch);135PUSH_DATA (push, dst->width);136PUSH_DATA (push, dst->height);137PUSH_DATAh(push, dst->bo->offset + dst->base);138PUSH_DATA (push, dst->bo->offset + dst->base);139}140141BEGIN_NV04(push, NV50_2D(BLIT_CONTROL), 1);142PUSH_DATA (push, NV50_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE);143BEGIN_NV04(push, NV50_2D(BLIT_DST_X), 4);144PUSH_DATA (push, dst->x);145PUSH_DATA (push, dst->y);146PUSH_DATA (push, nblocksx);147PUSH_DATA (push, nblocksy);148BEGIN_NV04(push, NV50_2D(BLIT_DU_DX_FRACT), 4);149PUSH_DATA (push, 0);150PUSH_DATA (push, 1);151PUSH_DATA (push, 0);152PUSH_DATA (push, 1);153BEGIN_NV04(push, NV50_2D(BLIT_SRC_X_FRACT), 4);154PUSH_DATA (push, 0);155PUSH_DATA (push, src->x);156PUSH_DATA (push, 0);157PUSH_DATA (push, src->y);158159nouveau_bufctx_reset(bctx, 0);160}161162void163nv50_m2mf_transfer_rect(struct nv50_context *nv50,164const struct nv50_m2mf_rect *dst,165const struct nv50_m2mf_rect *src,166uint32_t nblocksx, uint32_t nblocksy)167{168struct nouveau_pushbuf *push = nv50->base.pushbuf;169struct nouveau_bufctx *bctx = nv50->bufctx;170const int cpp = dst->cpp;171uint32_t src_ofst = src->base;172uint32_t dst_ofst = dst->base;173uint32_t height = nblocksy;174uint32_t sy = src->y;175uint32_t dy = dst->y;176177assert(dst->cpp == src->cpp);178179/* Workaround: M2MF appears to break at the 64k boundary for tiled180* textures, which can really only happen with RGBA32 formats.181*/182bool eng2d = false;183if (nouveau_bo_memtype(src->bo)) {184if (src->width * cpp > 65536)185eng2d = true;186}187if (nouveau_bo_memtype(dst->bo)) {188if (dst->width * cpp > 65536)189eng2d = true;190}191if (eng2d) {192nv50_2d_transfer_rect(nv50, dst, src, nblocksx, nblocksy);193return;194}195196nouveau_bufctx_refn(bctx, 0, src->bo, src->domain | NOUVEAU_BO_RD);197nouveau_bufctx_refn(bctx, 0, dst->bo, dst->domain | NOUVEAU_BO_WR);198nouveau_pushbuf_bufctx(push, bctx);199nouveau_pushbuf_validate(push);200201if (nouveau_bo_memtype(src->bo)) {202BEGIN_NV04(push, NV50_M2MF(LINEAR_IN), 6);203PUSH_DATA (push, 0);204PUSH_DATA (push, src->tile_mode);205PUSH_DATA (push, src->width * cpp);206PUSH_DATA (push, src->height);207PUSH_DATA (push, src->depth);208PUSH_DATA (push, src->z);209} else {210src_ofst += src->y * src->pitch + src->x * cpp;211212BEGIN_NV04(push, NV50_M2MF(LINEAR_IN), 1);213PUSH_DATA (push, 1);214BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_PITCH_IN), 1);215PUSH_DATA (push, src->pitch);216}217218if (nouveau_bo_memtype(dst->bo)) {219BEGIN_NV04(push, NV50_M2MF(LINEAR_OUT), 6);220PUSH_DATA (push, 0);221PUSH_DATA (push, dst->tile_mode);222PUSH_DATA (push, dst->width * cpp);223PUSH_DATA (push, dst->height);224PUSH_DATA (push, dst->depth);225PUSH_DATA (push, dst->z);226} else {227dst_ofst += dst->y * dst->pitch + dst->x * cpp;228229BEGIN_NV04(push, NV50_M2MF(LINEAR_OUT), 1);230PUSH_DATA (push, 1);231BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_PITCH_OUT), 1);232PUSH_DATA (push, dst->pitch);233}234235while (height) {236int line_count = height > 2047 ? 2047 : height;237238BEGIN_NV04(push, NV50_M2MF(OFFSET_IN_HIGH), 2);239PUSH_DATAh(push, src->bo->offset + src_ofst);240PUSH_DATAh(push, dst->bo->offset + dst_ofst);241242BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_OFFSET_IN), 2);243PUSH_DATA (push, src->bo->offset + src_ofst);244PUSH_DATA (push, dst->bo->offset + dst_ofst);245246if (nouveau_bo_memtype(src->bo)) {247BEGIN_NV04(push, NV50_M2MF(TILING_POSITION_IN), 1);248PUSH_DATA (push, (sy << 16) | (src->x * cpp));249} else {250src_ofst += line_count * src->pitch;251}252if (nouveau_bo_memtype(dst->bo)) {253BEGIN_NV04(push, NV50_M2MF(TILING_POSITION_OUT), 1);254PUSH_DATA (push, (dy << 16) | (dst->x * cpp));255} else {256dst_ofst += line_count * dst->pitch;257}258259BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_LINE_LENGTH_IN), 4);260PUSH_DATA (push, nblocksx * cpp);261PUSH_DATA (push, line_count);262PUSH_DATA (push, (1 << 8) | (1 << 0));263PUSH_DATA (push, 0);264265height -= line_count;266sy += line_count;267dy += line_count;268}269270nouveau_bufctx_reset(bctx, 0);271}272273void274nv50_sifc_linear_u8(struct nouveau_context *nv,275struct nouveau_bo *dst, unsigned offset, unsigned domain,276unsigned size, const void *data)277{278struct nv50_context *nv50 = nv50_context(&nv->pipe);279struct nouveau_pushbuf *push = nv50->base.pushbuf;280uint32_t *src = (uint32_t *)data;281unsigned count = (size + 3) / 4;282unsigned xcoord = offset & 0xff;283284nouveau_bufctx_refn(nv50->bufctx, 0, dst, domain | NOUVEAU_BO_WR);285nouveau_pushbuf_bufctx(push, nv50->bufctx);286nouveau_pushbuf_validate(push);287288offset &= ~0xff;289290BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2);291PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM);292PUSH_DATA (push, 1);293BEGIN_NV04(push, NV50_2D(DST_PITCH), 5);294PUSH_DATA (push, 262144);295PUSH_DATA (push, 65536);296PUSH_DATA (push, 1);297PUSH_DATAh(push, dst->offset + offset);298PUSH_DATA (push, dst->offset + offset);299BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2);300PUSH_DATA (push, 0);301PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM);302BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10);303PUSH_DATA (push, size);304PUSH_DATA (push, 1);305PUSH_DATA (push, 0);306PUSH_DATA (push, 1);307PUSH_DATA (push, 0);308PUSH_DATA (push, 1);309PUSH_DATA (push, 0);310PUSH_DATA (push, xcoord);311PUSH_DATA (push, 0);312PUSH_DATA (push, 0);313314while (count) {315unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);316317BEGIN_NI04(push, NV50_2D(SIFC_DATA), nr);318PUSH_DATAp(push, src, nr);319320src += nr;321count -= nr;322}323324nouveau_bufctx_reset(nv50->bufctx, 0);325}326327void328nv50_m2mf_copy_linear(struct nouveau_context *nv,329struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom,330struct nouveau_bo *src, unsigned srcoff, unsigned srcdom,331unsigned size)332{333struct nouveau_pushbuf *push = nv->pushbuf;334struct nouveau_bufctx *bctx = nv50_context(&nv->pipe)->bufctx;335336nouveau_bufctx_refn(bctx, 0, src, srcdom | NOUVEAU_BO_RD);337nouveau_bufctx_refn(bctx, 0, dst, dstdom | NOUVEAU_BO_WR);338nouveau_pushbuf_bufctx(push, bctx);339nouveau_pushbuf_validate(push);340341BEGIN_NV04(push, NV50_M2MF(LINEAR_IN), 1);342PUSH_DATA (push, 1);343BEGIN_NV04(push, NV50_M2MF(LINEAR_OUT), 1);344PUSH_DATA (push, 1);345346while (size) {347unsigned bytes = MIN2(size, 1 << 17);348349BEGIN_NV04(push, NV50_M2MF(OFFSET_IN_HIGH), 2);350PUSH_DATAh(push, src->offset + srcoff);351PUSH_DATAh(push, dst->offset + dstoff);352BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_OFFSET_IN), 2);353PUSH_DATA (push, src->offset + srcoff);354PUSH_DATA (push, dst->offset + dstoff);355BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_LINE_LENGTH_IN), 4);356PUSH_DATA (push, bytes);357PUSH_DATA (push, 1);358PUSH_DATA (push, (1 << 8) | (1 << 0));359PUSH_DATA (push, 0);360361srcoff += bytes;362dstoff += bytes;363size -= bytes;364}365366nouveau_bufctx_reset(bctx, 0);367}368369void *370nv50_miptree_transfer_map(struct pipe_context *pctx,371struct pipe_resource *res,372unsigned level,373unsigned usage,374const struct pipe_box *box,375struct pipe_transfer **ptransfer)376{377struct nv50_screen *screen = nv50_screen(pctx->screen);378struct nv50_context *nv50 = nv50_context(pctx);379struct nouveau_device *dev = nv50->screen->base.device;380const struct nv50_miptree *mt = nv50_miptree(res);381struct nv50_transfer *tx;382uint32_t size;383int ret;384unsigned flags = 0;385386if (usage & PIPE_MAP_DIRECTLY)387return NULL;388389tx = CALLOC_STRUCT(nv50_transfer);390if (!tx)391return NULL;392393pipe_resource_reference(&tx->base.resource, res);394395tx->base.level = level;396tx->base.usage = usage;397tx->base.box = *box;398399if (util_format_is_plain(res->format)) {400tx->nblocksx = box->width << mt->ms_x;401tx->nblocksy = box->height << mt->ms_y;402} else {403tx->nblocksx = util_format_get_nblocksx(res->format, box->width);404tx->nblocksy = util_format_get_nblocksy(res->format, box->height);405}406407tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format);408tx->base.layer_stride = tx->nblocksy * tx->base.stride;409410nv50_m2mf_rect_setup(&tx->rect[0], res, level, box->x, box->y, box->z);411412size = tx->base.layer_stride;413414ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,415size * tx->base.box.depth, NULL, &tx->rect[1].bo);416if (ret) {417FREE(tx);418return NULL;419}420421tx->rect[1].cpp = tx->rect[0].cpp;422tx->rect[1].width = tx->nblocksx;423tx->rect[1].height = tx->nblocksy;424tx->rect[1].depth = 1;425tx->rect[1].pitch = tx->base.stride;426tx->rect[1].domain = NOUVEAU_BO_GART;427428if (usage & PIPE_MAP_READ) {429unsigned base = tx->rect[0].base;430unsigned z = tx->rect[0].z;431unsigned i;432for (i = 0; i < box->depth; ++i) {433nv50_m2mf_transfer_rect(nv50, &tx->rect[1], &tx->rect[0],434tx->nblocksx, tx->nblocksy);435if (mt->layout_3d)436tx->rect[0].z++;437else438tx->rect[0].base += mt->layer_stride;439tx->rect[1].base += size;440}441tx->rect[0].z = z;442tx->rect[0].base = base;443tx->rect[1].base = 0;444}445446if (tx->rect[1].bo->map) {447*ptransfer = &tx->base;448return tx->rect[1].bo->map;449}450451if (usage & PIPE_MAP_READ)452flags = NOUVEAU_BO_RD;453if (usage & PIPE_MAP_WRITE)454flags |= NOUVEAU_BO_WR;455456ret = nouveau_bo_map(tx->rect[1].bo, flags, screen->base.client);457if (ret) {458nouveau_bo_ref(NULL, &tx->rect[1].bo);459FREE(tx);460return NULL;461}462463*ptransfer = &tx->base;464return tx->rect[1].bo->map;465}466467void468nv50_miptree_transfer_unmap(struct pipe_context *pctx,469struct pipe_transfer *transfer)470{471struct nv50_context *nv50 = nv50_context(pctx);472struct nv50_transfer *tx = (struct nv50_transfer *)transfer;473struct nv50_miptree *mt = nv50_miptree(tx->base.resource);474unsigned i;475476if (tx->base.usage & PIPE_MAP_WRITE) {477for (i = 0; i < tx->base.box.depth; ++i) {478nv50_m2mf_transfer_rect(nv50, &tx->rect[0], &tx->rect[1],479tx->nblocksx, tx->nblocksy);480if (mt->layout_3d)481tx->rect[0].z++;482else483tx->rect[0].base += mt->layer_stride;484tx->rect[1].base += tx->nblocksy * tx->base.stride;485}486487/* Allow the copies above to finish executing before freeing the source */488nouveau_fence_work(nv50->screen->base.fence.current,489nouveau_fence_unref_bo, tx->rect[1].bo);490} else {491nouveau_bo_ref(NULL, &tx->rect[1].bo);492}493494pipe_resource_reference(&transfer->resource, NULL);495496FREE(tx);497}498499static void500nv50_cb_bo_push(struct nouveau_context *nv,501struct nouveau_bo *bo, unsigned domain,502unsigned bufid,503unsigned offset, unsigned words,504const uint32_t *data)505{506struct nouveau_pushbuf *push = nv->pushbuf;507508assert(!(offset & 3));509510while (words) {511unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN);512513PUSH_SPACE(push, nr + 3);514PUSH_REFN (push, bo, NOUVEAU_BO_WR | domain);515BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);516PUSH_DATA (push, (offset << 6) | bufid);517BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr);518PUSH_DATAp(push, data, nr);519520words -= nr;521data += nr;522offset += nr * 4;523}524}525526void527nv50_cb_push(struct nouveau_context *nv,528struct nv04_resource *res,529unsigned offset, unsigned words, const uint32_t *data)530{531struct nv50_context *nv50 = nv50_context(&nv->pipe);532struct nv50_constbuf *cb = NULL;533int s, bufid;534/* Go through all the constbuf binding points of this buffer and try to535* find one which contains the region to be updated.536*/537for (s = 0; s < NV50_MAX_SHADER_STAGES && !cb; s++) {538uint16_t bindings = res->cb_bindings[s];539while (bindings) {540int i = ffs(bindings) - 1;541uint32_t cb_offset = nv50->constbuf[s][i].offset;542543bindings &= ~(1 << i);544if (cb_offset <= offset &&545cb_offset + nv50->constbuf[s][i].size >= offset + words * 4) {546cb = &nv50->constbuf[s][i];547bufid = s * 16 + i;548break;549}550}551}552553if (cb) {554nv50_cb_bo_push(nv, res->bo, res->domain,555bufid, offset - cb->offset, words, data);556} else {557nv->push_data(nv, res->bo, res->offset + offset, res->domain,558words * 4, data);559}560}561562563