Path: blob/master/drivers/accel/habanalabs/common/mmu/mmu_v1.c
// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "../habanalabs.h"
#include "../../include/hw_ip/mmu/mmu_general.h"

#include <linux/slab.h>

#define MMU_V1_MAX_HOPS	(MMU_HOP4 + 1)

static inline u64 get_hop_pte_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop,
					u64 *hop_addr_arr, u64 virt_addr, enum mmu_hop_num hop_idx)
{
	u64 mask, shift;

	mask = mmu_prop->hop_masks[hop_idx];
	shift = mmu_prop->hop_shifts[hop_idx];
	return hop_addr_arr[hop_idx] +
			ctx->hdev->asic_prop.mmu_pte_size * ((virt_addr & mask) >> shift);
}
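/*
 * Worked example for get_hop_pte_addr() above, using hypothetical
 * property values (the real shifts/masks are ASIC-specific): assume
 * hop_shifts[MMU_HOP3] == 21 and hop_masks[MMU_HOP3] == 0x3FE00000ull,
 * i.e. the hop3 index is VA bits 21..29 (9 bits -> 512 entries), and
 * mmu_pte_size == 8. For virt_addr == 0x100000400000ull the index is
 * (virt_addr & 0x3FE00000) >> 21 == 2, so the PTE address is
 * hop_addr_arr[MMU_HOP3] + 8 * 2. Each hop table is thus a flat array
 * of 512 eight-byte PTEs indexed by one slice of the virtual address.
 */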
static int dram_default_mapping_init(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
		hop2_pte_addr, hop3_pte_addr, pte_val;
	int rc, i, j, hop3_allocated = 0;

	if ((!prop->dram_supports_virtual_memory) ||
			(!hdev->dram_default_page_mapping) ||
			(ctx->asid == HL_KERNEL_ASID_ID))
		return 0;

	num_of_hop3 = prop->dram_size_for_default_page_mapping;
	do_div(num_of_hop3, prop->dram_page_size);
	do_div(num_of_hop3, HOP_PTE_ENTRIES_512);

	/* add hop1 and hop2 */
	total_hops = num_of_hop3 + 2;

	ctx->dram_default_hops = kcalloc(total_hops, HL_PTE_SIZE, GFP_KERNEL);
	if (!ctx->dram_default_hops)
		return -ENOMEM;

	hop0_addr = hl_mmu_dr_get_hop0_addr(ctx);

	hop1_addr = hl_mmu_dr_alloc_hop(ctx);
	if (hop1_addr == ULLONG_MAX) {
		dev_err(hdev->dev, "failed to alloc hop 1\n");
		rc = -ENOMEM;
		goto hop1_err;
	}

	ctx->dram_default_hops[total_hops - 1] = hop1_addr;

	hop2_addr = hl_mmu_dr_alloc_hop(ctx);
	if (hop2_addr == ULLONG_MAX) {
		dev_err(hdev->dev, "failed to alloc hop 2\n");
		rc = -ENOMEM;
		goto hop2_err;
	}

	ctx->dram_default_hops[total_hops - 2] = hop2_addr;

	for (i = 0 ; i < num_of_hop3 ; i++) {
		ctx->dram_default_hops[i] = hl_mmu_dr_alloc_hop(ctx);
		if (ctx->dram_default_hops[i] == ULLONG_MAX) {
			dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
			rc = -ENOMEM;
			goto hop3_err;
		}
		hop3_allocated++;
	}

	/* need only pte 0 in hops 0 and 1 */
	pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
	hl_mmu_dr_write_pte(ctx, hop0_addr, pte_val);

	pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
	hl_mmu_dr_write_pte(ctx, hop1_addr, pte_val);
	hl_mmu_dr_get_pte(ctx, hop1_addr);

	hop2_pte_addr = hop2_addr;
	for (i = 0 ; i < num_of_hop3 ; i++) {
		pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) |
				PAGE_PRESENT_MASK;
		hl_mmu_dr_write_pte(ctx, hop2_pte_addr, pte_val);
		hl_mmu_dr_get_pte(ctx, hop2_addr);
		hop2_pte_addr += HL_PTE_SIZE;
	}

	pte_val = (prop->mmu_dram_default_page_addr & HOP_PHYS_ADDR_MASK) |
			LAST_MASK | PAGE_PRESENT_MASK;

	for (i = 0 ; i < num_of_hop3 ; i++) {
		hop3_pte_addr = ctx->dram_default_hops[i];
		for (j = 0 ; j < HOP_PTE_ENTRIES_512 ; j++) {
			hl_mmu_dr_write_final_pte(ctx, hop3_pte_addr, pte_val);
			hl_mmu_dr_get_pte(ctx, ctx->dram_default_hops[i]);
			hop3_pte_addr += HL_PTE_SIZE;
		}
	}

	hl_mmu_dr_flush(ctx);

	return 0;

hop3_err:
	for (i = 0 ; i < hop3_allocated ; i++)
		hl_mmu_dr_free_hop(ctx, ctx->dram_default_hops[i]);

	hl_mmu_dr_free_hop(ctx, hop2_addr);
hop2_err:
	hl_mmu_dr_free_hop(ctx, hop1_addr);
hop1_err:
	kfree(ctx->dram_default_hops);

	return rc;
}

static void dram_default_mapping_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
		hop2_pte_addr, hop3_pte_addr;
	int i, j;

	if ((!prop->dram_supports_virtual_memory) ||
			(!hdev->dram_default_page_mapping) ||
			(ctx->asid == HL_KERNEL_ASID_ID))
		return;

	num_of_hop3 = prop->dram_size_for_default_page_mapping;
	do_div(num_of_hop3, prop->dram_page_size);
	do_div(num_of_hop3, HOP_PTE_ENTRIES_512);

	hop0_addr = hl_mmu_dr_get_hop0_addr(ctx);
	/* add hop1 and hop2 */
	total_hops = num_of_hop3 + 2;
	hop1_addr = ctx->dram_default_hops[total_hops - 1];
	hop2_addr = ctx->dram_default_hops[total_hops - 2];

	for (i = 0 ; i < num_of_hop3 ; i++) {
		hop3_pte_addr = ctx->dram_default_hops[i];
		for (j = 0 ; j < HOP_PTE_ENTRIES_512 ; j++) {
			hl_mmu_dr_clear_pte(ctx, hop3_pte_addr);
			hl_mmu_dr_put_pte(ctx, ctx->dram_default_hops[i]);
			hop3_pte_addr += HL_PTE_SIZE;
		}
	}

	hop2_pte_addr = hop2_addr;
	for (i = 0 ; i < num_of_hop3 ; i++) {
		hl_mmu_dr_clear_pte(ctx, hop2_pte_addr);
		hl_mmu_dr_put_pte(ctx, hop2_addr);
		hop2_pte_addr += HL_PTE_SIZE;
	}

	hl_mmu_dr_clear_pte(ctx, hop1_addr);
	hl_mmu_dr_put_pte(ctx, hop1_addr);
	hl_mmu_dr_clear_pte(ctx, hop0_addr);

	kfree(ctx->dram_default_hops);

	hl_mmu_dr_flush(ctx);
}
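/*
 * Sizing example for the default DRAM mapping above, with hypothetical
 * numbers: if dram_size_for_default_page_mapping is 64 GB and
 * dram_page_size is 2 MB, there are 32768 default pages, which fit in
 * 32768 / HOP_PTE_ENTRIES_512 == 64 hop3 tables. total_hops is then
 * 64 + 2 == 66: dram_default_hops[0..63] hold the hop3 tables, [64]
 * the single hop2 table and [65] the single hop1 table, matching the
 * "total_hops - 1" / "total_hops - 2" indexing used by both functions.
 * Every hop3 PTE points at the same default DRAM page, so an access to
 * unmapped DRAM lands on that page instead of faulting.
 */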
/**
 * hl_mmu_v1_ctx_init() - initialize a context for using the MMU module.
 * @ctx: pointer to the context structure to initialize.
 *
 * Initialize a hash table to hold all the page-table hops related to this
 * context, and set up the DRAM default page mapping when one is required.
 * Return: 0 on success, non-zero otherwise.
 */
static int hl_mmu_v1_ctx_init(struct hl_ctx *ctx)
{
	hash_init(ctx->mmu_shadow_hash);
	return dram_default_mapping_init(ctx);
}

/*
 * hl_mmu_v1_ctx_fini - disable a ctx from using the mmu module
 *
 * @ctx: pointer to the context structure
 *
 * This function does the following:
 * - Free any pgts which were not freed yet
 * - Free DRAM default page mapping hops
 */
static void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct pgt_info *pgt_info;
	struct hlist_node *tmp;
	int i;

	dram_default_mapping_fini(ctx);

	if (!hash_empty(ctx->mmu_shadow_hash))
		dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
			ctx->asid);

	hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
		dev_err_ratelimited(hdev->dev,
			"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
			pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
		hl_mmu_dr_free_pgt_node(ctx, pgt_info);
	}
}

static int hl_mmu_v1_unmap(struct hl_ctx *ctx,
				u64 virt_addr, bool is_dram_addr)
{
	u64 hop_addr[MMU_V1_MAX_HOPS] = {0}, hop_pte_addr[MMU_V1_MAX_HOPS] = {0}, curr_pte = 0;
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	bool is_huge, clear_hop3 = true;
	int hop_idx;

	/* shifts and masks are the same in PMMU and HPMMU, use one of them */
	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;

	for (hop_idx = MMU_HOP0; hop_idx < MMU_HOP4; hop_idx++) {
		if (hop_idx == MMU_HOP0) {
			hop_addr[hop_idx] = hl_mmu_dr_get_hop0_addr(ctx);
		} else {
			hop_addr[hop_idx] = hl_mmu_get_next_hop_addr(ctx, curr_pte);
			if (hop_addr[hop_idx] == ULLONG_MAX)
				goto not_mapped;
		}

		hop_pte_addr[hop_idx] =
				get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx);

		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx];
	}

	is_huge = curr_pte & mmu_prop->last_mask;

	if (is_dram_addr && !is_huge) {
		dev_err(hdev->dev, "DRAM unmapping should use huge pages only\n");
		return -EFAULT;
	}

	if (!is_huge) {
		hop_idx = MMU_HOP4;
		hop_addr[hop_idx] = hl_mmu_get_next_hop_addr(ctx, curr_pte);
		if (hop_addr[hop_idx] == ULLONG_MAX)
			goto not_mapped;

		hop_pte_addr[hop_idx] =
				get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx);
		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx];
		clear_hop3 = false;
	}

	if (hdev->dram_default_page_mapping && is_dram_addr) {
		u64 default_pte = (prop->mmu_dram_default_page_addr &
				HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask |
					PAGE_PRESENT_MASK;
		if (curr_pte == default_pte) {
			dev_err(hdev->dev,
				"DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n",
				virt_addr);
			goto not_mapped;
		}

		if (!(curr_pte & PAGE_PRESENT_MASK)) {
			dev_err(hdev->dev,
				"DRAM: hop3 PTE is cleared! can't unmap, va: 0x%llx\n",
				virt_addr);
			goto not_mapped;
		}

		hop_idx = MMU_HOP3;
		hl_mmu_dr_write_final_pte(ctx, hop_pte_addr[hop_idx], default_pte);
		hl_mmu_dr_put_pte(ctx, hop_addr[hop_idx]);
	} else {
		if (!(curr_pte & PAGE_PRESENT_MASK))
			goto not_mapped;

		if (hop_addr[MMU_HOP4])
			hl_mmu_dr_clear_pte(ctx, hop_pte_addr[MMU_HOP4]);
		else
			hl_mmu_dr_clear_pte(ctx, hop_pte_addr[MMU_HOP3]);

		if (hop_addr[MMU_HOP4] && !hl_mmu_dr_put_pte(ctx, hop_addr[MMU_HOP4]))
			clear_hop3 = true;

		if (!clear_hop3)
			goto mapped;

		for (hop_idx = MMU_HOP3; hop_idx >= 0; hop_idx--) {
			hl_mmu_dr_clear_pte(ctx, hop_pte_addr[hop_idx]);

			if (hop_idx == MMU_HOP0)
				break;

			if (hl_mmu_dr_put_pte(ctx, hop_addr[hop_idx]))
				goto mapped;
		}
	}

mapped:
	return 0;

not_mapped:
	dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
		virt_addr);

	return -EINVAL;
}
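/*
 * The non-default unmap path above tears the table down bottom-up: the
 * terminal PTE is cleared (hop4 for regular pages, hop3 for huge
 * pages), then each hop's entry in its parent is cleared only once the
 * hop itself has dropped to zero references. This relies on
 * hl_mmu_dr_put_pte() returning the number of valid PTEs remaining in
 * the hop -- zero meaning the hop was freed, so its parent's PTE may be
 * cleared as well (semantics inferred from the clear_hop3 handling and
 * the MMU_HOP3..MMU_HOP0 loop above).
 */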
static int hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
			u32 page_size, bool is_dram_addr)
{
	u64 hop_addr[MMU_V1_MAX_HOPS] = {0}, hop_pte_addr[MMU_V1_MAX_HOPS] = {0}, curr_pte = 0;
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	bool is_huge, hop_new[MMU_V1_MAX_HOPS] = {false};
	int num_hops, hop_idx, prev_hop, rc = -ENOMEM;

	/*
	 * This mapping function can map a page or a huge page. For a huge page
	 * the walk ends one hop earlier (4 hops rather than 5). Currently the
	 * DRAM allocation uses huge pages only but user memory could have been
	 * allocated with one of the two page sizes. Since this is common code
	 * for all three cases, we need this huge page check.
	 */
	if (is_dram_addr) {
		mmu_prop = &prop->dmmu;
		is_huge = true;
	} else if (page_size == prop->pmmu_huge.page_size) {
		mmu_prop = &prop->pmmu_huge;
		is_huge = true;
	} else {
		mmu_prop = &prop->pmmu;
		is_huge = false;
	}

	num_hops = is_huge ? (MMU_V1_MAX_HOPS - 1) : MMU_V1_MAX_HOPS;

	for (hop_idx = MMU_HOP0; hop_idx < num_hops; hop_idx++) {
		if (hop_idx == MMU_HOP0) {
			hop_addr[hop_idx] = hl_mmu_dr_get_hop0_addr(ctx);
		} else {
			hop_addr[hop_idx] =
				hl_mmu_dr_get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[hop_idx]);
			if (hop_addr[hop_idx] == ULLONG_MAX)
				goto err;
		}

		hop_pte_addr[hop_idx] =
				get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx);
		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx];
	}

	if (hdev->dram_default_page_mapping && is_dram_addr) {
		u64 default_pte = (prop->mmu_dram_default_page_addr &
					HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask |
						PAGE_PRESENT_MASK;

		if (curr_pte != default_pte) {
			dev_err(hdev->dev,
				"DRAM: mapping already exists for virt_addr 0x%llx\n",
				virt_addr);
			rc = -EINVAL;
			goto err;
		}

		for (hop_idx = MMU_HOP1; hop_idx < num_hops; hop_idx++) {
			if (hop_new[hop_idx]) {
				dev_err(hdev->dev, "DRAM mapping should not allocate more hops\n");
				rc = -EFAULT;
				goto err;
			}
		}
	} else if (curr_pte & PAGE_PRESENT_MASK) {
		dev_err(hdev->dev,
			"mapping already exists for virt_addr 0x%llx\n",
			virt_addr);

		for (hop_idx = MMU_HOP0; hop_idx < num_hops; hop_idx++)
			dev_dbg(hdev->dev, "hop%d pte: 0x%llx (0x%llx)\n", hop_idx,
				*(u64 *) (uintptr_t) hop_pte_addr[hop_idx],
				hop_pte_addr[hop_idx]);

		rc = -EINVAL;
		goto err;
	}

	curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask
			| PAGE_PRESENT_MASK;

	hl_mmu_dr_write_final_pte(ctx, hop_pte_addr[num_hops - 1], curr_pte);

	for (hop_idx = MMU_HOP1; hop_idx < num_hops; hop_idx++) {
		prev_hop = hop_idx - 1;

		if (hop_new[hop_idx]) {
			curr_pte = (hop_addr[hop_idx] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
			hl_mmu_dr_write_pte(ctx, hop_pte_addr[prev_hop], curr_pte);
			if (hop_idx != MMU_HOP1)
				hl_mmu_dr_get_pte(ctx, hop_addr[prev_hop]);
		}
	}

	hl_mmu_dr_get_pte(ctx, hop_addr[num_hops - 1]);

	return 0;

err:
	for (hop_idx = num_hops - 1; hop_idx > MMU_HOP0; hop_idx--) {
		if (hop_new[hop_idx])
			hl_mmu_dr_free_hop(ctx, hop_addr[hop_idx]);
	}

	return rc;
}
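/*
 * Mapping-flow example, with illustrative page sizes (the actual values
 * come from prop->dmmu/pmmu/pmmu_huge.page_size): for a 2 MB huge page
 * num_hops == 4, so the walk allocates or reuses hops 1..3 and writes
 * the final PTE (with mmu_prop->last_mask set) into hop3; a regular
 * 4 KB page walks all five hops and terminates in hop4. A newly
 * allocated hop is linked into its parent only after the final PTE has
 * been written, and a reference is taken on each parent so that the
 * unmap path can later free the hops bottom-up.
 */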
/*
 * hl_mmu_v1_swap_out - marks all mappings of the given ctx as swapped out
 *
 * @ctx: pointer to the context structure
 *
 */
static void hl_mmu_v1_swap_out(struct hl_ctx *ctx)
{

}

/*
 * hl_mmu_v1_swap_in - marks all mappings of the given ctx as swapped in
 *
 * @ctx: pointer to the context structure
 *
 */
static void hl_mmu_v1_swap_in(struct hl_ctx *ctx)
{

}

static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
				struct hl_mmu_hop_info *hops)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	bool is_dram_addr, is_pmmu_addr, is_pmmu_h_addr, is_huge;
	int i, used_hops;

	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
						prop->dmmu.start_addr,
						prop->dmmu.end_addr);
	is_pmmu_addr = hl_mem_area_inside_range(virt_addr, prop->pmmu.page_size,
						prop->pmmu.start_addr,
						prop->pmmu.end_addr);
	is_pmmu_h_addr = hl_mem_area_inside_range(virt_addr,
						prop->pmmu_huge.page_size,
						prop->pmmu_huge.start_addr,
						prop->pmmu_huge.end_addr);
	if (is_dram_addr) {
		mmu_prop = &prop->dmmu;
		is_huge = true;
	} else if (is_pmmu_addr) {
		mmu_prop = &prop->pmmu;
		is_huge = false;
	} else if (is_pmmu_h_addr) {
		mmu_prop = &prop->pmmu_huge;
		is_huge = true;
	} else {
		return -EINVAL;
	}

	used_hops = mmu_prop->num_hops;

	/* huge pages use one less hop */
	if (is_huge)
		used_hops--;

	hops->hop_info[0].hop_addr = hl_mmu_dr_get_phys_hop0_addr(ctx);
	hops->hop_info[0].hop_pte_addr =
			hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
					hops->hop_info[0].hop_addr, virt_addr);
	hops->hop_info[0].hop_pte_val =
			hdev->asic_funcs->read_pte(hdev,
						hops->hop_info[0].hop_pte_addr);

	for (i = 1 ; i < used_hops ; i++) {
		hops->hop_info[i].hop_addr =
			hl_mmu_get_next_hop_addr(ctx,
					hops->hop_info[i - 1].hop_pte_val);
		if (hops->hop_info[i].hop_addr == ULLONG_MAX)
			return -EFAULT;

		hops->hop_info[i].hop_pte_addr =
				hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
						hops->hop_info[i].hop_addr,
						virt_addr);
		hops->hop_info[i].hop_pte_val =
				hdev->asic_funcs->read_pte(hdev,
						hops->hop_info[i].hop_pte_addr);

		if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
			return -EFAULT;

		if (hops->hop_info[i].hop_pte_val & mmu_prop->last_mask)
			break;
	}

	/* if passed over all hops then no last hop was found */
	if (i == mmu_prop->num_hops)
		return -EFAULT;

	if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
		return -EFAULT;

	hops->used_hops = i + 1;

	return 0;
}

/*
 * hl_mmu_v1_set_funcs - set the MMU function pointers for MMU v1
 *
 * @hdev: pointer to the device structure
 * @mmu: pointer to the MMU function table to fill
 */
void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu)
{
	mmu->init = hl_mmu_dr_init;
	mmu->fini = hl_mmu_dr_fini;
	mmu->ctx_init = hl_mmu_v1_ctx_init;
	mmu->ctx_fini = hl_mmu_v1_ctx_fini;
	mmu->map = hl_mmu_v1_map;
	mmu->unmap = hl_mmu_v1_unmap;
	mmu->flush = hl_mmu_dr_flush;
	mmu->swap_out = hl_mmu_v1_swap_out;
	mmu->swap_in = hl_mmu_v1_swap_in;
	mmu->get_tlb_info = hl_mmu_v1_get_tlb_info;
}
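/*
 * A minimal usage sketch (the caller and field names below are
 * assumptions, for illustration only): the MMU core is expected to
 * populate this function table once per device during MMU init, e.g.
 *
 *	hl_mmu_v1_set_funcs(hdev, &hdev->mmu_func[MMU_DR_PGT]);
 *
 * after which generic code dispatches through the table:
 *
 *	rc = mmu->ctx_init(ctx);
 *	rc = mmu->map(ctx, virt_addr, phys_addr, page_size, is_dram_addr);
 *	rc = mmu->unmap(ctx, virt_addr, is_dram_addr);
 *	mmu->ctx_fini(ctx);
 */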