/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Memory copy functions for 32-bit PowerPC.
 *
 * Copyright (C) 1996-2005 Paul Mackerras.
 */
#include <linux/export.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/code-patching-asm.h>
#include <asm/kasan.h>

#define COPY_16_BYTES		\
	lwz	r7,4(r4);	\
	lwz	r8,8(r4);	\
	lwz	r9,12(r4);	\
	lwzu	r10,16(r4);	\
	stw	r7,4(r6);	\
	stw	r8,8(r6);	\
	stw	r9,12(r6);	\
	stwu	r10,16(r6)

#define COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
8 ## n ## 7:			\
	stwu	r10,16(r6)

#define COPY_16_BYTES_EXCODE(n)			\
9 ## n ## 0:					\
	addi	r5,r5,-(16 * n);		\
	b	104f;				\
9 ## n ## 1:					\
	addi	r5,r5,-(16 * n);		\
	b	105f;				\
	EX_TABLE(8 ## n ## 0b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 1b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 2b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 3b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 4b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 5b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 6b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 7b,9 ## n ## 1b)

	.text

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

#ifndef CONFIG_KASAN
_GLOBAL(memset16)
	rlwinm.	r0,r5,31,1,31
	addi	r6,r3,-4
	beq-	2f
	rlwimi	r4,r4,16,0,15
	mtctr	r0
1:	stwu	r4,4(r6)
	bdnz	1b
2:	andi.	r0,r5,1
	beqlr
	sth	r4,4(r6)
	blr
EXPORT_SYMBOL(memset16)
#endif

/*
 * Use dcbz on the complete cache lines in the destination
 * to set them to zero.  This requires that the destination
 * area is cacheable.  -- paulus
 *
 * During early init, the cache might not be active yet, so dcbz
 * cannot be used.  We therefore skip the optimised block that uses
 * dcbz.  The jump below is replaced by a nop once the cache is
 * active; this is done in machine_init().
 */
_GLOBAL_KASAN(memset)
	cmplwi	0,r5,4
	blt	7f

	rlwimi	r4,r4,8,16,23
	rlwimi	r4,r4,16,0,15

	stw	r4,0(r3)
	beqlr
	andi.	r0,r3,3
	add	r5,r0,r5
	subf	r6,r0,r3
	cmplwi	0,r4,0
	/*
	 * Skip the optimised block until the cache is enabled.  The
	 * branch is patched to a 'bne' during boot, so the normal
	 * procedure is still used when r4 is not zero.
	 */
5:	b	2f
	patch_site	5b, patch__memset_nocache

	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
	add	r8,r7,r5
	srwi	r9,r8,LG_CACHELINE_BYTES
	addic.	r9,r9,-1	/* total number of complete cachelines */
	ble	2f
	xori	r0,r7,CACHELINE_MASK & ~3
	srwi.	r0,r0,2
	beq	3f
	mtctr	r0
4:	stwu	r4,4(r6)
	bdnz	4b
3:	mtctr	r9
	li	r7,4
10:	dcbz	r7,r6
	addi	r6,r6,CACHELINE_BYTES
	bdnz	10b
	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
	addi	r5,r5,4

2:	srwi	r0,r5,2
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r3,-1
9:	stbu	r4,1(r6)
	bdnz	9b
	blr
EXPORT_SYMBOL(memset)
EXPORT_SYMBOL_KASAN(memset)
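/*
 * For reference, a rough C-level sketch of the dcbz fast path above.
 * This is illustrative only -- the helper name is made up, and it
 * ignores the code patching and the word-store alignment prologue.
 * dcbz establishes a whole line in the data cache as zeroes without
 * fetching it from memory, which is why the fast path is only taken
 * for a zero fill value, on a cacheable destination, once the caches
 * are enabled:
 *
 *	// assumes <stddef.h> for size_t and the usual L1_CACHE_BYTES
 *	// precondition: dst is cache-line aligned; the real code
 *	// reaches that alignment with ordinary stw stores first
 *	void *memset_zero_sketch(void *dst, size_t len)
 *	{
 *		char *p = dst;
 *
 *		while (len >= L1_CACHE_BYTES) {
 *			// one dcbz zeroes the whole line in the
 *			// D-cache instead of L1_CACHE_BYTES/4 stw's
 *			__asm__ volatile("dcbz 0,%0" : : "r"(p) : "memory");
 *			p += L1_CACHE_BYTES;
 *			len -= L1_CACHE_BYTES;
 *		}
 *		while (len--)	// trailing partial line
 *			*p++ = 0;
 *		return dst;
 *	}
 */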
/*
 * This version uses dcbz on the complete cache lines in the
 * destination area to reduce memory traffic.  This requires that
 * the destination area is cacheable.
 * We only use this version if the source and dest don't overlap.
 * -- paulus.
 *
 * During early init, the cache might not be active yet, so dcbz
 * cannot be used.  We therefore jump to generic_memcpy, which does
 * not use dcbz.  The jump is replaced by a nop once the cache is
 * active; this is done in machine_init().
 */
_GLOBAL_KASAN(memmove)
	cmplw	0,r3,r4
	bgt	backwards_memcpy
	/* fall through */

_GLOBAL_KASAN(memcpy)
1:	b	generic_memcpy
	patch_site	1b, patch__memcpy_nocache

	add	r7,r3,r5	/* test if the src & dst overlap */
	add	r8,r4,r5
	cmplw	0,r4,r7
	cmplw	1,r3,r8
	crand	0,0,4		/* cr0.lt &= cr1.lt */
	blt	generic_memcpy	/* if regions overlap */

	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0		/* is this more than total to do? */
	blt	63f		/* if not much to do */
	andi.	r8,r0,3		/* get it word-aligned first */
	subf	r5,r0,r5
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)	/* do some bytes */
	addi	r4,r4,1
	addi	r6,r6,1
	stb	r9,3(r6)
	bdnz	70b
61:	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)	/* do some words */
	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	mtctr	r0
	beq	63f
53:
	dcbz	r11,r6
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
#endif
#endif
#endif
	bdnz	53b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
	addi	r4,r4,3
	addi	r6,r6,3
40:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	40b
65:	blr
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(memmove)
EXPORT_SYMBOL_KASAN(memcpy)
EXPORT_SYMBOL_KASAN(memmove)

generic_memcpy:
	srwi.	r7,r5,3
	addi	r6,r3,-4
	addi	r4,r4,-4
	beq	2f	/* if less than 8 bytes to do */
	andi.	r0,r6,3	/* get dest word aligned */
	mtctr	r7
	bne	5f
1:	lwz	r7,4(r4)
	lwzu	r8,8(r4)
	stw	r7,4(r6)
	stwu	r8,8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,4(r4)
	addi	r5,r5,-4
	stwu	r0,4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r4,r4,3
	addi	r6,r6,3
4:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	4b
	blr
5:	subfic	r0,r0,4
	mtctr	r0
6:	lbz	r7,4(r4)
	addi	r4,r4,1
	stb	r7,4(r6)
	addi	r6,r6,1
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

_GLOBAL(backwards_memcpy)
	rlwinm.	r7,r5,32-3,3,31	/* r7 = r5 >> 3 */
	add	r6,r3,r5
	add	r4,r4,r5
	beq	2f
	andi.	r0,r6,3
	mtctr	r7
	bne	5f
1:	lwz	r7,-4(r4)
	lwzu	r8,-8(r4)
	stw	r7,-4(r6)
	stwu	r8,-8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,-4(r4)
	subi	r5,r5,4
	stwu	r0,-4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
4:	lbzu	r0,-1(r4)
	stbu	r0,-1(r6)
	bdnz	4b
	blr
5:	mtctr	r0
6:	lbzu	r7,-1(r4)
	stbu	r7,-1(r6)
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b
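/*
 * A rough C-level sketch of the dispatch logic above (illustrative
 * only; backwards_copy stands in for backwards_memcpy).  memmove
 * falls through to memcpy when dst <= src, because copying forwards
 * is then safe even for overlapping buffers; memcpy itself still
 * falls back to generic_memcpy when the regions overlap, since the
 * dcbz fast path zeroes each destination cache line before all of
 * the source bytes for that line have been read.
 *
 *	void *memmove_sketch(void *dst, const void *src, size_t n)
 *	{
 *		if ((char *)dst > (const char *)src)
 *			return backwards_copy(dst, src, n);
 *		return memcpy(dst, src, n);	// forward copy
 *	}
 *
 * The overlap test in memcpy computes src < dst + n && dst < src + n
 * with two unsigned compares into cr0 and cr1, then ANDs the two
 * "less than" bits together with crand before branching.
 */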
_GLOBAL(__copy_tofrom_user)
	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0		/* is this more than total to do? */
	blt	63f		/* if not much to do */
	andi.	r8,r0,3		/* get it word-aligned first */
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)	/* do some bytes */
71:	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	subf	r5,r0,r5
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)	/* do some words */
73:	stwu	r9,4(r6)
	bdnz	72b

	EX_TABLE(70b,100f)
	EX_TABLE(71b,101f)
	EX_TABLE(72b,102f)
	EX_TABLE(73b,103f)

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	beq	63f

	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

114:	subf	r8,r7,r0
	mr	r0,r7
	mtctr	r8

53:	dcbt	r3,r4
54:	dcbz	r11,r6
	EX_TABLE(54b,105f)
/* the main body of the cacheline loop */
	COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_WITHEX(2)
	COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_WITHEX(4)
	COPY_16_BYTES_WITHEX(5)
	COPY_16_BYTES_WITHEX(6)
	COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0
	li	r3,4
	li	r7,0
	bne	114b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
41:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	li	r3,0
	blr

/* read fault, initial single-byte copy */
100:	li	r9,0
	b	90f
/* write fault, initial single-byte copy */
101:	li	r9,1
90:	subf	r5,r8,r5
	li	r3,0
	b	99f
/* read fault, initial word copy */
102:	li	r9,0
	b	91f
/* write fault, initial word copy */
103:	li	r9,1
91:	li	r3,2
	b	99f

/*
 * this stuff handles faults in the cacheline loop and branches to either
 * 104f (if in read part) or 105f (if in write part), after updating r5
 */
	COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_EXCODE(2)
	COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_EXCODE(4)
	COPY_16_BYTES_EXCODE(5)
	COPY_16_BYTES_EXCODE(6)
	COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

/* read fault in cacheline loop */
104:	li	r9,0
	b	92f
/* fault on dcbz (effectively a write fault) */
/* or write fault in cacheline loop */
105:	li	r9,1
92:	li	r3,LG_CACHELINE_BYTES
	mfctr	r8
	add	r0,r0,r8
	b	106f
/* read fault in final word loop */
108:	li	r9,0
	b	93f
/* write fault in final word loop */
109:	li	r9,1
93:	andi.	r5,r5,3
	li	r3,2
	b	99f
/* read fault in final byte loop */
110:	li	r9,0
	b	94f
/* write fault in final byte loop */
111:	li	r9,1
94:	li	r5,0
	li	r3,0
/*
 * At this stage the number of bytes not copied is
 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
 */
99:	mfctr	r0
106:	slw	r3,r0,r3
	add.	r3,r3,r5
	beq	120f		/* shouldn't happen */
	cmpwi	0,r9,0
	bne	120f
/* for a read fault, first try to continue the copy one byte at a time */
	mtctr	r3
130:	lbz	r0,4(r4)
131:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	130b
/* then clear out the destination: r3 bytes starting at 4(r6) */
132:	mfctr	r3
120:	blr

	EX_TABLE(30b,108b)
	EX_TABLE(31b,109b)
	EX_TABLE(40b,110b)
	EX_TABLE(41b,111b)
	EX_TABLE(130b,132b)
	EX_TABLE(131b,120b)

EXPORT_SYMBOL(__copy_tofrom_user)
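/*
 * Usage note (a sketch of the usual kernel convention, not something
 * defined in this file): __copy_tofrom_user returns the number of
 * bytes NOT copied, so 0 means complete success.  The fixup code
 * above computes that residue as r5 + (ctr << r3), where r5 counts
 * the bytes not yet handed to the faulting loop, ctr the iterations
 * still to go, and r3 the log2 of the bytes moved per iteration
 * (0 for the byte loops, 2 for the word loops, LG_CACHELINE_BYTES
 * for the cacheline loop).  A C caller would typically do:
 *
 *	if (__copy_tofrom_user(to, from, n))	// non-zero: partial copy
 *		return -EFAULT;
 */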