/* SPDX-License-Identifier: GPL-2.0 */
/*
 * arch/alpha/lib/stxcpy.S
 * Contributed by Richard Henderson ([email protected])
 *
 * Copy a null-terminated string from SRC to DST.
 *
 * This is an internal routine used by strcpy, stpcpy, and strcat.
 * As such, it uses special linkage conventions to make implementation
 * of these public functions more efficient.
 *
 * On input:
 *	t9 = return address
 *	a0 = DST
 *	a1 = SRC
 *
 * On output:
 *	t12 = bitmask (with one bit set) indicating the last byte written
 *	a0  = unaligned address of the last *word* written
 *
 * Scratch registers written by this routine: t0-t6, t8 and a1
 * (a1 is advanced through the source as it is read).
 *
 * Furthermore, v0, a3-a5, and t11 are untouched.
 */

#include <asm/regdef.h>

	.set noat
	.set noreorder

	.text

/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that
   doesn't like putting the entry point for a procedure somewhere in the
   middle of the procedure descriptor.  Work around this by putting the
   aligned copy in its own procedure descriptor.  */

	.ent stxcpy_aligned
	.align 3
stxcpy_aligned:
	.frame sp, 0, t9
	.prologue 0

	/* Reached only by branch from __stxcpy when SRC and DST share the
	   same offset within a quadword.

	   On entry to this basic block:
	   t0 == the first destination word for masking back in
	   t1 == the first source word.
	   a1 has already been advanced by 8 by the caller.  */

	/* Create the 1st output word and detect 0's in the 1st input word.  */
	lda	t2, -1		# e1    : build a mask against false zero
	mskqh	t2, a1, t2	# e0    :   detection in the src word
	mskqh	t1, a1, t3	# e0    :
	ornot	t1, t2, t2	# .. e1 :
	mskql	t0, a1, t0	# e0    : assemble the first output word
	cmpbge	zero, t2, t8	# .. e1 : bits set iff null found
	or	t0, t3, t1	# e0    :
	bne	t8, $a_eos	# .. e1 :

	/* On entry to this basic block:
	   t0 == the first destination word for masking back in
	   t1 == a source word not containing a null.  */

$a_loop:
	stq_u	t1, 0(a0)	# e0    :
	addq	a0, 8, a0	# .. e1 :
	ldq_u	t1, 0(a1)	# e0    :
	addq	a1, 8, a1	# .. e1 :
	cmpbge	zero, t1, t8	# e0 (stall)
	beq	t8, $a_loop	# .. e1 (zdb)

	/* Take care of the final (partial) word store.
	   On entry to this basic block we have:
	   t1 == the source word containing the null
	   t8 == the cmpbge mask that found it.  */
$a_eos:
	/* t12 = t8 & -t8 keeps only the lowest set bit of the cmpbge
	   mask, i.e. the position of the first null byte.  This is the
	   value handed back to the caller.  */
	negq	t8, t6		# e0    : find low bit set
	and	t8, t6, t12	# e1 (stall)

	/* For the sake of the cache, don't read a destination word
	   if we're not going to need it.  (Bit 7 set means the null is
	   the last byte of the word, so the whole word is stored.)  */
	and	t12, 0x80, t6	# e0    :
	bne	t6, 1f		# .. e1 (zdb)

	/* We're doing a partial word store and so need to combine
	   our source and original destination words.  */
	ldq_u	t0, 0(a0)	# e0    :
	subq	t12, 1, t6	# .. e1 :
	zapnot	t1, t6, t1	# e0    : clear src bytes >= null
	or	t12, t6, t8	# .. e1 :
	zap	t0, t8, t0	# e0    : clear dst bytes <= null
	or	t0, t1, t1	# e1    :

1:	stq_u	t1, 0(a0)	# e0    :
	ret	(t9)		# .. e1 :

	.end stxcpy_aligned

	.align 3
	.ent __stxcpy
	.globl __stxcpy
__stxcpy:
	.frame sp, 0, t9
	.prologue 0

	/* Are source and destination co-aligned?  */
	xor	a0, a1, t0	# e0    :
	unop			#       :
	and	t0, 7, t0	# e0    :
	bne	t0, $unaligned	# .. e1 :

	/* We are co-aligned; take care of a partial first word.  */
	ldq_u	t1, 0(a1)	# e0    : load first src word
	and	a0, 7, t0	# .. e1 : take care not to load a word ...
	addq	a1, 8, a1		# e0    :
	beq	t0, stxcpy_aligned	# .. e1 : ... if it is not needed
	ldq_u	t0, 0(a0)	# e0    :
	br	stxcpy_aligned	# .. e1 :


/* The source and destination are not co-aligned.  Align the destination
   and cope.  We have to be very careful about not reading too much and
   causing a SEGV.  */

	.align 3
$u_head:
	/* We know just enough now to be able to assemble the first
	   full source word.  We can still find a zero at the end of it
	   that prevents us from outputting the whole thing.

	   On entry to this basic block:
	   t0 == the first dest word, for masking back in, if needed else 0
	   t1 == the low bits of the first source word
	   t6 == bytemask that is -1 in dest word bytes */

	ldq_u	t2, 8(a1)	# e0    :
	addq	a1, 8, a1	# .. e1 :

	extql	t1, a1, t1	# e0    :
	extqh	t2, a1, t4	# e0    :
	mskql	t0, a0, t0	# e0    :
	or	t1, t4, t1	# .. e1 :
	mskqh	t1, a0, t1	# e0    :
	or	t0, t1, t1	# e1    :

	or	t1, t6, t6	# e0    :
	cmpbge	zero, t6, t8	# .. e1 :
	lda	t6, -1		# e0    : for masking just below
	bne	t8, $u_final	# .. e1 :

	mskql	t6, a1, t6		# e0    : mask out the bits we have
	or	t6, t2, t2		# e1    :   already extracted before
	cmpbge	zero, t2, t8		# e0    :   testing eos
	bne	t8, $u_late_head_exit	# .. e1 (zdb)

	/* Finally, we've got all the stupid leading edge cases taken care
	   of and we can set up to enter the main loop.  */

	stq_u	t1, 0(a0)	# e0    : store first output word
	addq	a0, 8, a0	# .. e1 :
	extql	t2, a1, t0	# e0    : position ho-bits of lo word
	ldq_u	t2, 8(a1)	# .. e1 : read next high-order source word
	addq	a1, 8, a1	# e0    :
	cmpbge	zero, t2, t8	# .. e1 :
	nop			# e0    :
	bne	t8, $u_eos	# .. e1 :

	/* Unaligned copy main loop.  In order to avoid reading too much,
	   the loop is structured to detect zeros in aligned source words.
	   This has, unfortunately, effectively pulled half of a loop
	   iteration out into the head and half into the tail, but it does
	   prevent nastiness from accumulating in the very thing we want
	   to run as fast as possible.

	   On entry to this basic block:
	   t0 == the shifted high-order bits from the previous source word
	   t2 == the unshifted current source word

	   We further know that t2 does not contain a null terminator.  */

	.align 3
$u_loop:
	extqh	t2, a1, t1	# e0    : extract high bits for current word
	addq	a1, 8, a1	# .. e1 :
	extql	t2, a1, t3	# e0    : extract low bits for next time
	addq	a0, 8, a0	# .. e1 :
	or	t0, t1, t1	# e0    : current dst word now complete
	ldq_u	t2, 0(a1)	# .. e1 : load high word for next time
	stq_u	t1, -8(a0)	# e0    : save the current word
	mov	t3, t0		# .. e1 :
	cmpbge	zero, t2, t8	# e0    : test new word for eos
	beq	t8, $u_loop	# .. e1 :

	/* We've found a zero somewhere in the source word we just read.
	   If it resides in the lower half, we have one (probably partial)
	   word to write out, and if it resides in the upper half, we
	   have one full and one partial word left to write out.

	   On entry to this basic block:
	   t0 == the shifted high-order bits from the previous source word
	   t2 == the unshifted current source word.  */
$u_eos:
	extqh	t2, a1, t1	# e0    :
	or	t0, t1, t1	# e1    : first (partial) source word complete

	cmpbge	zero, t1, t8	# e0    : is the null in this first bit?
	bne	t8, $u_final	# .. e1 (zdb)

$u_late_head_exit:
	stq_u	t1, 0(a0)	# e0    : the null was in the high-order bits
	addq	a0, 8, a0	# .. e1 :
	extql	t2, a1, t1	# e0    :
	cmpbge	zero, t1, t8	# .. e1 :

	/* Take care of a final (probably partial) result word.
	   On entry to this basic block:
	   t1 == assembled source word
	   t8 == cmpbge mask that found the null.  */
$u_final:
	/* Same tail as $a_eos: t12 receives the single-bit mask that is
	   returned to the caller.  */
	negq	t8, t6		# e0    : isolate low bit set
	and	t6, t8, t12	# e1    :

	and	t12, 0x80, t6	# e0    : avoid dest word load if we can
	bne	t6, 1f		# .. e1 (zdb)

	ldq_u	t0, 0(a0)	# e0    :
	subq	t12, 1, t6	# .. e1 :
	or	t6, t12, t8	# e0    :
	zapnot	t1, t6, t1	# .. e1 : kill source bytes >= null
	zap	t0, t8, t0	# e0    : kill dest bytes <= null
	or	t0, t1, t1	# e1    :

1:	stq_u	t1, 0(a0)	# e0    :
	ret	(t9)		# .. e1 :

	/* Unaligned copy entry point.  */
	.align 3
$unaligned:

	ldq_u	t1, 0(a1)	# e0    : load first source word

	and	a0, 7, t4	# .. e1 : find dest misalignment
	and	a1, 7, t5	# e0    : find src misalignment

	/* Conditionally load the first destination word and a bytemask
	   with 0xff indicating that the destination byte is sacrosanct.  */

	mov	zero, t0	# .. e1 :
	mov	zero, t6	# e0    :
	beq	t4, 1f		# .. e1 :
	ldq_u	t0, 0(a0)	# e0    :
	lda	t6, -1		# .. e1 :
	mskql	t6, a0, t6	# e0    :
1:
	subq	a1, t4, a1	# .. e1 : sub dest misalignment from src addr

	/* If source misalignment is larger than dest misalignment, we need
	   extra startup checks to avoid SEGV.  */

	cmplt	t4, t5, t12	# e0    :
	beq	t12, $u_head	# .. e1 (zdb)

	lda	t2, -1		# e1    : mask out leading garbage in source
	mskqh	t2, t5, t2	# e0    :
	nop			# e0    :
	ornot	t1, t2, t3	# .. e1 :
	cmpbge	zero, t3, t8	# e0    : is there a zero?
	beq	t8, $u_head	# .. e1 (zdb)

	/* At this point we've found a zero in the first partial word of
	   the source.  We need to isolate the valid source data and mask
	   it into the original destination data.  (Incidentally, we know
	   that we'll need at least one byte of that original dest word.) */

	ldq_u	t0, 0(a0)	# e0    :

	negq	t8, t6		# .. e1 : build bitmask of bytes <= zero
	and	t6, t8, t12	# e0    :
	and	a1, 7, t5	# .. e1 :
	subq	t12, 1, t6	# e0    :
	or	t6, t12, t8	# e1    :
	srl	t12, t5, t12	# e0    : adjust final null return value

	zapnot	t2, t8, t2	# .. e1 : prepare source word; mirror changes
	and	t1, t2, t1	# e1    : to source validity mask
	extql	t2, a1, t2	# .. e0 :
	extql	t1, a1, t1	# e0    :

	andnot	t0, t2, t0	# .. e1 : zero place for source to reside
	or	t0, t1, t1	# e1    : and put it there
	stq_u	t1, 0(a0)	# .. e0 :
	ret	(t9)		# e1    :

	.end __stxcpy