/* SPDX-License-Identifier: GPL-2.0 */
/*
 * arch/alpha/lib/stxncpy.S
 * Contributed by Richard Henderson ([email protected])
 *
 * Copy no more than COUNT bytes of the null-terminated string from
 * SRC to DST.
 *
 * This is an internal routine used by strncpy, stpncpy, and strncat.
 * As such, it uses special linkage conventions to make implementation
 * of these public functions more efficient.
 *
 * On input:
 *	t9 = return address
 *	a0 = DST
 *	a1 = SRC
 *	a2 = COUNT
 *
 * Furthermore, COUNT may not be zero.
 *
 * On output:
 *	t0  = last word written
 *	t10 = bitmask (with one bit set) indicating the byte position of
 *	      the end of the range specified by COUNT
 *	t12 = bitmask (with one bit set) indicating the last byte written
 *	a0  = unaligned address of the last *word* written
 *	a2  = the number of full words left in COUNT
 *
 * Furthermore, v0, a3-a5, t11, and $at are untouched.
 *
 * Layout of this file:
 *	stxncpy_aligned	- copy loop used when SRC and DST share the same
 *			  offset within an 8-byte word
 *	__stxncpy	- global entry point; computes the count bookkeeping
 *			  and dispatches to the aligned or unaligned path
 *
 * The "e0"/"e1" notes in the trailing comments are the original author's
 * issue-slot annotations; the instruction order is deliberate and should
 * not be rearranged casually.
 */

#include <asm/regdef.h>

	.set noat
	.set noreorder

	.text

/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that
   doesn't like putting the entry point for a procedure somewhere in the
   middle of the procedure descriptor.  Work around this by putting the
   aligned copy in its own procedure descriptor.  */

	.ent stxncpy_aligned
	.align 3
stxncpy_aligned:
	.frame sp, 0, t9, 0
	.prologue 0

	/* On entry to this basic block:
	   t0 == the first destination word for masking back in
	   t1 == the first source word.  */

	/* Create the 1st output word and detect 0's in the 1st input word.  */
	lda	t2, -1		# e1    : build a mask against false zero
	mskqh	t2, a1, t2	# e0    :   detection in the src word
	mskqh	t1, a1, t3	# e0    : t3 = src bytes at/above the src offset
	ornot	t1, t2, t2	# .. e1 : t2 = src with lower bytes forced non-zero
	mskql	t0, a1, t0	# e0    : assemble the first output word
	cmpbge	zero, t2, t8	# .. e1 : bits set iff null found
	or	t0, t3, t0	# e0    : merge kept dest bytes with src bytes
	beq	a2, $a_eoc	# .. e1 : no full words left in COUNT
	bne	t8, $a_eos	# .. e1 : null found in the first word

	/* On entry to this basic block:
	   t0 == a source word not containing a null.  */

$a_loop:
	stq_u	t0, 0(a0)	# e0    : store the completed word
	addq	a0, 8, a0	# .. e1 :
	ldq_u	t0, 0(a1)	# e0    : fetch the next source word
	addq	a1, 8, a1	# .. e1 :
	subq	a2, 1, a2	# e0    : one fewer full word left in COUNT
	cmpbge	zero, t0, t8	# .. e1 (stall)
	beq	a2, $a_eoc	# e1    :
	beq	t8, $a_loop	# e1    :

	/* Take care of the final (partial) word store.  At this point
	   the end-of-count bit is set in t8 iff it applies.

	   On entry to this basic block we have:
	   t0 == the source word containing the null
	   t8 == the cmpbge mask that found it.  */

$a_eos:
	negq	t8, t12		# e0    : find low bit set
	and	t8, t12, t12	# e1 (stall)

	/* For the sake of the cache, don't read a destination word
	   if we're not going to need it.  */
	and	t12, 0x80, t6	# e0    : last byte written is byte 7?
	bne	t6, 1f		# .. e1 (zdb)

	/* We're doing a partial word store and so need to combine
	   our source and original destination words.  */
	ldq_u	t1, 0(a0)	# e0    :
	subq	t12, 1, t6	# .. e1 :
	or	t12, t6, t8	# e0    : t8 = mask of bytes up to the last one
	unop			#
	zapnot	t0, t8, t0	# e0    : clear src bytes > null
	zap	t1, t8, t1	# .. e1 : clear dst bytes <= null
	or	t0, t1, t0	# e1    :

1:	stq_u	t0, 0(a0)	# e0    :
	ret	(t9)		# e1    :

	/* Add the end-of-count bit to the eos detection bitmask.  */
$a_eoc:
	or	t10, t8, t8
	br	$a_eos

	.end stxncpy_aligned

	.align 3
	.ent __stxncpy
	.globl __stxncpy
__stxncpy:
	.frame sp, 0, t9, 0
	.prologue 0

	/* Are source and destination co-aligned?  */
	/* NOTE(review): the count bias below uses 64-bit addq/subq, which
	   wrap silently.  A COUNT within a few bytes of 2^64 combined with
	   a misaligned DST would wrap the biased count; presumably callers
	   never pass such a COUNT -- confirm.  */
	xor	a0, a1, t1	# e0    :
	and	a0, 7, t0	# .. e1 : find dest misalignment
	and	t1, 7, t1	# e0    :
	addq	a2, t0, a2	# .. e1 : bias count by dest misalignment
	subq	a2, 1, a2	# e0    :
	and	a2, 7, t2	# e1    :
	srl	a2, 3, a2	# e0    : a2 = loop counter = (count - 1)/8
	addq	zero, 1, t10	# .. e1 :
	sll	t10, t2, t10	# e0    : t10 = bitmask of last count byte
	bne	t1, $unaligned	# .. e1 :

	/* We are co-aligned; take care of a partial first word.  */

	ldq_u	t1, 0(a1)	# e0    : load first src word
	addq	a1, 8, a1	# .. e1 :

	beq	t0, stxncpy_aligned	# avoid loading dest word if not needed
	ldq_u	t0, 0(a0)	# e0    :
	br	stxncpy_aligned	# .. e1 :


/* The source and destination are not co-aligned.  Align the destination
   and cope.  We have to be very careful about not reading too much and
   causing a SEGV.  */

	.align 3
$u_head:
	/* We know just enough now to be able to assemble the first
	   full source word.  We can still find a zero at the end of it
	   that prevents us from outputting the whole thing.

	   On entry to this basic block:
	   t0 == the first dest word, unmasked
	   t1 == the shifted low bits of the first source word
	   t6 == bytemask that is -1 in dest word bytes */

	ldq_u	t2, 8(a1)	# e0    : load second src word
	addq	a1, 8, a1	# .. e1 :
	mskql	t0, a0, t0	# e0    : mask trailing garbage in dst
	extqh	t2, a1, t4	# e0    :
	or	t1, t4, t1	# e1    : first aligned src word complete
	mskqh	t1, a0, t1	# e0    : mask leading garbage in src
	or	t0, t1, t0	# e0    : first output word complete
	or	t0, t6, t6	# e1    : mask original data for zero test
	cmpbge	zero, t6, t8	# e0    :
	beq	a2, $u_eocfin	# .. e1 :
	lda	t6, -1		# e0    :
	bne	t8, $u_final	# .. e1 :

	mskql	t6, a1, t6	# e0    : mask out bits already seen
	nop			# .. e1 :
	stq_u	t0, 0(a0)	# e0    : store first output word
	or	t6, t2, t2	# .. e1 :
	cmpbge	zero, t2, t8	# e0    : find nulls in second partial
	addq	a0, 8, a0	# .. e1 :
	subq	a2, 1, a2	# e0    :
	bne	t8, $u_late_head_exit	# .. e1 :

	/* Finally, we've got all the stupid leading edge cases taken care
	   of and we can set up to enter the main loop.  */

	extql	t2, a1, t1	# e0    : position hi-bits of lo word
	beq	a2, $u_eoc	# .. e1 :
	ldq_u	t2, 8(a1)	# e0    : read next high-order source word
	addq	a1, 8, a1	# .. e1 :
	extqh	t2, a1, t0	# e0    : position lo-bits of hi word (stall)
	cmpbge	zero, t2, t8	# .. e1 :
	nop			# e0    :
	bne	t8, $u_eos	# .. e1 :

	/* Unaligned copy main loop.  In order to avoid reading too much,
	   the loop is structured to detect zeros in aligned source words.
	   This has, unfortunately, effectively pulled half of a loop
	   iteration out into the head and half into the tail, but it does
	   prevent nastiness from accumulating in the very thing we want
	   to run as fast as possible.

	   On entry to this basic block:
	   t0 == the shifted low-order bits from the current source word
	   t1 == the shifted high-order bits from the previous source word
	   t2 == the unshifted current source word

	   We further know that t2 does not contain a null terminator.  */

	.align 3
$u_loop:
	or	t0, t1, t0	# e0    : current dst word now complete
	subq	a2, 1, a2	# .. e1 : decrement word count
	stq_u	t0, 0(a0)	# e0    : save the current word
	addq	a0, 8, a0	# .. e1 :
	extql	t2, a1, t1	# e0    : extract high bits for next time
	beq	a2, $u_eoc	# .. e1 :
	ldq_u	t2, 8(a1)	# e0    : load high word for next time
	addq	a1, 8, a1	# .. e1 :
	nop			# e0    :
	cmpbge	zero, t2, t8	# e1    : test new word for eos (stall)
	extqh	t2, a1, t0	# e0    : extract low bits for current word
	beq	t8, $u_loop	# .. e1 :

	/* We've found a zero somewhere in the source word we just read.
	   If it resides in the lower half, we have one (probably partial)
	   word to write out, and if it resides in the upper half, we
	   have one full and one partial word left to write out.

	   On entry to this basic block:
	   t0 == the shifted low-order bits from the current source word
	   t1 == the shifted high-order bits from the previous source word
	   t2 == the unshifted current source word.  */
$u_eos:
	or	t0, t1, t0	# e0    : first (partial) source word complete
	nop			# .. e1 :
	cmpbge	zero, t0, t8	# e0    : is the null in this first bit?
	bne	t8, $u_final	# .. e1 (zdb)

	stq_u	t0, 0(a0)	# e0    : the null was in the high-order bits
	addq	a0, 8, a0	# .. e1 :
	subq	a2, 1, a2	# e1    :

$u_late_head_exit:
	extql	t2, a1, t0	# .. e0 :
	cmpbge	zero, t0, t8	# e0    :
	or	t8, t10, t6	# e1    : null mask plus end-of-count bit
	cmoveq	a2, t6, t8	# e0    : use it only when a2 == 0
	nop			# .. e1 :

	/* Take care of a final (probably partial) result word.
	   On entry to this basic block:
	   t0 == assembled source word
	   t8 == cmpbge mask that found the null.  */
$u_final:
	negq	t8, t6		# e0    : isolate low bit set
	and	t6, t8, t12	# e1    :

	and	t12, 0x80, t6	# e0    : avoid dest word load if we can
	bne	t6, 1f		# .. e1 (zdb)

	ldq_u	t1, 0(a0)	# e0    :
	subq	t12, 1, t6	# .. e1 :
	or	t6, t12, t8	# e0    : t8 = mask of bytes up to the last one
	zapnot	t0, t8, t0	# .. e1 : kill source bytes > null
	zap	t1, t8, t1	# e0    : kill dest bytes <= null
	or	t0, t1, t0	# e1    :

1:	stq_u	t0, 0(a0)	# e0    :
	ret	(t9)		# .. e1 :

	/* Got to end-of-count before end of string.
	   On entry to this basic block:
	   t1 == the shifted high-order bits from the previous source word

	   The first four instructions shift the end-of-count bit (t10) left
	   by the low three bits of a1; the extra source word is loaded only
	   when nothing of that bit remains in the low eight bits.  */
$u_eoc:
	and	a1, 7, t6	# e1    :
	sll	t10, t6, t6	# e0    :
	and	t6, 0xff, t6	# e0    :
	bne	t6, 1f		# .. e1 :

	ldq_u	t2, 8(a1)	# e0    : load final src word
	nop			# .. e1 :
	extqh	t2, a1, t0	# e0    : extract low bits for last word
	or	t1, t0, t1	# e1    :

1:	cmpbge	zero, t1, t8
	mov	t1, t0

$u_eocfin:			# end-of-count, final word
	or	t10, t8, t8
	br	$u_final

	/* Unaligned copy entry point.  */
	.align 3
$unaligned:

	ldq_u	t1, 0(a1)	# e0    : load first source word

	and	a0, 7, t4	# .. e1 : find dest misalignment
	and	a1, 7, t5	# e0    : find src misalignment

	/* Conditionally load the first destination word and a bytemask
	   with 0xff indicating that the destination byte is sacrosanct.  */

	mov	zero, t0	# .. e1 :
	mov	zero, t6	# e0    :
	beq	t4, 1f		# .. e1 :
	ldq_u	t0, 0(a0)	# e0    :
	lda	t6, -1		# .. e1 :
	mskql	t6, a0, t6	# e0    :
	subq	a1, t4, a1	# .. e1 : sub dest misalignment from src addr

	/* If source misalignment is larger than dest misalignment, we need
	   extra startup checks to avoid SEGV.  */

1:	cmplt	t4, t5, t12	# e1    :
	extql	t1, a1, t1	# .. e0 : shift src into place
	lda	t2, -1		# e0    : for creating masks later
	beq	t12, $u_head	# .. e1 :

	extql	t2, a1, t2	# e0    :
	cmpbge	zero, t1, t8	# .. e1 : is there a zero?
	andnot	t2, t6, t2	# e0    : dest mask for a single word copy
	or	t8, t10, t5	# .. e1 : test for end-of-count too
	cmpbge	zero, t2, t3	# e0    :
	cmoveq	a2, t5, t8	# .. e1 :
	andnot	t8, t3, t8	# e0    :
	beq	t8, $u_head	# .. e1 (zdb)

	/* At this point we've found a zero in the first partial word of
	   the source.  We need to isolate the valid source data and mask
	   it into the original destination data.  (Incidentally, we know
	   that we'll need at least one byte of that original dest word.) */

	ldq_u	t0, 0(a0)	# e0    :
	negq	t8, t6		# .. e1 : build bitmask of bytes <= zero
	mskqh	t1, t4, t1	# e0    :
	and	t6, t8, t12	# .. e1 :
	subq	t12, 1, t6	# e0    :
	or	t6, t12, t8	# e1    :

	zapnot	t2, t8, t2	# e0    : prepare source word; mirror changes
	zapnot	t1, t8, t1	# .. e1 : to source validity mask

	andnot	t0, t2, t0	# e0    : zero place for source to reside
	or	t0, t1, t0	# e1    : and put it there
	stq_u	t0, 0(a0)	# e0    :
	ret	(t9)		# .. e1 :

	.end __stxncpy