/* SPDX-License-Identifier: GPL-2.0 */
/*
 * arch/alpha/lib/ev67-strlen.S
 * 21264 version by Rick Gorton <[email protected]>
 *
 * Finds length of a 0-terminated string.  Optimized for the
 * Alpha architecture:
 *
 *	- memory accessed as aligned quadwords only
 *	- uses cmpbge to compare 8 bytes in parallel
 *
 * Much of the information about 21264 scheduling/coding comes from:
 *	Compiler Writer's Guide for the Alpha 21264
 *	abbreviated as 'CWG' in other comments here
 *	ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html
 * Scheduling notation:
 *	E	- either cluster
 *	U	- upper subcluster; U0 - subcluster U0; U1 - subcluster U1
 *	L	- lower subcluster; L0 - subcluster L0; L1 - subcluster L1
 */
#include <linux/export.h>
	.set noreorder
	.set noat

/*
 * C equivalent: size_t strlen(const char *s)
 *
 * In:	$16 = s, pointer to the string (any alignment)
 * Out:	$0  = number of bytes before the first 0 byte
 * Scratch registers written: $1, $2, $3
 *
 * Register roles inside the routine:
 *	$0  - address of the aligned quadword currently being examined
 *	$1  - contents of that quadword
 *	$2  - first the "bytes before s" mask, then the cmpbge result
 *	      (bit i set <==> byte i of the quadword is 0)
 *	$3  - index of the first 0 byte within the final quadword
 *	$31 - always reads as zero
 *	$26 - return address
 *
 * Because only whole aligned quadwords are loaded, the routine may read
 * bytes that lie before s, or after the terminator, inside the same
 * aligned quadword.  It never loads a quadword that lies entirely past
 * the one holding the terminator.
 *
 * The instructions are laid out in groups of four with a scheduling
 * note on each; keep the order and grouping intact when editing.
 */
	.globl	strlen
	.ent	strlen
	.align 4
strlen:
	ldq_u	$1, 0($16)	# L : load first quadword ($16 may be misaligned)
	lda	$2, -1($31)	# E : $2 = all ones
	insqh	$2, $16, $2	# U : $2 = 0xff in each byte that precedes s
	andnot	$16, 7, $0	# E : $0 = s rounded down to a quadword boundary

	or	$2, $1, $1	# E : force bytes before s to be non-zero
	cmpbge	$31, $1, $2	# E : $2 <- bitmask: bit i == 1 <==> i-th byte == 0
	nop			# E :
	bne	$2, $found	# U : terminator is in the first quadword

$loop:	ldq	$1, 8($0)	# L : fetch the next aligned quadword
	addq	$0, 8, $0	# E : addr += 8
	cmpbge	$31, $1, $2	# E : same zero-byte test as above
	beq	$2, $loop	# U : no zero byte yet, keep scanning

$found:
	cttz	$2, $3		# U0 : $3 = index of the lowest-addressed 0 byte
	addq	$0, $3, $0	# E : $0 = address of the terminator
	subq	$0, $16, $0	# E : length = terminator address - s
	ret	$31, ($26)	# L0 :

	.end	strlen
	EXPORT_SYMBOL(strlen)