// Path: blob/master/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
// (code-viewer page metadata: 40930 views)
/*1* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation.7*8* This code is distributed in the hope that it will be useful, but WITHOUT9* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or10* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License11* version 2 for more details (a copy is included in the LICENSE file that12* accompanied this code).13*14* You should have received a copy of the GNU General Public License version15* 2 along with this work; if not, write to the Free Software Foundation,16* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.17*18* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA19* or visit www.oracle.com if you need additional information or have any20* questions.21*22*/2324#include "precompiled.hpp"25#include "asm/assembler.hpp"26#include "asm/assembler.inline.hpp"27#include "opto/c2_MacroAssembler.hpp"28#include "opto/intrinsicnode.hpp"29#include "runtime/stubRoutines.hpp"3031#ifdef PRODUCT32#define BLOCK_COMMENT(str) /* nothing */33#define STOP(error) stop(error)34#else35#define BLOCK_COMMENT(str) block_comment(str)36#define STOP(error) block_comment(error); stop(error)37#endif3839#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")4041typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);4243// Search for str1 in str2 and return index or -144void C2_MacroAssembler::string_indexof(Register str2, Register str1,45Register cnt2, Register cnt1,46Register tmp1, Register tmp2,47Register tmp3, Register tmp4,48Register tmp5, Register tmp6,49int icnt1, Register result, int ae) {50// NOTE: tmp5, tmp6 can be zr depending on specific method version51Label LINEARSEARCH, LINEARSTUB, 
LINEAR_MEDIUM, DONE, NOMATCH, MATCH;5253Register ch1 = rscratch1;54Register ch2 = rscratch2;55Register cnt1tmp = tmp1;56Register cnt2tmp = tmp2;57Register cnt1_neg = cnt1;58Register cnt2_neg = cnt2;59Register result_tmp = tmp4;6061bool isL = ae == StrIntrinsicNode::LL;6263bool str1_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL;64bool str2_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU;65int str1_chr_shift = str1_isL ? 0:1;66int str2_chr_shift = str2_isL ? 0:1;67int str1_chr_size = str1_isL ? 1:2;68int str2_chr_size = str2_isL ? 1:2;69chr_insn str1_load_1chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :70(chr_insn)&MacroAssembler::ldrh;71chr_insn str2_load_1chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :72(chr_insn)&MacroAssembler::ldrh;73chr_insn load_2chr = isL ? (chr_insn)&MacroAssembler::ldrh : (chr_insn)&MacroAssembler::ldrw;74chr_insn load_4chr = isL ? (chr_insn)&MacroAssembler::ldrw : (chr_insn)&MacroAssembler::ldr;7576// Note, inline_string_indexOf() generates checks:77// if (substr.count > string.count) return -1;78// if (substr.count == 0) return 0;7980// We have two strings, a source string in str2, cnt2 and a pattern string81// in str1, cnt1. Find the 1st occurence of pattern in source or return -1.8283// For larger pattern and source we use a simplified Boyer Moore algorithm.84// With a small pattern and source we use linear scan.8586if (icnt1 == -1) {87sub(result_tmp, cnt2, cnt1);88cmp(cnt1, (u1)8); // Use Linear Scan if cnt1 < 8 || cnt1 >= 25689br(LT, LINEARSEARCH);90dup(v0, T16B, cnt1); // done in separate FPU pipeline. Almost no penalty91subs(zr, cnt1, 256);92lsr(tmp1, cnt2, 2);93ccmp(cnt1, tmp1, 0b0000, LT); // Source must be 4 * pattern for BM94br(GE, LINEARSTUB);95}9697// The Boyer Moore alogorithm is based on the description here:-98//99// http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm100//101// This describes and algorithm with 2 shift rules. 
The 'Bad Character' rule102// and the 'Good Suffix' rule.103//104// These rules are essentially heuristics for how far we can shift the105// pattern along the search string.106//107// The implementation here uses the 'Bad Character' rule only because of the108// complexity of initialisation for the 'Good Suffix' rule.109//110// This is also known as the Boyer-Moore-Horspool algorithm:-111//112// http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm113//114// This particular implementation has few java-specific optimizations.115//116// #define ASIZE 256117//118// int bm(unsigned char *x, int m, unsigned char *y, int n) {119// int i, j;120// unsigned c;121// unsigned char bc[ASIZE];122//123// /* Preprocessing */124// for (i = 0; i < ASIZE; ++i)125// bc[i] = m;126// for (i = 0; i < m - 1; ) {127// c = x[i];128// ++i;129// // c < 256 for Latin1 string, so, no need for branch130// #ifdef PATTERN_STRING_IS_LATIN1131// bc[c] = m - i;132// #else133// if (c < ASIZE) bc[c] = m - i;134// #endif135// }136//137// /* Searching */138// j = 0;139// while (j <= n - m) {140// c = y[i+j];141// if (x[m-1] == c)142// for (i = m - 2; i >= 0 && x[i] == y[i + j]; --i);143// if (i < 0) return j;144// // c < 256 for Latin1 string, so, no need for branch145// #ifdef SOURCE_STRING_IS_LATIN1146// // LL case: (c< 256) always true. Remove branch147// j += bc[y[j+m-1]];148// #endif149// #ifndef PATTERN_STRING_IS_UTF150// // UU case: need if (c<ASIZE) check. Skip 1 character if not.151// if (c < ASIZE)152// j += bc[y[j+m-1]];153// else154// j += 1155// #endif156// #ifdef PATTERN_IS_LATIN1_AND_SOURCE_IS_UTF157// // UL case: need if (c<ASIZE) check. 
Skip <pattern length> if not.158// if (c < ASIZE)159// j += bc[y[j+m-1]];160// else161// j += m162// #endif163// }164// }165166if (icnt1 == -1) {167Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH,168BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP;169Register cnt1end = tmp2;170Register str2end = cnt2;171Register skipch = tmp2;172173// str1 length is >=8, so, we can read at least 1 register for cases when174// UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for175// UL case. We'll re-read last character in inner pre-loop code to have176// single outer pre-loop load177const int firstStep = isL ? 7 : 3;178179const int ASIZE = 256;180const int STORED_BYTES = 32; // amount of bytes stored per instruction181sub(sp, sp, ASIZE);182mov(tmp5, ASIZE/STORED_BYTES); // loop iterations183mov(ch1, sp);184BIND(BM_INIT_LOOP);185stpq(v0, v0, Address(post(ch1, STORED_BYTES)));186subs(tmp5, tmp5, 1);187br(GT, BM_INIT_LOOP);188189sub(cnt1tmp, cnt1, 1);190mov(tmp5, str2);191add(str2end, str2, result_tmp, LSL, str2_chr_shift);192sub(ch2, cnt1, 1);193mov(tmp3, str1);194BIND(BCLOOP);195(this->*str1_load_1chr)(ch1, Address(post(tmp3, str1_chr_size)));196if (!str1_isL) {197subs(zr, ch1, ASIZE);198br(HS, BCSKIP);199}200strb(ch2, Address(sp, ch1));201BIND(BCSKIP);202subs(ch2, ch2, 1);203br(GT, BCLOOP);204205add(tmp6, str1, cnt1, LSL, str1_chr_shift); // address after str1206if (str1_isL == str2_isL) {207// load last 8 bytes (8LL/4UU symbols)208ldr(tmp6, Address(tmp6, -wordSize));209} else {210ldrw(tmp6, Address(tmp6, -wordSize/2)); // load last 4 bytes(4 symbols)211// convert Latin1 to UTF. 
We'll have to wait until load completed, but212// it's still faster than per-character loads+checks213lsr(tmp3, tmp6, BitsPerByte * (wordSize/2 - str1_chr_size)); // str1[N-1]214ubfx(ch1, tmp6, 8, 8); // str1[N-2]215ubfx(ch2, tmp6, 16, 8); // str1[N-3]216andr(tmp6, tmp6, 0xFF); // str1[N-4]217orr(ch2, ch1, ch2, LSL, 16);218orr(tmp6, tmp6, tmp3, LSL, 48);219orr(tmp6, tmp6, ch2, LSL, 16);220}221BIND(BMLOOPSTR2);222(this->*str2_load_1chr)(skipch, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));223sub(cnt1tmp, cnt1tmp, firstStep); // cnt1tmp is positive here, because cnt1 >= 8224if (str1_isL == str2_isL) {225// re-init tmp3. It's for free because it's executed in parallel with226// load above. Alternative is to initialize it before loop, but it'll227// affect performance on in-order systems with 2 or more ld/st pipelines228lsr(tmp3, tmp6, BitsPerByte * (wordSize - str1_chr_size));229}230if (!isL) { // UU/UL case231lsl(ch2, cnt1tmp, 1); // offset in bytes232}233cmp(tmp3, skipch);234br(NE, BMSKIP);235ldr(ch2, Address(str2, isL ? cnt1tmp : ch2));236mov(ch1, tmp6);237if (isL) {238b(BMLOOPSTR1_AFTER_LOAD);239} else {240sub(cnt1tmp, cnt1tmp, 1); // no need to branch for UU/UL case. 
cnt1 >= 8241b(BMLOOPSTR1_CMP);242}243BIND(BMLOOPSTR1);244(this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift)));245(this->*str2_load_1chr)(ch2, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));246BIND(BMLOOPSTR1_AFTER_LOAD);247subs(cnt1tmp, cnt1tmp, 1);248br(LT, BMLOOPSTR1_LASTCMP);249BIND(BMLOOPSTR1_CMP);250cmp(ch1, ch2);251br(EQ, BMLOOPSTR1);252BIND(BMSKIP);253if (!isL) {254// if we've met UTF symbol while searching Latin1 pattern, then we can255// skip cnt1 symbols256if (str1_isL != str2_isL) {257mov(result_tmp, cnt1);258} else {259mov(result_tmp, 1);260}261subs(zr, skipch, ASIZE);262br(HS, BMADV);263}264ldrb(result_tmp, Address(sp, skipch)); // load skip distance265BIND(BMADV);266sub(cnt1tmp, cnt1, 1);267add(str2, str2, result_tmp, LSL, str2_chr_shift);268cmp(str2, str2end);269br(LE, BMLOOPSTR2);270add(sp, sp, ASIZE);271b(NOMATCH);272BIND(BMLOOPSTR1_LASTCMP);273cmp(ch1, ch2);274br(NE, BMSKIP);275BIND(BMMATCH);276sub(result, str2, tmp5);277if (!str2_isL) lsr(result, result, 1);278add(sp, sp, ASIZE);279b(DONE);280281BIND(LINEARSTUB);282cmp(cnt1, (u1)16); // small patterns still should be handled by simple algorithm283br(LT, LINEAR_MEDIUM);284mov(result, zr);285RuntimeAddress stub = NULL;286if (isL) {287stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ll());288assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated");289} else if (str1_isL) {290stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ul());291assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated");292} else {293stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_uu());294assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated");295}296trampoline_call(stub);297b(DONE);298}299300BIND(LINEARSEARCH);301{302Label DO1, DO2, DO3;303304Register str2tmp = tmp2;305Register first = tmp3;306307if (icnt1 == -1)308{309Label DOSHORT, FIRST_LOOP, 
STR2_NEXT, STR1_LOOP, STR1_NEXT;310311cmp(cnt1, u1(str1_isL == str2_isL ? 4 : 2));312br(LT, DOSHORT);313BIND(LINEAR_MEDIUM);314(this->*str1_load_1chr)(first, Address(str1));315lea(str1, Address(str1, cnt1, Address::lsl(str1_chr_shift)));316sub(cnt1_neg, zr, cnt1, LSL, str1_chr_shift);317lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));318sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);319320BIND(FIRST_LOOP);321(this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));322cmp(first, ch2);323br(EQ, STR1_LOOP);324BIND(STR2_NEXT);325adds(cnt2_neg, cnt2_neg, str2_chr_size);326br(LE, FIRST_LOOP);327b(NOMATCH);328329BIND(STR1_LOOP);330adds(cnt1tmp, cnt1_neg, str1_chr_size);331add(cnt2tmp, cnt2_neg, str2_chr_size);332br(GE, MATCH);333334BIND(STR1_NEXT);335(this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp));336(this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));337cmp(ch1, ch2);338br(NE, STR2_NEXT);339adds(cnt1tmp, cnt1tmp, str1_chr_size);340add(cnt2tmp, cnt2tmp, str2_chr_size);341br(LT, STR1_NEXT);342b(MATCH);343344BIND(DOSHORT);345if (str1_isL == str2_isL) {346cmp(cnt1, (u1)2);347br(LT, DO1);348br(GT, DO3);349}350}351352if (icnt1 == 4) {353Label CH1_LOOP;354355(this->*load_4chr)(ch1, str1);356sub(result_tmp, cnt2, 4);357lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));358sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);359360BIND(CH1_LOOP);361(this->*load_4chr)(ch2, Address(str2, cnt2_neg));362cmp(ch1, ch2);363br(EQ, MATCH);364adds(cnt2_neg, cnt2_neg, str2_chr_size);365br(LE, CH1_LOOP);366b(NOMATCH);367}368369if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 2) {370Label CH1_LOOP;371372BIND(DO2);373(this->*load_2chr)(ch1, str1);374if (icnt1 == 2) {375sub(result_tmp, cnt2, 2);376}377lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));378sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);379BIND(CH1_LOOP);380(this->*load_2chr)(ch2, Address(str2, cnt2_neg));381cmp(ch1, ch2);382br(EQ, MATCH);383adds(cnt2_neg, cnt2_neg, 
str2_chr_size);384br(LE, CH1_LOOP);385b(NOMATCH);386}387388if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 3) {389Label FIRST_LOOP, STR2_NEXT, STR1_LOOP;390391BIND(DO3);392(this->*load_2chr)(first, str1);393(this->*str1_load_1chr)(ch1, Address(str1, 2*str1_chr_size));394if (icnt1 == 3) {395sub(result_tmp, cnt2, 3);396}397lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));398sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);399BIND(FIRST_LOOP);400(this->*load_2chr)(ch2, Address(str2, cnt2_neg));401cmpw(first, ch2);402br(EQ, STR1_LOOP);403BIND(STR2_NEXT);404adds(cnt2_neg, cnt2_neg, str2_chr_size);405br(LE, FIRST_LOOP);406b(NOMATCH);407408BIND(STR1_LOOP);409add(cnt2tmp, cnt2_neg, 2*str2_chr_size);410(this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));411cmp(ch1, ch2);412br(NE, STR2_NEXT);413b(MATCH);414}415416if (icnt1 == -1 || icnt1 == 1) {417Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP;418419BIND(DO1);420(this->*str1_load_1chr)(ch1, str1);421cmp(cnt2, (u1)8);422br(LT, DO1_SHORT);423424sub(result_tmp, cnt2, 8/str2_chr_size);425sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);426mov(tmp3, str2_isL ? 0x0101010101010101 : 0x0001000100010001);427lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));428429if (str2_isL) {430orr(ch1, ch1, ch1, LSL, 8);431}432orr(ch1, ch1, ch1, LSL, 16);433orr(ch1, ch1, ch1, LSL, 32);434BIND(CH1_LOOP);435ldr(ch2, Address(str2, cnt2_neg));436eor(ch2, ch1, ch2);437sub(tmp1, ch2, tmp3);438orr(tmp2, ch2, str2_isL ? 
0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);439bics(tmp1, tmp1, tmp2);440br(NE, HAS_ZERO);441adds(cnt2_neg, cnt2_neg, 8);442br(LT, CH1_LOOP);443444cmp(cnt2_neg, (u1)8);445mov(cnt2_neg, 0);446br(LT, CH1_LOOP);447b(NOMATCH);448449BIND(HAS_ZERO);450rev(tmp1, tmp1);451clz(tmp1, tmp1);452add(cnt2_neg, cnt2_neg, tmp1, LSR, 3);453b(MATCH);454455BIND(DO1_SHORT);456mov(result_tmp, cnt2);457lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));458sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);459BIND(DO1_LOOP);460(this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));461cmpw(ch1, ch2);462br(EQ, MATCH);463adds(cnt2_neg, cnt2_neg, str2_chr_size);464br(LT, DO1_LOOP);465}466}467BIND(NOMATCH);468mov(result, -1);469b(DONE);470BIND(MATCH);471add(result, result_tmp, cnt2_neg, ASR, str2_chr_shift);472BIND(DONE);473}474475typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);476typedef void (MacroAssembler::* uxt_insn)(Register Rd, Register Rn);477478void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1,479Register ch, Register result,480Register tmp1, Register tmp2, Register tmp3)481{482Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE;483Register cnt1_neg = cnt1;484Register ch1 = rscratch1;485Register result_tmp = rscratch2;486487cbz(cnt1, NOMATCH);488489cmp(cnt1, (u1)4);490br(LT, DO1_SHORT);491492orr(ch, ch, ch, LSL, 16);493orr(ch, ch, ch, LSL, 32);494495sub(cnt1, cnt1, 4);496mov(result_tmp, cnt1);497lea(str1, Address(str1, cnt1, Address::uxtw(1)));498sub(cnt1_neg, zr, cnt1, LSL, 1);499500mov(tmp3, 0x0001000100010001);501502BIND(CH1_LOOP);503ldr(ch1, Address(str1, cnt1_neg));504eor(ch1, ch, ch1);505sub(tmp1, ch1, tmp3);506orr(tmp2, ch1, 0x7fff7fff7fff7fff);507bics(tmp1, tmp1, tmp2);508br(NE, HAS_ZERO);509adds(cnt1_neg, cnt1_neg, 8);510br(LT, CH1_LOOP);511512cmp(cnt1_neg, (u1)8);513mov(cnt1_neg, 0);514br(LT, CH1_LOOP);515b(NOMATCH);516517BIND(HAS_ZERO);518rev(tmp1, tmp1);519clz(tmp1, tmp1);520add(cnt1_neg, cnt1_neg, tmp1, 
LSR, 3);521b(MATCH);522523BIND(DO1_SHORT);524mov(result_tmp, cnt1);525lea(str1, Address(str1, cnt1, Address::uxtw(1)));526sub(cnt1_neg, zr, cnt1, LSL, 1);527BIND(DO1_LOOP);528ldrh(ch1, Address(str1, cnt1_neg));529cmpw(ch, ch1);530br(EQ, MATCH);531adds(cnt1_neg, cnt1_neg, 2);532br(LT, DO1_LOOP);533BIND(NOMATCH);534mov(result, -1);535b(DONE);536BIND(MATCH);537add(result, result_tmp, cnt1_neg, ASR, 1);538BIND(DONE);539}540541void C2_MacroAssembler::stringL_indexof_char(Register str1, Register cnt1,542Register ch, Register result,543Register tmp1, Register tmp2, Register tmp3)544{545Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE;546Register cnt1_neg = cnt1;547Register ch1 = rscratch1;548Register result_tmp = rscratch2;549550cbz(cnt1, NOMATCH);551552cmp(cnt1, (u1)8);553br(LT, DO1_SHORT);554555orr(ch, ch, ch, LSL, 8);556orr(ch, ch, ch, LSL, 16);557orr(ch, ch, ch, LSL, 32);558559sub(cnt1, cnt1, 8);560mov(result_tmp, cnt1);561lea(str1, Address(str1, cnt1));562sub(cnt1_neg, zr, cnt1);563564mov(tmp3, 0x0101010101010101);565566BIND(CH1_LOOP);567ldr(ch1, Address(str1, cnt1_neg));568eor(ch1, ch, ch1);569sub(tmp1, ch1, tmp3);570orr(tmp2, ch1, 0x7f7f7f7f7f7f7f7f);571bics(tmp1, tmp1, tmp2);572br(NE, HAS_ZERO);573adds(cnt1_neg, cnt1_neg, 8);574br(LT, CH1_LOOP);575576cmp(cnt1_neg, (u1)8);577mov(cnt1_neg, 0);578br(LT, CH1_LOOP);579b(NOMATCH);580581BIND(HAS_ZERO);582rev(tmp1, tmp1);583clz(tmp1, tmp1);584add(cnt1_neg, cnt1_neg, tmp1, LSR, 3);585b(MATCH);586587BIND(DO1_SHORT);588mov(result_tmp, cnt1);589lea(str1, Address(str1, cnt1));590sub(cnt1_neg, zr, cnt1);591BIND(DO1_LOOP);592ldrb(ch1, Address(str1, cnt1_neg));593cmp(ch, ch1);594br(EQ, MATCH);595adds(cnt1_neg, cnt1_neg, 1);596br(LT, DO1_LOOP);597BIND(NOMATCH);598mov(result, -1);599b(DONE);600BIND(MATCH);601add(result, result_tmp, cnt1_neg);602BIND(DONE);603}604605// Compare strings.606void C2_MacroAssembler::string_compare(Register str1, Register str2,607Register cnt1, Register cnt2, Register result, Register 
tmp1, Register tmp2,608FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3, int ae) {609Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB,610DIFF, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT,611SHORT_LOOP_START, TAIL_CHECK;612613bool isLL = ae == StrIntrinsicNode::LL;614bool isLU = ae == StrIntrinsicNode::LU;615bool isUL = ae == StrIntrinsicNode::UL;616617// The stub threshold for LL strings is: 72 (64 + 8) chars618// UU: 36 chars, or 72 bytes (valid for the 64-byte large loop with prefetch)619// LU/UL: 24 chars, or 48 bytes (valid for the 16-character loop at least)620const u1 stub_threshold = isLL ? 72 : ((isLU || isUL) ? 24 : 36);621622bool str1_isL = isLL || isLU;623bool str2_isL = isLL || isUL;624625int str1_chr_shift = str1_isL ? 0 : 1;626int str2_chr_shift = str2_isL ? 0 : 1;627int str1_chr_size = str1_isL ? 1 : 2;628int str2_chr_size = str2_isL ? 1 : 2;629int minCharsInWord = isLL ? wordSize : wordSize/2;630631FloatRegister vtmpZ = vtmp1, vtmp = vtmp2;632chr_insn str1_load_chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :633(chr_insn)&MacroAssembler::ldrh;634chr_insn str2_load_chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :635(chr_insn)&MacroAssembler::ldrh;636uxt_insn ext_chr = isLL ? 
(uxt_insn)&MacroAssembler::uxtbw :637(uxt_insn)&MacroAssembler::uxthw;638639BLOCK_COMMENT("string_compare {");640641// Bizzarely, the counts are passed in bytes, regardless of whether they642// are L or U strings, however the result is always in characters.643if (!str1_isL) asrw(cnt1, cnt1, 1);644if (!str2_isL) asrw(cnt2, cnt2, 1);645646// Compute the minimum of the string lengths and save the difference.647subsw(result, cnt1, cnt2);648cselw(cnt2, cnt1, cnt2, Assembler::LE); // min649650// A very short string651cmpw(cnt2, minCharsInWord);652br(Assembler::LE, SHORT_STRING);653654// Compare longwords655// load first parts of strings and finish initialization while loading656{657if (str1_isL == str2_isL) { // LL or UU658ldr(tmp1, Address(str1));659cmp(str1, str2);660br(Assembler::EQ, DONE);661ldr(tmp2, Address(str2));662cmp(cnt2, stub_threshold);663br(GE, STUB);664subsw(cnt2, cnt2, minCharsInWord);665br(EQ, TAIL_CHECK);666lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));667lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));668sub(cnt2, zr, cnt2, LSL, str2_chr_shift);669} else if (isLU) {670ldrs(vtmp, Address(str1));671ldr(tmp2, Address(str2));672cmp(cnt2, stub_threshold);673br(GE, STUB);674subw(cnt2, cnt2, 4);675eor(vtmpZ, T16B, vtmpZ, vtmpZ);676lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));677lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));678zip1(vtmp, T8B, vtmp, vtmpZ);679sub(cnt1, zr, cnt2, LSL, str1_chr_shift);680sub(cnt2, zr, cnt2, LSL, str2_chr_shift);681add(cnt1, cnt1, 4);682fmovd(tmp1, vtmp);683} else { // UL case684ldr(tmp1, Address(str1));685ldrs(vtmp, Address(str2));686cmp(cnt2, stub_threshold);687br(GE, STUB);688subw(cnt2, cnt2, 4);689lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));690eor(vtmpZ, T16B, vtmpZ, vtmpZ);691lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));692sub(cnt1, zr, cnt2, LSL, str1_chr_shift);693zip1(vtmp, T8B, vtmp, vtmpZ);694sub(cnt2, zr, cnt2, LSL, 
str2_chr_shift);695add(cnt1, cnt1, 8);696fmovd(tmp2, vtmp);697}698adds(cnt2, cnt2, isUL ? 4 : 8);699br(GE, TAIL);700eor(rscratch2, tmp1, tmp2);701cbnz(rscratch2, DIFF);702// main loop703bind(NEXT_WORD);704if (str1_isL == str2_isL) {705ldr(tmp1, Address(str1, cnt2));706ldr(tmp2, Address(str2, cnt2));707adds(cnt2, cnt2, 8);708} else if (isLU) {709ldrs(vtmp, Address(str1, cnt1));710ldr(tmp2, Address(str2, cnt2));711add(cnt1, cnt1, 4);712zip1(vtmp, T8B, vtmp, vtmpZ);713fmovd(tmp1, vtmp);714adds(cnt2, cnt2, 8);715} else { // UL716ldrs(vtmp, Address(str2, cnt2));717ldr(tmp1, Address(str1, cnt1));718zip1(vtmp, T8B, vtmp, vtmpZ);719add(cnt1, cnt1, 8);720fmovd(tmp2, vtmp);721adds(cnt2, cnt2, 4);722}723br(GE, TAIL);724725eor(rscratch2, tmp1, tmp2);726cbz(rscratch2, NEXT_WORD);727b(DIFF);728bind(TAIL);729eor(rscratch2, tmp1, tmp2);730cbnz(rscratch2, DIFF);731// Last longword. In the case where length == 4 we compare the732// same longword twice, but that's still faster than another733// conditional branch.734if (str1_isL == str2_isL) {735ldr(tmp1, Address(str1));736ldr(tmp2, Address(str2));737} else if (isLU) {738ldrs(vtmp, Address(str1));739ldr(tmp2, Address(str2));740zip1(vtmp, T8B, vtmp, vtmpZ);741fmovd(tmp1, vtmp);742} else { // UL743ldrs(vtmp, Address(str2));744ldr(tmp1, Address(str1));745zip1(vtmp, T8B, vtmp, vtmpZ);746fmovd(tmp2, vtmp);747}748bind(TAIL_CHECK);749eor(rscratch2, tmp1, tmp2);750cbz(rscratch2, DONE);751752// Find the first different characters in the longwords and753// compute their difference.754bind(DIFF);755rev(rscratch2, rscratch2);756clz(rscratch2, rscratch2);757andr(rscratch2, rscratch2, isLL ? 
-8 : -16);758lsrv(tmp1, tmp1, rscratch2);759(this->*ext_chr)(tmp1, tmp1);760lsrv(tmp2, tmp2, rscratch2);761(this->*ext_chr)(tmp2, tmp2);762subw(result, tmp1, tmp2);763b(DONE);764}765766bind(STUB);767RuntimeAddress stub = NULL;768switch(ae) {769case StrIntrinsicNode::LL:770stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LL());771break;772case StrIntrinsicNode::UU:773stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UU());774break;775case StrIntrinsicNode::LU:776stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LU());777break;778case StrIntrinsicNode::UL:779stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UL());780break;781default:782ShouldNotReachHere();783}784assert(stub.target() != NULL, "compare_long_string stub has not been generated");785trampoline_call(stub);786b(DONE);787788bind(SHORT_STRING);789// Is the minimum length zero?790cbz(cnt2, DONE);791// arrange code to do most branches while loading and loading next characters792// while comparing previous793(this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));794subs(cnt2, cnt2, 1);795br(EQ, SHORT_LAST_INIT);796(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));797b(SHORT_LOOP_START);798bind(SHORT_LOOP);799subs(cnt2, cnt2, 1);800br(EQ, SHORT_LAST);801bind(SHORT_LOOP_START);802(this->*str1_load_chr)(tmp2, Address(post(str1, str1_chr_size)));803(this->*str2_load_chr)(rscratch1, Address(post(str2, str2_chr_size)));804cmp(tmp1, cnt1);805br(NE, SHORT_LOOP_TAIL);806subs(cnt2, cnt2, 1);807br(EQ, SHORT_LAST2);808(this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));809(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));810cmp(tmp2, rscratch1);811br(EQ, SHORT_LOOP);812sub(result, tmp2, rscratch1);813b(DONE);814bind(SHORT_LOOP_TAIL);815sub(result, tmp1, cnt1);816b(DONE);817bind(SHORT_LAST2);818cmp(tmp2, rscratch1);819br(EQ, DONE);820sub(result, tmp2, 
rscratch1);821822b(DONE);823bind(SHORT_LAST_INIT);824(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));825bind(SHORT_LAST);826cmp(tmp1, cnt1);827br(EQ, DONE);828sub(result, tmp1, cnt1);829830bind(DONE);831832BLOCK_COMMENT("} string_compare");833}834835836