// Path: blob/master/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
// 64441 views
/*1* Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation.7*8* This code is distributed in the hope that it will be useful, but WITHOUT9* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or10* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License11* version 2 for more details (a copy is included in the LICENSE file that12* accompanied this code).13*14* You should have received a copy of the GNU General Public License version15* 2 along with this work; if not, write to the Free Software Foundation,16* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.17*18* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA19* or visit www.oracle.com if you need additional information or have any20* questions.21*22*/2324#include "precompiled.hpp"25#include "asm/assembler.hpp"26#include "asm/assembler.inline.hpp"27#include "opto/c2_MacroAssembler.hpp"28#include "opto/intrinsicnode.hpp"29#include "opto/subnode.hpp"30#include "runtime/stubRoutines.hpp"3132#ifdef PRODUCT33#define BLOCK_COMMENT(str) /* nothing */34#define STOP(error) stop(error)35#else36#define BLOCK_COMMENT(str) block_comment(str)37#define STOP(error) block_comment(error); stop(error)38#endif3940#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")4142typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);4344// Search for str1 in str2 and return index or -145void C2_MacroAssembler::string_indexof(Register str2, Register str1,46Register cnt2, Register cnt1,47Register tmp1, Register tmp2,48Register tmp3, Register tmp4,49Register tmp5, Register tmp6,50int icnt1, Register result, int ae) {51// NOTE: tmp5, tmp6 can be zr depending on specific method version52Label 
LINEARSEARCH, LINEARSTUB, LINEAR_MEDIUM, DONE, NOMATCH, MATCH;5354Register ch1 = rscratch1;55Register ch2 = rscratch2;56Register cnt1tmp = tmp1;57Register cnt2tmp = tmp2;58Register cnt1_neg = cnt1;59Register cnt2_neg = cnt2;60Register result_tmp = tmp4;6162bool isL = ae == StrIntrinsicNode::LL;6364bool str1_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL;65bool str2_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU;66int str1_chr_shift = str1_isL ? 0:1;67int str2_chr_shift = str2_isL ? 0:1;68int str1_chr_size = str1_isL ? 1:2;69int str2_chr_size = str2_isL ? 1:2;70chr_insn str1_load_1chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :71(chr_insn)&MacroAssembler::ldrh;72chr_insn str2_load_1chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :73(chr_insn)&MacroAssembler::ldrh;74chr_insn load_2chr = isL ? (chr_insn)&MacroAssembler::ldrh : (chr_insn)&MacroAssembler::ldrw;75chr_insn load_4chr = isL ? (chr_insn)&MacroAssembler::ldrw : (chr_insn)&MacroAssembler::ldr;7677// Note, inline_string_indexOf() generates checks:78// if (substr.count > string.count) return -1;79// if (substr.count == 0) return 0;8081// We have two strings, a source string in str2, cnt2 and a pattern string82// in str1, cnt1. Find the 1st occurence of pattern in source or return -1.8384// For larger pattern and source we use a simplified Boyer Moore algorithm.85// With a small pattern and source we use linear scan.8687if (icnt1 == -1) {88sub(result_tmp, cnt2, cnt1);89cmp(cnt1, (u1)8); // Use Linear Scan if cnt1 < 8 || cnt1 >= 25690br(LT, LINEARSEARCH);91dup(v0, T16B, cnt1); // done in separate FPU pipeline. Almost no penalty92subs(zr, cnt1, 256);93lsr(tmp1, cnt2, 2);94ccmp(cnt1, tmp1, 0b0000, LT); // Source must be 4 * pattern for BM95br(GE, LINEARSTUB);96}9798// The Boyer Moore alogorithm is based on the description here:-99//100// http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm101//102// This describes and algorithm with 2 shift rules. 
The 'Bad Character' rule103// and the 'Good Suffix' rule.104//105// These rules are essentially heuristics for how far we can shift the106// pattern along the search string.107//108// The implementation here uses the 'Bad Character' rule only because of the109// complexity of initialisation for the 'Good Suffix' rule.110//111// This is also known as the Boyer-Moore-Horspool algorithm:-112//113// http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm114//115// This particular implementation has few java-specific optimizations.116//117// #define ASIZE 256118//119// int bm(unsigned char *x, int m, unsigned char *y, int n) {120// int i, j;121// unsigned c;122// unsigned char bc[ASIZE];123//124// /* Preprocessing */125// for (i = 0; i < ASIZE; ++i)126// bc[i] = m;127// for (i = 0; i < m - 1; ) {128// c = x[i];129// ++i;130// // c < 256 for Latin1 string, so, no need for branch131// #ifdef PATTERN_STRING_IS_LATIN1132// bc[c] = m - i;133// #else134// if (c < ASIZE) bc[c] = m - i;135// #endif136// }137//138// /* Searching */139// j = 0;140// while (j <= n - m) {141// c = y[i+j];142// if (x[m-1] == c)143// for (i = m - 2; i >= 0 && x[i] == y[i + j]; --i);144// if (i < 0) return j;145// // c < 256 for Latin1 string, so, no need for branch146// #ifdef SOURCE_STRING_IS_LATIN1147// // LL case: (c< 256) always true. Remove branch148// j += bc[y[j+m-1]];149// #endif150// #ifndef PATTERN_STRING_IS_UTF151// // UU case: need if (c<ASIZE) check. Skip 1 character if not.152// if (c < ASIZE)153// j += bc[y[j+m-1]];154// else155// j += 1156// #endif157// #ifdef PATTERN_IS_LATIN1_AND_SOURCE_IS_UTF158// // UL case: need if (c<ASIZE) check. 
Skip <pattern length> if not.159// if (c < ASIZE)160// j += bc[y[j+m-1]];161// else162// j += m163// #endif164// }165// }166167if (icnt1 == -1) {168Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH,169BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP;170Register cnt1end = tmp2;171Register str2end = cnt2;172Register skipch = tmp2;173174// str1 length is >=8, so, we can read at least 1 register for cases when175// UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for176// UL case. We'll re-read last character in inner pre-loop code to have177// single outer pre-loop load178const int firstStep = isL ? 7 : 3;179180const int ASIZE = 256;181const int STORED_BYTES = 32; // amount of bytes stored per instruction182sub(sp, sp, ASIZE);183mov(tmp5, ASIZE/STORED_BYTES); // loop iterations184mov(ch1, sp);185BIND(BM_INIT_LOOP);186stpq(v0, v0, Address(post(ch1, STORED_BYTES)));187subs(tmp5, tmp5, 1);188br(GT, BM_INIT_LOOP);189190sub(cnt1tmp, cnt1, 1);191mov(tmp5, str2);192add(str2end, str2, result_tmp, LSL, str2_chr_shift);193sub(ch2, cnt1, 1);194mov(tmp3, str1);195BIND(BCLOOP);196(this->*str1_load_1chr)(ch1, Address(post(tmp3, str1_chr_size)));197if (!str1_isL) {198subs(zr, ch1, ASIZE);199br(HS, BCSKIP);200}201strb(ch2, Address(sp, ch1));202BIND(BCSKIP);203subs(ch2, ch2, 1);204br(GT, BCLOOP);205206add(tmp6, str1, cnt1, LSL, str1_chr_shift); // address after str1207if (str1_isL == str2_isL) {208// load last 8 bytes (8LL/4UU symbols)209ldr(tmp6, Address(tmp6, -wordSize));210} else {211ldrw(tmp6, Address(tmp6, -wordSize/2)); // load last 4 bytes(4 symbols)212// convert Latin1 to UTF. 
We'll have to wait until load completed, but213// it's still faster than per-character loads+checks214lsr(tmp3, tmp6, BitsPerByte * (wordSize/2 - str1_chr_size)); // str1[N-1]215ubfx(ch1, tmp6, 8, 8); // str1[N-2]216ubfx(ch2, tmp6, 16, 8); // str1[N-3]217andr(tmp6, tmp6, 0xFF); // str1[N-4]218orr(ch2, ch1, ch2, LSL, 16);219orr(tmp6, tmp6, tmp3, LSL, 48);220orr(tmp6, tmp6, ch2, LSL, 16);221}222BIND(BMLOOPSTR2);223(this->*str2_load_1chr)(skipch, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));224sub(cnt1tmp, cnt1tmp, firstStep); // cnt1tmp is positive here, because cnt1 >= 8225if (str1_isL == str2_isL) {226// re-init tmp3. It's for free because it's executed in parallel with227// load above. Alternative is to initialize it before loop, but it'll228// affect performance on in-order systems with 2 or more ld/st pipelines229lsr(tmp3, tmp6, BitsPerByte * (wordSize - str1_chr_size));230}231if (!isL) { // UU/UL case232lsl(ch2, cnt1tmp, 1); // offset in bytes233}234cmp(tmp3, skipch);235br(NE, BMSKIP);236ldr(ch2, Address(str2, isL ? cnt1tmp : ch2));237mov(ch1, tmp6);238if (isL) {239b(BMLOOPSTR1_AFTER_LOAD);240} else {241sub(cnt1tmp, cnt1tmp, 1); // no need to branch for UU/UL case. 
cnt1 >= 8242b(BMLOOPSTR1_CMP);243}244BIND(BMLOOPSTR1);245(this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift)));246(this->*str2_load_1chr)(ch2, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));247BIND(BMLOOPSTR1_AFTER_LOAD);248subs(cnt1tmp, cnt1tmp, 1);249br(LT, BMLOOPSTR1_LASTCMP);250BIND(BMLOOPSTR1_CMP);251cmp(ch1, ch2);252br(EQ, BMLOOPSTR1);253BIND(BMSKIP);254if (!isL) {255// if we've met UTF symbol while searching Latin1 pattern, then we can256// skip cnt1 symbols257if (str1_isL != str2_isL) {258mov(result_tmp, cnt1);259} else {260mov(result_tmp, 1);261}262subs(zr, skipch, ASIZE);263br(HS, BMADV);264}265ldrb(result_tmp, Address(sp, skipch)); // load skip distance266BIND(BMADV);267sub(cnt1tmp, cnt1, 1);268add(str2, str2, result_tmp, LSL, str2_chr_shift);269cmp(str2, str2end);270br(LE, BMLOOPSTR2);271add(sp, sp, ASIZE);272b(NOMATCH);273BIND(BMLOOPSTR1_LASTCMP);274cmp(ch1, ch2);275br(NE, BMSKIP);276BIND(BMMATCH);277sub(result, str2, tmp5);278if (!str2_isL) lsr(result, result, 1);279add(sp, sp, ASIZE);280b(DONE);281282BIND(LINEARSTUB);283cmp(cnt1, (u1)16); // small patterns still should be handled by simple algorithm284br(LT, LINEAR_MEDIUM);285mov(result, zr);286RuntimeAddress stub = NULL;287if (isL) {288stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ll());289assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated");290} else if (str1_isL) {291stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ul());292assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated");293} else {294stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_uu());295assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated");296}297trampoline_call(stub);298b(DONE);299}300301BIND(LINEARSEARCH);302{303Label DO1, DO2, DO3;304305Register str2tmp = tmp2;306Register first = tmp3;307308if (icnt1 == -1)309{310Label DOSHORT, FIRST_LOOP, 
STR2_NEXT, STR1_LOOP, STR1_NEXT;311312cmp(cnt1, u1(str1_isL == str2_isL ? 4 : 2));313br(LT, DOSHORT);314BIND(LINEAR_MEDIUM);315(this->*str1_load_1chr)(first, Address(str1));316lea(str1, Address(str1, cnt1, Address::lsl(str1_chr_shift)));317sub(cnt1_neg, zr, cnt1, LSL, str1_chr_shift);318lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));319sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);320321BIND(FIRST_LOOP);322(this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));323cmp(first, ch2);324br(EQ, STR1_LOOP);325BIND(STR2_NEXT);326adds(cnt2_neg, cnt2_neg, str2_chr_size);327br(LE, FIRST_LOOP);328b(NOMATCH);329330BIND(STR1_LOOP);331adds(cnt1tmp, cnt1_neg, str1_chr_size);332add(cnt2tmp, cnt2_neg, str2_chr_size);333br(GE, MATCH);334335BIND(STR1_NEXT);336(this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp));337(this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));338cmp(ch1, ch2);339br(NE, STR2_NEXT);340adds(cnt1tmp, cnt1tmp, str1_chr_size);341add(cnt2tmp, cnt2tmp, str2_chr_size);342br(LT, STR1_NEXT);343b(MATCH);344345BIND(DOSHORT);346if (str1_isL == str2_isL) {347cmp(cnt1, (u1)2);348br(LT, DO1);349br(GT, DO3);350}351}352353if (icnt1 == 4) {354Label CH1_LOOP;355356(this->*load_4chr)(ch1, str1);357sub(result_tmp, cnt2, 4);358lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));359sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);360361BIND(CH1_LOOP);362(this->*load_4chr)(ch2, Address(str2, cnt2_neg));363cmp(ch1, ch2);364br(EQ, MATCH);365adds(cnt2_neg, cnt2_neg, str2_chr_size);366br(LE, CH1_LOOP);367b(NOMATCH);368}369370if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 2) {371Label CH1_LOOP;372373BIND(DO2);374(this->*load_2chr)(ch1, str1);375if (icnt1 == 2) {376sub(result_tmp, cnt2, 2);377}378lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));379sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);380BIND(CH1_LOOP);381(this->*load_2chr)(ch2, Address(str2, cnt2_neg));382cmp(ch1, ch2);383br(EQ, MATCH);384adds(cnt2_neg, cnt2_neg, 
str2_chr_size);385br(LE, CH1_LOOP);386b(NOMATCH);387}388389if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 3) {390Label FIRST_LOOP, STR2_NEXT, STR1_LOOP;391392BIND(DO3);393(this->*load_2chr)(first, str1);394(this->*str1_load_1chr)(ch1, Address(str1, 2*str1_chr_size));395if (icnt1 == 3) {396sub(result_tmp, cnt2, 3);397}398lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));399sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);400BIND(FIRST_LOOP);401(this->*load_2chr)(ch2, Address(str2, cnt2_neg));402cmpw(first, ch2);403br(EQ, STR1_LOOP);404BIND(STR2_NEXT);405adds(cnt2_neg, cnt2_neg, str2_chr_size);406br(LE, FIRST_LOOP);407b(NOMATCH);408409BIND(STR1_LOOP);410add(cnt2tmp, cnt2_neg, 2*str2_chr_size);411(this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));412cmp(ch1, ch2);413br(NE, STR2_NEXT);414b(MATCH);415}416417if (icnt1 == -1 || icnt1 == 1) {418Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP;419420BIND(DO1);421(this->*str1_load_1chr)(ch1, str1);422cmp(cnt2, (u1)8);423br(LT, DO1_SHORT);424425sub(result_tmp, cnt2, 8/str2_chr_size);426sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);427mov(tmp3, str2_isL ? 0x0101010101010101 : 0x0001000100010001);428lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));429430if (str2_isL) {431orr(ch1, ch1, ch1, LSL, 8);432}433orr(ch1, ch1, ch1, LSL, 16);434orr(ch1, ch1, ch1, LSL, 32);435BIND(CH1_LOOP);436ldr(ch2, Address(str2, cnt2_neg));437eor(ch2, ch1, ch2);438sub(tmp1, ch2, tmp3);439orr(tmp2, ch2, str2_isL ? 
0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);440bics(tmp1, tmp1, tmp2);441br(NE, HAS_ZERO);442adds(cnt2_neg, cnt2_neg, 8);443br(LT, CH1_LOOP);444445cmp(cnt2_neg, (u1)8);446mov(cnt2_neg, 0);447br(LT, CH1_LOOP);448b(NOMATCH);449450BIND(HAS_ZERO);451rev(tmp1, tmp1);452clz(tmp1, tmp1);453add(cnt2_neg, cnt2_neg, tmp1, LSR, 3);454b(MATCH);455456BIND(DO1_SHORT);457mov(result_tmp, cnt2);458lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));459sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);460BIND(DO1_LOOP);461(this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));462cmpw(ch1, ch2);463br(EQ, MATCH);464adds(cnt2_neg, cnt2_neg, str2_chr_size);465br(LT, DO1_LOOP);466}467}468BIND(NOMATCH);469mov(result, -1);470b(DONE);471BIND(MATCH);472add(result, result_tmp, cnt2_neg, ASR, str2_chr_shift);473BIND(DONE);474}475476typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);477typedef void (MacroAssembler::* uxt_insn)(Register Rd, Register Rn);478479void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1,480Register ch, Register result,481Register tmp1, Register tmp2, Register tmp3)482{483Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE;484Register cnt1_neg = cnt1;485Register ch1 = rscratch1;486Register result_tmp = rscratch2;487488cbz(cnt1, NOMATCH);489490cmp(cnt1, (u1)4);491br(LT, DO1_SHORT);492493orr(ch, ch, ch, LSL, 16);494orr(ch, ch, ch, LSL, 32);495496sub(cnt1, cnt1, 4);497mov(result_tmp, cnt1);498lea(str1, Address(str1, cnt1, Address::uxtw(1)));499sub(cnt1_neg, zr, cnt1, LSL, 1);500501mov(tmp3, 0x0001000100010001);502503BIND(CH1_LOOP);504ldr(ch1, Address(str1, cnt1_neg));505eor(ch1, ch, ch1);506sub(tmp1, ch1, tmp3);507orr(tmp2, ch1, 0x7fff7fff7fff7fff);508bics(tmp1, tmp1, tmp2);509br(NE, HAS_ZERO);510adds(cnt1_neg, cnt1_neg, 8);511br(LT, CH1_LOOP);512513cmp(cnt1_neg, (u1)8);514mov(cnt1_neg, 0);515br(LT, CH1_LOOP);516b(NOMATCH);517518BIND(HAS_ZERO);519rev(tmp1, tmp1);520clz(tmp1, tmp1);521add(cnt1_neg, cnt1_neg, tmp1, 
LSR, 3);522b(MATCH);523524BIND(DO1_SHORT);525mov(result_tmp, cnt1);526lea(str1, Address(str1, cnt1, Address::uxtw(1)));527sub(cnt1_neg, zr, cnt1, LSL, 1);528BIND(DO1_LOOP);529ldrh(ch1, Address(str1, cnt1_neg));530cmpw(ch, ch1);531br(EQ, MATCH);532adds(cnt1_neg, cnt1_neg, 2);533br(LT, DO1_LOOP);534BIND(NOMATCH);535mov(result, -1);536b(DONE);537BIND(MATCH);538add(result, result_tmp, cnt1_neg, ASR, 1);539BIND(DONE);540}541542void C2_MacroAssembler::stringL_indexof_char(Register str1, Register cnt1,543Register ch, Register result,544Register tmp1, Register tmp2, Register tmp3)545{546Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE;547Register cnt1_neg = cnt1;548Register ch1 = rscratch1;549Register result_tmp = rscratch2;550551cbz(cnt1, NOMATCH);552553cmp(cnt1, (u1)8);554br(LT, DO1_SHORT);555556orr(ch, ch, ch, LSL, 8);557orr(ch, ch, ch, LSL, 16);558orr(ch, ch, ch, LSL, 32);559560sub(cnt1, cnt1, 8);561mov(result_tmp, cnt1);562lea(str1, Address(str1, cnt1));563sub(cnt1_neg, zr, cnt1);564565mov(tmp3, 0x0101010101010101);566567BIND(CH1_LOOP);568ldr(ch1, Address(str1, cnt1_neg));569eor(ch1, ch, ch1);570sub(tmp1, ch1, tmp3);571orr(tmp2, ch1, 0x7f7f7f7f7f7f7f7f);572bics(tmp1, tmp1, tmp2);573br(NE, HAS_ZERO);574adds(cnt1_neg, cnt1_neg, 8);575br(LT, CH1_LOOP);576577cmp(cnt1_neg, (u1)8);578mov(cnt1_neg, 0);579br(LT, CH1_LOOP);580b(NOMATCH);581582BIND(HAS_ZERO);583rev(tmp1, tmp1);584clz(tmp1, tmp1);585add(cnt1_neg, cnt1_neg, tmp1, LSR, 3);586b(MATCH);587588BIND(DO1_SHORT);589mov(result_tmp, cnt1);590lea(str1, Address(str1, cnt1));591sub(cnt1_neg, zr, cnt1);592BIND(DO1_LOOP);593ldrb(ch1, Address(str1, cnt1_neg));594cmp(ch, ch1);595br(EQ, MATCH);596adds(cnt1_neg, cnt1_neg, 1);597br(LT, DO1_LOOP);598BIND(NOMATCH);599mov(result, -1);600b(DONE);601BIND(MATCH);602add(result, result_tmp, cnt1_neg);603BIND(DONE);604}605606// Compare strings.607void C2_MacroAssembler::string_compare(Register str1, Register str2,608Register cnt1, Register cnt2, Register result, Register 
tmp1, Register tmp2,609FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3, int ae) {610Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB,611DIFF, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT,612SHORT_LOOP_START, TAIL_CHECK;613614bool isLL = ae == StrIntrinsicNode::LL;615bool isLU = ae == StrIntrinsicNode::LU;616bool isUL = ae == StrIntrinsicNode::UL;617618// The stub threshold for LL strings is: 72 (64 + 8) chars619// UU: 36 chars, or 72 bytes (valid for the 64-byte large loop with prefetch)620// LU/UL: 24 chars, or 48 bytes (valid for the 16-character loop at least)621const u1 stub_threshold = isLL ? 72 : ((isLU || isUL) ? 24 : 36);622623bool str1_isL = isLL || isLU;624bool str2_isL = isLL || isUL;625626int str1_chr_shift = str1_isL ? 0 : 1;627int str2_chr_shift = str2_isL ? 0 : 1;628int str1_chr_size = str1_isL ? 1 : 2;629int str2_chr_size = str2_isL ? 1 : 2;630int minCharsInWord = isLL ? wordSize : wordSize/2;631632FloatRegister vtmpZ = vtmp1, vtmp = vtmp2;633chr_insn str1_load_chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :634(chr_insn)&MacroAssembler::ldrh;635chr_insn str2_load_chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :636(chr_insn)&MacroAssembler::ldrh;637uxt_insn ext_chr = isLL ? 
(uxt_insn)&MacroAssembler::uxtbw :638(uxt_insn)&MacroAssembler::uxthw;639640BLOCK_COMMENT("string_compare {");641642// Bizzarely, the counts are passed in bytes, regardless of whether they643// are L or U strings, however the result is always in characters.644if (!str1_isL) asrw(cnt1, cnt1, 1);645if (!str2_isL) asrw(cnt2, cnt2, 1);646647// Compute the minimum of the string lengths and save the difference.648subsw(result, cnt1, cnt2);649cselw(cnt2, cnt1, cnt2, Assembler::LE); // min650651// A very short string652cmpw(cnt2, minCharsInWord);653br(Assembler::LE, SHORT_STRING);654655// Compare longwords656// load first parts of strings and finish initialization while loading657{658if (str1_isL == str2_isL) { // LL or UU659ldr(tmp1, Address(str1));660cmp(str1, str2);661br(Assembler::EQ, DONE);662ldr(tmp2, Address(str2));663cmp(cnt2, stub_threshold);664br(GE, STUB);665subsw(cnt2, cnt2, minCharsInWord);666br(EQ, TAIL_CHECK);667lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));668lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));669sub(cnt2, zr, cnt2, LSL, str2_chr_shift);670} else if (isLU) {671ldrs(vtmp, Address(str1));672ldr(tmp2, Address(str2));673cmp(cnt2, stub_threshold);674br(GE, STUB);675subw(cnt2, cnt2, 4);676eor(vtmpZ, T16B, vtmpZ, vtmpZ);677lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));678lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));679zip1(vtmp, T8B, vtmp, vtmpZ);680sub(cnt1, zr, cnt2, LSL, str1_chr_shift);681sub(cnt2, zr, cnt2, LSL, str2_chr_shift);682add(cnt1, cnt1, 4);683fmovd(tmp1, vtmp);684} else { // UL case685ldr(tmp1, Address(str1));686ldrs(vtmp, Address(str2));687cmp(cnt2, stub_threshold);688br(GE, STUB);689subw(cnt2, cnt2, 4);690lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));691eor(vtmpZ, T16B, vtmpZ, vtmpZ);692lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));693sub(cnt1, zr, cnt2, LSL, str1_chr_shift);694zip1(vtmp, T8B, vtmp, vtmpZ);695sub(cnt2, zr, cnt2, LSL, 
str2_chr_shift);696add(cnt1, cnt1, 8);697fmovd(tmp2, vtmp);698}699adds(cnt2, cnt2, isUL ? 4 : 8);700br(GE, TAIL);701eor(rscratch2, tmp1, tmp2);702cbnz(rscratch2, DIFF);703// main loop704bind(NEXT_WORD);705if (str1_isL == str2_isL) {706ldr(tmp1, Address(str1, cnt2));707ldr(tmp2, Address(str2, cnt2));708adds(cnt2, cnt2, 8);709} else if (isLU) {710ldrs(vtmp, Address(str1, cnt1));711ldr(tmp2, Address(str2, cnt2));712add(cnt1, cnt1, 4);713zip1(vtmp, T8B, vtmp, vtmpZ);714fmovd(tmp1, vtmp);715adds(cnt2, cnt2, 8);716} else { // UL717ldrs(vtmp, Address(str2, cnt2));718ldr(tmp1, Address(str1, cnt1));719zip1(vtmp, T8B, vtmp, vtmpZ);720add(cnt1, cnt1, 8);721fmovd(tmp2, vtmp);722adds(cnt2, cnt2, 4);723}724br(GE, TAIL);725726eor(rscratch2, tmp1, tmp2);727cbz(rscratch2, NEXT_WORD);728b(DIFF);729bind(TAIL);730eor(rscratch2, tmp1, tmp2);731cbnz(rscratch2, DIFF);732// Last longword. In the case where length == 4 we compare the733// same longword twice, but that's still faster than another734// conditional branch.735if (str1_isL == str2_isL) {736ldr(tmp1, Address(str1));737ldr(tmp2, Address(str2));738} else if (isLU) {739ldrs(vtmp, Address(str1));740ldr(tmp2, Address(str2));741zip1(vtmp, T8B, vtmp, vtmpZ);742fmovd(tmp1, vtmp);743} else { // UL744ldrs(vtmp, Address(str2));745ldr(tmp1, Address(str1));746zip1(vtmp, T8B, vtmp, vtmpZ);747fmovd(tmp2, vtmp);748}749bind(TAIL_CHECK);750eor(rscratch2, tmp1, tmp2);751cbz(rscratch2, DONE);752753// Find the first different characters in the longwords and754// compute their difference.755bind(DIFF);756rev(rscratch2, rscratch2);757clz(rscratch2, rscratch2);758andr(rscratch2, rscratch2, isLL ? 
-8 : -16);759lsrv(tmp1, tmp1, rscratch2);760(this->*ext_chr)(tmp1, tmp1);761lsrv(tmp2, tmp2, rscratch2);762(this->*ext_chr)(tmp2, tmp2);763subw(result, tmp1, tmp2);764b(DONE);765}766767bind(STUB);768RuntimeAddress stub = NULL;769switch(ae) {770case StrIntrinsicNode::LL:771stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LL());772break;773case StrIntrinsicNode::UU:774stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UU());775break;776case StrIntrinsicNode::LU:777stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LU());778break;779case StrIntrinsicNode::UL:780stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UL());781break;782default:783ShouldNotReachHere();784}785assert(stub.target() != NULL, "compare_long_string stub has not been generated");786trampoline_call(stub);787b(DONE);788789bind(SHORT_STRING);790// Is the minimum length zero?791cbz(cnt2, DONE);792// arrange code to do most branches while loading and loading next characters793// while comparing previous794(this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));795subs(cnt2, cnt2, 1);796br(EQ, SHORT_LAST_INIT);797(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));798b(SHORT_LOOP_START);799bind(SHORT_LOOP);800subs(cnt2, cnt2, 1);801br(EQ, SHORT_LAST);802bind(SHORT_LOOP_START);803(this->*str1_load_chr)(tmp2, Address(post(str1, str1_chr_size)));804(this->*str2_load_chr)(rscratch1, Address(post(str2, str2_chr_size)));805cmp(tmp1, cnt1);806br(NE, SHORT_LOOP_TAIL);807subs(cnt2, cnt2, 1);808br(EQ, SHORT_LAST2);809(this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));810(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));811cmp(tmp2, rscratch1);812br(EQ, SHORT_LOOP);813sub(result, tmp2, rscratch1);814b(DONE);815bind(SHORT_LOOP_TAIL);816sub(result, tmp1, cnt1);817b(DONE);818bind(SHORT_LAST2);819cmp(tmp2, rscratch1);820br(EQ, DONE);821sub(result, tmp2, 
rscratch1);822823b(DONE);824bind(SHORT_LAST_INIT);825(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));826bind(SHORT_LAST);827cmp(tmp1, cnt1);828br(EQ, DONE);829sub(result, tmp1, cnt1);830831bind(DONE);832833BLOCK_COMMENT("} string_compare");834}835836void C2_MacroAssembler::neon_compare(FloatRegister dst, BasicType bt, FloatRegister src1,837FloatRegister src2, int cond, bool isQ) {838SIMD_Arrangement size = esize2arrangement(type2aelembytes(bt), isQ);839if (bt == T_FLOAT || bt == T_DOUBLE) {840switch (cond) {841case BoolTest::eq: fcmeq(dst, size, src1, src2); break;842case BoolTest::ne: {843fcmeq(dst, size, src1, src2);844notr(dst, T16B, dst);845break;846}847case BoolTest::ge: fcmge(dst, size, src1, src2); break;848case BoolTest::gt: fcmgt(dst, size, src1, src2); break;849case BoolTest::le: fcmge(dst, size, src2, src1); break;850case BoolTest::lt: fcmgt(dst, size, src2, src1); break;851default:852assert(false, "unsupported");853ShouldNotReachHere();854}855} else {856switch (cond) {857case BoolTest::eq: cmeq(dst, size, src1, src2); break;858case BoolTest::ne: {859cmeq(dst, size, src1, src2);860notr(dst, T16B, dst);861break;862}863case BoolTest::ge: cmge(dst, size, src1, src2); break;864case BoolTest::gt: cmgt(dst, size, src1, src2); break;865case BoolTest::le: cmge(dst, size, src2, src1); break;866case BoolTest::lt: cmgt(dst, size, src2, src1); break;867case BoolTest::uge: cmhs(dst, size, src1, src2); break;868case BoolTest::ugt: cmhi(dst, size, src1, src2); break;869case BoolTest::ult: cmhi(dst, size, src2, src1); break;870case BoolTest::ule: cmhs(dst, size, src2, src1); break;871default:872assert(false, "unsupported");873ShouldNotReachHere();874}875}876}877878879