Path: blob/master/src/hotspot/cpu/s390/c2_MacroAssembler_s390.cpp
40930 views
/*
 * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "opto/c2_MacroAssembler.hpp"
#include "opto/intrinsicnode.hpp"
#include "runtime/stubRoutines.hpp"

#define BLOCK_COMMENT(str) block_comment(str)
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

//------------------------------------------------------
// Special String Intrinsics. Implementation
//------------------------------------------------------

// Intrinsics for CompactStrings

// Compress char[] to byte[].
//   Restores: src, dst
//   Uses:     cnt
//   Kills:    tmp, Z_R0, Z_R1.
//   Early clobber: result.
// Note:
//   cnt is signed int. Do not rely on high word!
//   counts # characters, not bytes.
// The result is the number of characters copied before the first incompatible character was found.
// If precise is true, the processing stops exactly at this point. Otherwise, the result may be off
// by a few bytes. The result always indicates the number of copied characters.
// When used as a character index, the returned value points to the first incompatible character.
//
// Note: Does not behave exactly like package private StringUTF16 compress java implementation in case of failure:
// - Different number of characters may have been written to dead array (if precise is false).
// - Returns a number <cnt instead of 0. (Result gets compared with cnt.)
//
// The C++ return value is offset() after emission minus offset() before emission,
// i.e. the size of the generated code sequence.
unsigned int C2_MacroAssembler::string_compress(Register result, Register src, Register dst, Register cnt,
                                                Register tmp, bool precise) {
  assert_different_registers(Z_R0, Z_R1, result, src, dst, cnt, tmp);

  if (precise) {
    BLOCK_COMMENT("encode_iso_array {");
  } else {
    BLOCK_COMMENT("string_compress {");
  }
  int block_start = offset();

  Register Rsrc = src;
  Register Rdst = dst;
  Register Rix = tmp;
  Register Rcnt = cnt;
  Register Rmask = result; // holds incompatibility check mask until result value is stored.
  Label ScalarShortcut, AllDone;

  // Build the 64-bit mask 0xFF00FF00FF00FF00: the high byte of each of four 2-byte chars.
  z_iilf(Rmask, 0xFF00FF00);
  z_iihf(Rmask, 0xFF00FF00);

#if 0 // Sacrifice shortcuts for code compactness
  {
    //---< shortcuts for short strings (very frequent) >---
    // Strings with 4 and 8 characters were found to occur very frequently.
    // Therefore, we handle them right away with minimal overhead.
    Label skipShortcut, skip4Shortcut, skip8Shortcut;
    Register Rout = Z_R0;
    z_chi(Rcnt, 4);
    z_brne(skip4Shortcut); // 4 characters are very frequent
    z_lg(Z_R0, 0, Rsrc); // Treat exactly 4 characters specially.
    if (VM_Version::has_DistinctOpnds()) {
      Rout = Z_R0;
      z_ngrk(Rix, Z_R0, Rmask);
    } else {
      Rout = Rix;
      z_lgr(Rix, Z_R0);
      z_ngr(Z_R0, Rmask);
    }
    z_brnz(skipShortcut);
    z_stcmh(Rout, 5, 0, Rdst);
    z_stcm(Rout, 5, 2, Rdst);
    z_lgfr(result, Rcnt);
    z_bru(AllDone);
    bind(skip4Shortcut);

    z_chi(Rcnt, 8);
    z_brne(skip8Shortcut); // There's more to do...
    z_lmg(Z_R0, Z_R1, 0, Rsrc); // Treat exactly 8 characters specially.
    if (VM_Version::has_DistinctOpnds()) {
      Rout = Z_R0;
      z_ogrk(Rix, Z_R0, Z_R1);
      z_ngr(Rix, Rmask);
    } else {
      Rout = Rix;
      z_lgr(Rix, Z_R0);
      z_ogr(Z_R0, Z_R1);
      z_ngr(Z_R0, Rmask);
    }
    z_brnz(skipShortcut);
    z_stcmh(Rout, 5, 0, Rdst);
    z_stcm(Rout, 5, 2, Rdst);
    z_stcmh(Z_R1, 5, 4, Rdst);
    z_stcm(Z_R1, 5, 6, Rdst);
    z_lgfr(result, Rcnt);
    z_bru(AllDone);

    bind(skip8Shortcut);
    clear_reg(Z_R0, true, false); // #characters already processed (none). Precond for scalar loop.
    z_brl(ScalarShortcut); // Just a few characters

    bind(skipShortcut);
  }
#endif
  clear_reg(Z_R0); // make sure register is properly initialized.

  //---< Stage 1: vector loop, min_vcnt characters per iteration >---
  // On exit, Z_R0 holds the # characters successfully processed by this stage.
  if (VM_Version::has_VectorFacility()) {
    const int min_vcnt = 32; // Minimum #characters required to use vector instructions.
                             // Otherwise just do nothing in vector mode.
                             // Must be multiple of 2*(vector register length in chars (8 HW = 128 bits)).
    const int log_min_vcnt = exact_log2(min_vcnt);
    Label VectorLoop, VectorDone, VectorBreak;

    VectorRegister Vtmp1 = Z_V16;
    VectorRegister Vtmp2 = Z_V17;
    VectorRegister Vmask = Z_V18;
    VectorRegister Vzero = Z_V19;
    VectorRegister Vsrc_first = Z_V20;
    VectorRegister Vsrc_last = Z_V23;

    assert((Vsrc_last->encoding() - Vsrc_first->encoding() + 1) == min_vcnt/8, "logic error");
    assert(VM_Version::has_DistinctOpnds(), "Assumption when has_VectorFacility()");
    z_srak(Rix, Rcnt, log_min_vcnt); // # vector loop iterations
    z_brz(VectorDone); // not enough data for vector loop

    z_vzero(Vzero); // all zeroes
    z_vgmh(Vmask, 0, 7); // generate 0xff00 mask for all 2-byte elements
    z_sllg(Z_R0, Rix, log_min_vcnt); // remember #chars that will be processed by vector loop

    bind(VectorLoop);
    z_vlm(Vsrc_first, Vsrc_last, 0, Rsrc);
    add2reg(Rsrc, min_vcnt*2);

    //---< check for incompatible character >---
    z_vo(Vtmp1, Z_V20, Z_V21);
    z_vo(Vtmp2, Z_V22, Z_V23);
    z_vo(Vtmp1, Vtmp1, Vtmp2);
    z_vn(Vtmp1, Vtmp1, Vmask);
    z_vceqhs(Vtmp1, Vtmp1, Vzero); // high half of all chars must be zero for successful compress.
    z_bvnt(VectorBreak); // break vector loop if not all vector elements compare eq -> incompatible character found.
                         // re-process data from current iteration in break handler.

    //---< pack & store characters >---
    z_vpkh(Vtmp1, Z_V20, Z_V21); // pack (src1, src2) -> tmp1
    z_vpkh(Vtmp2, Z_V22, Z_V23); // pack (src3, src4) -> tmp2
    z_vstm(Vtmp1, Vtmp2, 0, Rdst); // store packed string
    add2reg(Rdst, min_vcnt);

    z_brct(Rix, VectorLoop);

    z_bru(VectorDone);

    bind(VectorBreak);
    add2reg(Rsrc, -min_vcnt*2); // Fix Rsrc. Rsrc was already updated, but Rdst and Rix are not.
    z_sll(Rix, log_min_vcnt); // # chars processed so far in VectorLoop, excl. current iteration.
    z_sr(Z_R0, Rix); // correct # chars processed in total.

    bind(VectorDone);
  }

  //---< Stage 2: unrolled loop, min_cnt characters per iteration >---
  {
    const int min_cnt = 8; // Minimum #characters required to use unrolled loop.
                           // Otherwise just do nothing in unrolled loop.
                           // Must be multiple of 8.
    const int log_min_cnt = exact_log2(min_cnt);
    Label UnrolledLoop, UnrolledDone, UnrolledBreak;

    if (VM_Version::has_DistinctOpnds()) {
      z_srk(Rix, Rcnt, Z_R0); // remaining # chars to compress in unrolled loop
    } else {
      z_lr(Rix, Rcnt);
      z_sr(Rix, Z_R0);
    }
    z_sra(Rix, log_min_cnt); // unrolled loop count
    z_brz(UnrolledDone);

    bind(UnrolledLoop);
    z_lmg(Z_R0, Z_R1, 0, Rsrc);
    if (precise) {
      // Check all 8 chars before anything is stored.
      z_ogr(Z_R1, Z_R0); // check all 8 chars for incompatibility
      z_ngr(Z_R1, Rmask);
      z_brnz(UnrolledBreak);

      z_lg(Z_R1, 8, Rsrc); // reload destroyed register
      z_stcmh(Z_R0, 5, 0, Rdst);
      z_stcm(Z_R0, 5, 2, Rdst);
    } else {
      // First 4 chars are stored before the check; the break handler accounts for them.
      z_stcmh(Z_R0, 5, 0, Rdst);
      z_stcm(Z_R0, 5, 2, Rdst);

      z_ogr(Z_R0, Z_R1);
      z_ngr(Z_R0, Rmask);
      z_brnz(UnrolledBreak);
    }
    z_stcmh(Z_R1, 5, 4, Rdst);
    z_stcm(Z_R1, 5, 6, Rdst);

    add2reg(Rsrc, min_cnt*2);
    add2reg(Rdst, min_cnt);
    z_brct(Rix, UnrolledLoop);

    z_lgfr(Z_R0, Rcnt); // # chars processed in total after unrolled loop.
    z_nilf(Z_R0, ~(min_cnt-1));
    z_tmll(Rcnt, min_cnt-1);
    z_brnaz(ScalarShortcut); // if all bits zero, there is nothing left to do for scalar loop.
                             // Rix == 0 in all cases.
    z_sllg(Z_R1, Rcnt, 1); // # src bytes already processed. Only lower 32 bits are valid!
                           // Z_R1 contents must be treated as unsigned operand! For huge strings,
                           // (Rcnt >= 2**30), the value may spill into the sign bit by sllg.
    z_lgfr(result, Rcnt); // all characters processed.
    z_slgfr(Rdst, Rcnt); // restore ptr
    z_slgfr(Rsrc, Z_R1); // restore ptr, double the element count for Rsrc restore
    z_bru(AllDone);

    bind(UnrolledBreak);
    z_lgfr(Z_R0, Rcnt); // # chars processed in total after unrolled loop
    z_nilf(Z_R0, ~(min_cnt-1));
    z_sll(Rix, log_min_cnt); // # chars not yet processed in UnrolledLoop (due to break), broken iteration not included.
    z_sr(Z_R0, Rix); // fix # chars processed OK so far.
    if (!precise) {
      z_lgfr(result, Z_R0);
      z_sllg(Z_R1, Z_R0, 1); // # src bytes already processed. Only lower 32 bits are valid!
                             // Z_R1 contents must be treated as unsigned operand! For huge strings,
                             // (Rcnt >= 2**30), the value may spill into the sign bit by sllg.
      z_aghi(result, min_cnt/2); // min_cnt/2 characters have already been written
                                 // but ptrs were not updated yet.
      z_slgfr(Rdst, Z_R0); // restore ptr
      z_slgfr(Rsrc, Z_R1); // restore ptr, double the element count for Rsrc restore
      z_bru(AllDone);
    }
    // If precise, fall through: the scalar loop re-processes the broken iteration char by char.
    bind(UnrolledDone);
  }

  //---< Stage 3: scalar loop, one character per iteration >---
  {
    Label ScalarLoop, ScalarDone, ScalarBreak;

    bind(ScalarShortcut);
    z_ltgfr(result, Rcnt);
    z_brz(AllDone);

#if 0 // Sacrifice shortcuts for code compactness
    {
      //---< Special treatment for very short strings (one or two characters) >---
      // For these strings, we are sure that the above code was skipped.
      // Thus, no registers were modified, register restore is not required.
      Label ScalarDoit, Scalar2Char;
      z_chi(Rcnt, 2);
      z_brh(ScalarDoit);
      z_llh(Z_R1, 0, Z_R0, Rsrc);
      z_bre(Scalar2Char);
      z_tmll(Z_R1, 0xff00);
      z_lghi(result, 0); // cnt == 1, first char invalid, no chars successfully processed
      z_brnaz(AllDone);
      z_stc(Z_R1, 0, Z_R0, Rdst);
      z_lghi(result, 1);
      z_bru(AllDone);

      bind(Scalar2Char);
      z_llh(Z_R0, 2, Z_R0, Rsrc);
      z_tmll(Z_R1, 0xff00);
      z_lghi(result, 0); // cnt == 2, first char invalid, no chars successfully processed
      z_brnaz(AllDone);
      z_stc(Z_R1, 0, Z_R0, Rdst);
      z_tmll(Z_R0, 0xff00);
      z_lghi(result, 1); // cnt == 2, second char invalid, one char successfully processed
      z_brnaz(AllDone);
      z_stc(Z_R0, 1, Z_R0, Rdst);
      z_lghi(result, 2);
      z_bru(AllDone);

      bind(ScalarDoit);
    }
#endif

    if (VM_Version::has_DistinctOpnds()) {
      z_srk(Rix, Rcnt, Z_R0); // remaining # chars to compress in scalar loop
    } else {
      z_lr(Rix, Rcnt);
      z_sr(Rix, Z_R0);
    }
    z_lgfr(result, Rcnt); // # processed characters (if all runs ok).
    z_brz(ScalarDone); // uses CC from Rix calculation

    bind(ScalarLoop);
    z_llh(Z_R1, 0, Z_R0, Rsrc);
    z_tmll(Z_R1, 0xff00);
    z_brnaz(ScalarBreak);
    z_stc(Z_R1, 0, Z_R0, Rdst);
    add2reg(Rsrc, 2);
    add2reg(Rdst, 1);
    z_brct(Rix, ScalarLoop);

    z_bru(ScalarDone);

    bind(ScalarBreak);
    z_sr(result, Rix); // subtract the # chars not processed (Rix is the remaining loop count).

    bind(ScalarDone);
    z_sgfr(Rdst, result); // restore ptr
    z_sgfr(Rsrc, result); // restore ptr, double the element count for Rsrc restore
    z_sgfr(Rsrc, result);
  }
  bind(AllDone);

  if (precise) {
    BLOCK_COMMENT("} encode_iso_array");
  } else {
    BLOCK_COMMENT("} string_compress");
  }
  return offset() - block_start;
}

// Inflate byte[] to char[].
//   Register roles (see the local aliases below):
//     Z_R0 - stop character (cleared, not used here),
//     Z_R1 - address of the translate table,
//     tmp  - working copy of the src address.
//   dst and cnt must form an even/odd register pair (asserted).
//   cnt is zero-extended in place.
// The C++ return value is the offset() delta of the generated code sequence.
unsigned int C2_MacroAssembler::string_inflate_trot(Register src, Register dst, Register cnt, Register tmp) {
  int block_start = offset();

  BLOCK_COMMENT("string_inflate {");

  Register stop_char = Z_R0;
  Register table = Z_R1;
  Register src_addr = tmp;

  assert_different_registers(Z_R0, Z_R1, tmp, src, dst, cnt);
  assert(dst->encoding()%2 == 0, "must be even reg");
  assert(cnt->encoding()%2 == 1, "must be odd reg");
  assert(cnt->encoding() - dst->encoding() == 1, "must be even/odd pair");

  StubRoutines::zarch::generate_load_trot_table_addr(this, table); // kills Z_R0 (if ASSERT)
  clear_reg(stop_char); // Stop character. Not used here, but initialized to have a defined value.
  lgr_if_needed(src_addr, src);
  z_llgfr(cnt, cnt); // # src characters, must be a positive simm32.

  translate_ot(dst, src_addr, /* mask = */ 0x0001);

  BLOCK_COMMENT("} string_inflate");

  return offset() - block_start;
}

// Inflate byte[] to char[].
//   Restores: src, dst
//   Uses:     cnt
//   Kills:    tmp, Z_R0, Z_R1.
// Note:
//   cnt is signed int.
//   Do not rely on high word!
//   counts # characters, not bytes.
// The C++ return value is the offset() delta of the generated code sequence.
unsigned int C2_MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp) {
  assert_different_registers(Z_R0, Z_R1, src, dst, cnt, tmp);

  BLOCK_COMMENT("string_inflate {");
  int block_start = offset();

  Register Rcnt = cnt; // # characters (src: bytes, dst: char (2-byte)), remaining after current loop.
  Register Rix = tmp; // loop index
  Register Rsrc = src; // addr(src array)
  Register Rdst = dst; // addr(dst array)
  Label ScalarShortcut, AllDone;

#if 0 // Sacrifice shortcuts for code compactness
  {
    //---< shortcuts for short strings (very frequent) >---
    Label skipShortcut, skip4Shortcut;
    z_ltr(Rcnt, Rcnt); // absolutely nothing to do for strings of len == 0.
    z_brz(AllDone);
    clear_reg(Z_R0); // make sure registers are properly initialized.
    clear_reg(Z_R1);
    z_chi(Rcnt, 4);
    z_brne(skip4Shortcut); // 4 characters are very frequent
    z_icm(Z_R0, 5, 0, Rsrc); // Treat exactly 4 characters specially.
    z_icm(Z_R1, 5, 2, Rsrc);
    z_stm(Z_R0, Z_R1, 0, Rdst);
    z_bru(AllDone);
    bind(skip4Shortcut);

    z_chi(Rcnt, 8);
    z_brh(skipShortcut); // There's a lot to do...
    z_lgfr(Z_R0, Rcnt); // remaining #characters (<= 8). Precond for scalar loop.
                        // This does not destroy the "register cleared" state of Z_R0.
    z_brl(ScalarShortcut); // Just a few characters
    z_icmh(Z_R0, 5, 0, Rsrc); // Treat exactly 8 characters specially.
    z_icmh(Z_R1, 5, 4, Rsrc);
    z_icm(Z_R0, 5, 2, Rsrc);
    z_icm(Z_R1, 5, 6, Rsrc);
    z_stmg(Z_R0, Z_R1, 0, Rdst);
    z_bru(AllDone);
    bind(skipShortcut);
  }
#endif
  clear_reg(Z_R0); // make sure register is properly initialized.

  //---< Stage 1: vector loop, min_vcnt characters per iteration >---
  if (VM_Version::has_VectorFacility()) {
    const int min_vcnt = 32; // Minimum #characters required to use vector instructions.
                             // Otherwise just do nothing in vector mode.
                             // Must be multiple of vector register length (16 bytes = 128 bits).
    const int log_min_vcnt = exact_log2(min_vcnt);
    Label VectorLoop, VectorDone;

    assert(VM_Version::has_DistinctOpnds(), "Assumption when has_VectorFacility()");
    z_srak(Rix, Rcnt, log_min_vcnt); // calculate # vector loop iterations
    z_brz(VectorDone); // skip if none

    z_sllg(Z_R0, Rix, log_min_vcnt); // remember #chars that will be processed by vector loop

    bind(VectorLoop);
    z_vlm(Z_V20, Z_V21, 0, Rsrc); // get next 32 characters (single-byte)
    add2reg(Rsrc, min_vcnt);

    z_vuplhb(Z_V22, Z_V20); // V2 <- (expand) V0(high)
    z_vupllb(Z_V23, Z_V20); // V3 <- (expand) V0(low)
    z_vuplhb(Z_V24, Z_V21); // V4 <- (expand) V1(high)
    z_vupllb(Z_V25, Z_V21); // V5 <- (expand) V1(low)
    z_vstm(Z_V22, Z_V25, 0, Rdst); // store the 32 expanded characters (4 vector registers = 64 bytes)
    add2reg(Rdst, min_vcnt*2);

    z_brct(Rix, VectorLoop);

    bind(VectorDone);
  }

  const int min_cnt = 8; // Minimum #characters required to use unrolled scalar loop.
                         // Otherwise just do nothing in unrolled scalar mode.
                         // Must be multiple of 8.
  //---< Stage 2: unrolled loop, min_cnt characters per iteration >---
  {
    const int log_min_cnt = exact_log2(min_cnt);
    Label UnrolledLoop, UnrolledDone;


    if (VM_Version::has_DistinctOpnds()) {
      z_srk(Rix, Rcnt, Z_R0); // remaining # chars to process in unrolled loop
    } else {
      z_lr(Rix, Rcnt);
      z_sr(Rix, Z_R0);
    }
    z_sra(Rix, log_min_cnt); // unrolled loop count
    z_brz(UnrolledDone);

    // The insert-characters-under-mask instructions below only fill every other byte,
    // so the work registers must start out as zero.
    clear_reg(Z_R0);
    clear_reg(Z_R1);

    bind(UnrolledLoop);
    z_icmh(Z_R0, 5, 0, Rsrc);
    z_icmh(Z_R1, 5, 4, Rsrc);
    z_icm(Z_R0, 5, 2, Rsrc);
    z_icm(Z_R1, 5, 6, Rsrc);
    add2reg(Rsrc, min_cnt);

    z_stmg(Z_R0, Z_R1, 0, Rdst);

    add2reg(Rdst, min_cnt*2);
    z_brct(Rix, UnrolledLoop);

    bind(UnrolledDone);
    z_lgfr(Z_R0, Rcnt); // # chars left over after unrolled loop.
    z_nilf(Z_R0, min_cnt-1);
    z_brnz(ScalarShortcut); // if zero, there is nothing left to do for scalar loop.
                            // Rix == 0 in all cases.
    z_sgfr(Z_R0, Rcnt); // negative # characters the ptrs have been advanced previously.
    z_agr(Rdst, Z_R0); // restore ptr, double the element count for Rdst restore.
    z_agr(Rdst, Z_R0);
    z_agr(Rsrc, Z_R0); // restore ptr.
    z_bru(AllDone);
  }

  //---< Stage 3: remaining (at most min_cnt-1) characters via computed branch into a code table >---
  {
    bind(ScalarShortcut);
    // Z_R0 must contain remaining # characters as 64-bit signed int here.
    // register contents is preserved over scalar processing (for register fixup).

#if 0 // Sacrifice shortcuts for code compactness
    {
      Label ScalarDefault;
      z_chi(Rcnt, 2);
      z_brh(ScalarDefault);
      z_llc(Z_R0, 0, Z_R0, Rsrc); // 6 bytes
      z_sth(Z_R0, 0, Z_R0, Rdst); // 4 bytes
      z_brl(AllDone);
      z_llc(Z_R0, 1, Z_R0, Rsrc); // 6 bytes
      z_sth(Z_R0, 2, Z_R0, Rdst); // 4 bytes
      z_bru(AllDone);
      bind(ScalarDefault);
    }
#endif

    Label CodeTable;
    // Some comments on Rix calculation:
    // - Rcnt is small, therefore no bits shifted out of low word (sll(g) instructions).
    // - high word of both Rix and Rcnt may contain garbage
    // - the final lngfr takes care of that garbage, extending the sign to high word
    // Each llc/sth pair below is 6 + 4 = 10 bytes of code. Branching to
    // (CodeTable - 10*n) therefore executes exactly the last n pairs.
    z_sllg(Rix, Z_R0, 2); // calculate 10*Rix = (4*Rix + Rix)*2
    z_ar(Rix, Z_R0);
    z_larl(Z_R1, CodeTable);
    z_sll(Rix, 1);
    z_lngfr(Rix, Rix); // ix range: [0..7], after inversion & mult: [-(7*10)..(0*10)].
    z_bc(Assembler::bcondAlways, 0, Rix, Z_R1);

    z_llc(Z_R1, 6, Z_R0, Rsrc); // 6 bytes
    z_sth(Z_R1, 12, Z_R0, Rdst); // 4 bytes

    z_llc(Z_R1, 5, Z_R0, Rsrc);
    z_sth(Z_R1, 10, Z_R0, Rdst);

    z_llc(Z_R1, 4, Z_R0, Rsrc);
    z_sth(Z_R1, 8, Z_R0, Rdst);

    z_llc(Z_R1, 3, Z_R0, Rsrc);
    z_sth(Z_R1, 6, Z_R0, Rdst);

    z_llc(Z_R1, 2, Z_R0, Rsrc);
    z_sth(Z_R1, 4, Z_R0, Rdst);

    z_llc(Z_R1, 1, Z_R0, Rsrc);
    z_sth(Z_R1, 2, Z_R0, Rdst);

    z_llc(Z_R1, 0, Z_R0, Rsrc);
    z_sth(Z_R1, 0, Z_R0, Rdst);
    bind(CodeTable);

    z_chi(Rcnt, 8); // no fixup for small strings. Rdst, Rsrc were not modified.
    z_brl(AllDone);

    z_sgfr(Z_R0, Rcnt); // # characters the ptrs have been advanced previously.
    z_agr(Rdst, Z_R0); // restore ptr, double the element count for Rdst restore.
    z_agr(Rdst, Z_R0);
    z_agr(Rsrc, Z_R0); // restore ptr.
  }
  bind(AllDone);

  BLOCK_COMMENT("} string_inflate");
  return offset() - block_start;
}

// Inflate byte[] to char[], length known at compile time.
//   Restores: src, dst
//   Kills:    tmp, Z_R0, Z_R1.
// Note:
//   len is signed int. Counts # characters, not bytes.
//
// Because len is a compile-time constant, the stage selection below happens in C++
// at code generation time. Where only a few iterations are needed, the pointers are
// not advanced; the displacements src_off/dst_off are used instead.
// The C++ return value is the offset() delta of the generated code sequence.
unsigned int C2_MacroAssembler::string_inflate_const(Register src, Register dst, Register tmp, int len) {
  assert_different_registers(Z_R0, Z_R1, src, dst, tmp);

  BLOCK_COMMENT("string_inflate_const {");
  int block_start = offset();

  Register Rix = tmp; // loop index
  Register Rsrc = src; // addr(src array)
  Register Rdst = dst; // addr(dst array)
  Label ScalarShortcut, AllDone;
  int nprocessed = 0;
  int src_off = 0; // compensate for saved (optimized away) ptr advancement.
  int dst_off = 0; // compensate for saved (optimized away) ptr advancement.
  bool restore_inputs = false; // set when a generated loop advances Rsrc/Rdst.
  bool workreg_clear = false;  // set once Z_R0/Z_R1 have been cleared for icm/icmh use.

  if ((len >= 32) && VM_Version::has_VectorFacility()) {
    const int min_vcnt = 32; // Minimum #characters required to use vector instructions.
                             // Otherwise just do nothing in vector mode.
                             // Must be multiple of vector register length (16 bytes = 128 bits).
    const int log_min_vcnt = exact_log2(min_vcnt);
    const int iterations = (len - nprocessed) >> log_min_vcnt;
    nprocessed += iterations << log_min_vcnt;
    Label VectorLoop;

    if (iterations == 1) {
      z_vlm(Z_V20, Z_V21, 0+src_off, Rsrc); // get next 32 characters (single-byte)
      z_vuplhb(Z_V22, Z_V20); // V2 <- (expand) V0(high)
      z_vupllb(Z_V23, Z_V20); // V3 <- (expand) V0(low)
      z_vuplhb(Z_V24, Z_V21); // V4 <- (expand) V1(high)
      z_vupllb(Z_V25, Z_V21); // V5 <- (expand) V1(low)
      z_vstm(Z_V22, Z_V25, 0+dst_off, Rdst); // store the 32 expanded characters (4 vector registers = 64 bytes)

      src_off += min_vcnt;
      dst_off += min_vcnt*2;
    } else {
      restore_inputs = true;

      z_lgfi(Rix, len>>log_min_vcnt);
      bind(VectorLoop);
      z_vlm(Z_V20, Z_V21, 0, Rsrc); // get next 32 characters (single-byte)
      add2reg(Rsrc, min_vcnt);

      z_vuplhb(Z_V22, Z_V20); // V2 <- (expand) V0(high)
      z_vupllb(Z_V23, Z_V20); // V3 <- (expand) V0(low)
      z_vuplhb(Z_V24, Z_V21); // V4 <- (expand) V1(high)
      z_vupllb(Z_V25, Z_V21); // V5 <- (expand) V1(low)
      z_vstm(Z_V22, Z_V25, 0, Rdst); // store the 32 expanded characters (4 vector registers = 64 bytes)
      add2reg(Rdst, min_vcnt*2);

      z_brct(Rix, VectorLoop);
    }
  }

  if (((len-nprocessed) >= 16) && VM_Version::has_VectorFacility()) {
    const int min_vcnt = 16; // Minimum #characters required to use vector instructions.
                             // Otherwise just do nothing in vector mode.
                             // Must be multiple of vector register length (16 bytes = 128 bits).
    const int log_min_vcnt = exact_log2(min_vcnt);
    const int iterations = (len - nprocessed) >> log_min_vcnt;
    nprocessed += iterations << log_min_vcnt;
    assert(iterations == 1, "must be!");

    z_vl(Z_V20, 0+src_off, Z_R0, Rsrc); // get next 16 characters (single-byte)
    z_vuplhb(Z_V22, Z_V20); // V2 <- (expand) V0(high)
    z_vupllb(Z_V23, Z_V20); // V3 <- (expand) V0(low)
    z_vstm(Z_V22, Z_V23, 0+dst_off, Rdst); // store next 32 bytes

    src_off += min_vcnt;
    dst_off += min_vcnt*2;
  }

  if ((len-nprocessed) > 8) {
    const int min_cnt = 8; // Minimum #characters required to use unrolled scalar loop.
                           // Otherwise just do nothing in unrolled scalar mode.
                           // Must be multiple of 8.
    const int log_min_cnt = exact_log2(min_cnt);
    const int iterations = (len - nprocessed) >> log_min_cnt;
    nprocessed += iterations << log_min_cnt;

    //---< avoid loop overhead/ptr increment for small # iterations >---
    if (iterations <= 2) {
      clear_reg(Z_R0);
      clear_reg(Z_R1);
      workreg_clear = true;

      z_icmh(Z_R0, 5, 0+src_off, Rsrc);
      z_icmh(Z_R1, 5, 4+src_off, Rsrc);
      z_icm(Z_R0, 5, 2+src_off, Rsrc);
      z_icm(Z_R1, 5, 6+src_off, Rsrc);
      z_stmg(Z_R0, Z_R1, 0+dst_off, Rdst);

      src_off += min_cnt;
      dst_off += min_cnt*2;
    }

    if (iterations == 2) {
      z_icmh(Z_R0, 5, 0+src_off, Rsrc);
      z_icmh(Z_R1, 5, 4+src_off, Rsrc);
      z_icm(Z_R0, 5, 2+src_off, Rsrc);
      z_icm(Z_R1, 5, 6+src_off, Rsrc);
      z_stmg(Z_R0, Z_R1, 0+dst_off, Rdst);

      src_off += min_cnt;
      dst_off += min_cnt*2;
    }

    if (iterations > 2) {
      Label UnrolledLoop;
      restore_inputs = true;

      clear_reg(Z_R0);
      clear_reg(Z_R1);
      workreg_clear = true;

      z_lgfi(Rix, iterations);
      bind(UnrolledLoop);
      z_icmh(Z_R0, 5, 0, Rsrc);
      z_icmh(Z_R1, 5, 4, Rsrc);
      z_icm(Z_R0, 5, 2, Rsrc);
      z_icm(Z_R1, 5, 6, Rsrc);
      add2reg(Rsrc, min_cnt);

      z_stmg(Z_R0, Z_R1, 0, Rdst);
      add2reg(Rdst, min_cnt*2);

      z_brct(Rix, UnrolledLoop);
    }
  }

  // Remaining 1..8 characters: straight-line code selected at code generation time.
  if ((len-nprocessed) > 0) {
    switch (len-nprocessed) {
      case 8:
        if (!workreg_clear) {
          clear_reg(Z_R0);
          clear_reg(Z_R1);
        }
        z_icmh(Z_R0, 5, 0+src_off, Rsrc);
        z_icmh(Z_R1, 5, 4+src_off, Rsrc);
        z_icm(Z_R0, 5, 2+src_off, Rsrc);
        z_icm(Z_R1, 5, 6+src_off, Rsrc);
        z_stmg(Z_R0, Z_R1, 0+dst_off, Rdst);
        break;
      case 7:
        if (!workreg_clear) {
          clear_reg(Z_R0);
          clear_reg(Z_R1);
        }
        clear_reg(Rix);
        z_icm(Z_R0, 5, 0+src_off, Rsrc);
        z_icm(Z_R1, 5, 2+src_off, Rsrc);
        z_icm(Rix, 5, 4+src_off, Rsrc);
        z_stm(Z_R0, Z_R1, 0+dst_off, Rdst);
        z_llc(Z_R0, 6+src_off, Z_R0, Rsrc);
        z_st(Rix, 8+dst_off, Z_R0, Rdst);
        z_sth(Z_R0, 12+dst_off, Z_R0, Rdst);
        break;
      case 6:
        if (!workreg_clear) {
          clear_reg(Z_R0);
          clear_reg(Z_R1);
        }
        clear_reg(Rix);
        z_icm(Z_R0, 5, 0+src_off, Rsrc);
        z_icm(Z_R1, 5, 2+src_off, Rsrc);
        z_icm(Rix, 5, 4+src_off, Rsrc);
        z_stm(Z_R0, Z_R1, 0+dst_off, Rdst);
        z_st(Rix, 8+dst_off, Z_R0, Rdst);
        break;
      case 5:
        if (!workreg_clear) {
          clear_reg(Z_R0);
          clear_reg(Z_R1);
        }
        z_icm(Z_R0, 5, 0+src_off, Rsrc);
        z_icm(Z_R1, 5, 2+src_off, Rsrc);
        z_llc(Rix, 4+src_off, Z_R0, Rsrc);
        z_stm(Z_R0, Z_R1, 0+dst_off, Rdst);
        z_sth(Rix, 8+dst_off, Z_R0, Rdst);
        break;
      case 4:
        if (!workreg_clear) {
          clear_reg(Z_R0);
          clear_reg(Z_R1);
        }
        z_icm(Z_R0, 5, 0+src_off, Rsrc);
        z_icm(Z_R1, 5, 2+src_off, Rsrc);
        z_stm(Z_R0, Z_R1, 0+dst_off, Rdst);
        break;
      case 3:
        if (!workreg_clear) {
          clear_reg(Z_R0);
        }
        z_llc(Z_R1, 2+src_off, Z_R0, Rsrc);
        z_icm(Z_R0, 5, 0+src_off, Rsrc);
        z_sth(Z_R1, 4+dst_off, Z_R0, Rdst);
        z_st(Z_R0, 0+dst_off, Rdst);
        break;
      case 2:
        z_llc(Z_R0, 0+src_off, Z_R0, Rsrc);
        z_llc(Z_R1, 1+src_off, Z_R0, Rsrc);
        z_sth(Z_R0, 0+dst_off, Z_R0, Rdst);
        z_sth(Z_R1, 2+dst_off, Z_R0, Rdst);
        break;
      case 1:
        z_llc(Z_R0, 0+src_off, Z_R0, Rsrc);
        z_sth(Z_R0, 0+dst_off, Z_R0, Rdst);
        break;
      default:
        guarantee(false, "Impossible");
        break;
    }
    src_off += len-nprocessed;
    dst_off += (len-nprocessed)*2;
    nprocessed = len;
  }

  //---< restore modified input registers >---
  // The pointers were advanced by everything that was processed without a displacement:
  // (nprocessed - src_off) bytes for Rsrc, (2*nprocessed - dst_off) bytes for Rdst.
  if ((nprocessed > 0) && restore_inputs) {
    z_agfi(Rsrc, -(nprocessed-src_off));
    if (nprocessed < 1000000000) { // avoid int overflow
      z_agfi(Rdst, -(nprocessed*2-dst_off));
    } else {
      z_agfi(Rdst, -(nprocessed-dst_off));
      z_agfi(Rdst, -nprocessed);
    }
  }

  BLOCK_COMMENT("} string_inflate_const");
  return offset() - block_start;
}

// Test whether any of the cnt bytes starting at src has its most significant bit set.
// result is preset to 1 and is only reset to 0 if no such byte is found.
// A fast loop checks 16 bytes per iteration against the mask 0x8080808080808080;
// a slow loop checks the remaining bytes one at a time.
// Kills src.
// Also written by the generated code: Z_R0, Z_R1, odd_reg, even_reg, tmp.
// The C++ return value is the offset() delta of the generated code sequence.
unsigned int C2_MacroAssembler::has_negatives(Register result, Register src, Register cnt,
                                              Register odd_reg, Register even_reg, Register tmp) {
  int block_start = offset();
  Label Lloop1, Lloop2, Lslow, Lnotfound, Ldone;
  const Register addr = src, mask = tmp;

  BLOCK_COMMENT("has_negatives {");

  z_llgfr(Z_R1, cnt); // Number of bytes to read. (Must be a positive simm32.)
  z_llilf(mask, 0x80808080);
  z_lhi(result, 1); // Assume true.
  // Last possible addr for fast loop.
  z_lay(odd_reg, -16, Z_R1, src);
  z_chi(cnt, 16);
  z_brl(Lslow);

  // addr: current address, even_reg: address increment, odd_reg: address limit
  z_iihf(mask, 0x80808080);
  z_lghi(even_reg, 16);

  bind(Lloop1); // 16 bytes per iteration.
  z_lg(Z_R0, Address(addr));
  z_lg(Z_R1, Address(addr, 8));
  z_ogr(Z_R0, Z_R1);
  z_ngr(Z_R0, mask);
  z_brne(Ldone); // If found return 1.
  z_brxlg(addr, even_reg, Lloop1);

  bind(Lslow);
  z_aghi(odd_reg, 16-1); // Last possible addr for slow loop.
  z_lghi(even_reg, 1);
  z_cgr(addr, odd_reg);
  z_brh(Lnotfound);

  bind(Lloop2); // 1 byte per iteration.
  z_cli(Address(addr), 0x80);
  z_brnl(Ldone); // If found return 1.
  z_brxlg(addr, even_reg, Lloop2);

  bind(Lnotfound);
  z_lhi(result, 0);

  bind(Ldone);

  BLOCK_COMMENT("} has_negatives");

  return offset() - block_start;
}

// Compare two strings.
// result is preset with the length difference (cnt1 - cnt2). If a mismatch is found
// within the first min(cnt1, cnt2) elements, result is instead set to the difference
// between the first mismatched chars.
// ae selects the encoding combination (StrIntrinsicNode::LL, UU, LU, UL):
//   - LU/UL: cnt2 is halved up front and the strings are compared element by element,
//            one byte from str1 against one 2-byte char from str2.
//   - UU:    the length difference is halved.
//   - UL:    the result is negated at the end.
// kill: cnt1, cnt2, odd_reg, even_reg; early clobber: result
// Z_R0 and Z_R1 are used as work registers as well.
// The C++ return value is the offset() delta of the generated code sequence.
unsigned int C2_MacroAssembler::string_compare(Register str1, Register str2,
                                               Register cnt1, Register cnt2,
                                               Register odd_reg, Register even_reg, Register result, int ae) {
  int block_start = offset();

  assert_different_registers(str1, cnt1, cnt2, odd_reg, even_reg, result);
  assert_different_registers(str2, cnt1, cnt2, odd_reg, even_reg, result);

  // If strings are equal up to min length, return the length difference.
  const Register diff = result, // Pre-set result with length difference.
                 min = cnt1, // min number of bytes
                 tmp = cnt2;

  // Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
  // we interchange str1 and str2 in the UL case and negate the result.
  // Like this, str1 is always latin1 encoded, except for the UU case.
  // In addition, we need 0 (or sign which is 0) extend when using 64 bit register.
  const bool used_as_LU = (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL);

  BLOCK_COMMENT("string_compare {");

  if (used_as_LU) {
    z_srl(cnt2, 1);
  }

  // See if the lengths are different, and calculate min in cnt1.
  // Save diff in case we need it for a tie-breaker.

  // diff = cnt1 - cnt2
  if (VM_Version::has_DistinctOpnds()) {
    z_srk(diff, cnt1, cnt2);
  } else {
    z_lr(diff, cnt1);
    z_sr(diff, cnt2);
  }
  if (str1 != str2) {
    // Uses the condition code set by the subtraction above.
    if (VM_Version::has_LoadStoreConditional()) {
      z_locr(min, cnt2, Assembler::bcondHigh);
    } else {
      Label Lskip;
      z_brl(Lskip); // min ok if cnt1 < cnt2
      z_lr(min, cnt2); // min = cnt2
      bind(Lskip);
    }
  }

  if (ae == StrIntrinsicNode::UU) {
    z_sra(diff, 1);
  }
  if (str1 != str2) {
    Label Ldone;
    if (used_as_LU) {
      // Loop which searches the first difference character by character.
      Label Lloop;
      const Register ind1 = Z_R1,
                     ind2 = min;
      int stride1 = 1, stride2 = 2; // See comment above.

      // ind1: index, even_reg: index increment, odd_reg: index limit
      z_llilf(ind1, (unsigned int)(-stride1));
      z_lhi(even_reg, stride1);
      add2reg(odd_reg, -stride1, min);
      clear_reg(ind2); // kills min

      bind(Lloop);
      z_brxh(ind1, even_reg, Ldone);
      z_llc(tmp, Address(str1, ind1));
      z_llh(Z_R0, Address(str2, ind2));
      z_ahi(ind2, stride2);
      z_sr(tmp, Z_R0);
      z_bre(Lloop);

      z_lr(result, tmp);

    } else {
      // Use clcle in fast loop (only for same encoding).
      z_lgr(Z_R0, str1);
      z_lgr(even_reg, str2);
      z_llgfr(Z_R1, min);
      z_llgfr(odd_reg, min);

      if (ae == StrIntrinsicNode::LL) {
        compare_long_ext(Z_R0, even_reg, 0);
      } else {
        compare_long_uni(Z_R0, even_reg, 0);
      }
      z_bre(Ldone);
      z_lgr(Z_R1, Z_R0);
      if (ae == StrIntrinsicNode::LL) {
        z_llc(Z_R0, Address(even_reg));
        z_llc(result, Address(Z_R1));
      } else {
        z_llh(Z_R0, Address(even_reg));
        z_llh(result, Address(Z_R1));
      }
      z_sr(result, Z_R0);
    }

    // Otherwise, return the difference between the first mismatched chars.
    bind(Ldone);
  }

  if (ae == StrIntrinsicNode::UL) {
    z_lcr(result, result); // Negate result (see note above).
  }

  BLOCK_COMMENT("} string_compare");

  return offset() - block_start;
}

unsigned int C2_MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2, Register limit,
                                             Register odd_reg, Register even_reg, Register result, bool is_byte) {
  int block_start = offset();

  BLOCK_COMMENT("array_equals {");

  assert_different_registers(ary1, limit, odd_reg, even_reg);
  assert_different_registers(ary2, limit, odd_reg, even_reg);

  Label Ldone, Ldone_true, Ldone_false, Lclcle, CLC_template;
  int base_offset = 0;

  if (ary1 != ary2) {
    if (is_array_equ) {
      base_offset = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);

      // Return true if the same array.
      compareU64_and_branch(ary1, ary2, Assembler::bcondEqual, Ldone_true);

      // Return false if one of them is NULL.
      compareU64_and_branch(ary1, (intptr_t)0, Assembler::bcondEqual, Ldone_false);
      compareU64_and_branch(ary2, (intptr_t)0, Assembler::bcondEqual, Ldone_false);

      // Load the lengths of arrays.
      z_llgf(odd_reg, Address(ary1, arrayOopDesc::length_offset_in_bytes()));

      // Return false if the two arrays are not equal length.
      z_c(odd_reg, Address(ary2, arrayOopDesc::length_offset_in_bytes()));
      z_brne(Ldone_false);

      // string len in bytes (right operand)
      if (!is_byte) {
        z_chi(odd_reg, 128);
        z_sll(odd_reg, 1); // preserves flags
        z_brh(Lclcle);
      } else {
        compareU32_and_branch(odd_reg, (intptr_t)256, Assembler::bcondHigh, Lclcle);
      }
    } else {
      z_llgfr(odd_reg, limit); // Need to zero-extend prior to using the value.
      compareU32_and_branch(limit, (intptr_t)256, Assembler::bcondHigh, Lclcle);
    }


    // Use clc instruction for up to 256 bytes.
    {
      Register str1_reg = ary1,
               str2_reg = ary2;
      if (is_array_equ) {
        str1_reg = Z_R1;
        str2_reg =
even_reg;1007add2reg(str1_reg, base_offset, ary1); // string addr (left operand)1008add2reg(str2_reg, base_offset, ary2); // string addr (right operand)1009}1010z_ahi(odd_reg, -1); // Clc uses decremented limit. Also compare result to 0.1011z_brl(Ldone_true);1012// Note: We could jump to the template if equal.10131014assert(VM_Version::has_ExecuteExtensions(), "unsupported hardware");1015z_exrl(odd_reg, CLC_template);1016z_bre(Ldone_true);1017// fall through10181019bind(Ldone_false);1020clear_reg(result);1021z_bru(Ldone);10221023bind(CLC_template);1024z_clc(0, 0, str1_reg, 0, str2_reg);1025}10261027// Use clcle instruction.1028{1029bind(Lclcle);1030add2reg(even_reg, base_offset, ary2); // string addr (right operand)1031add2reg(Z_R0, base_offset, ary1); // string addr (left operand)10321033z_lgr(Z_R1, odd_reg); // string len in bytes (left operand)1034if (is_byte) {1035compare_long_ext(Z_R0, even_reg, 0);1036} else {1037compare_long_uni(Z_R0, even_reg, 0);1038}1039z_lghi(result, 0); // Preserve flags.1040z_brne(Ldone);1041}1042}1043// fall through10441045bind(Ldone_true);1046z_lghi(result, 1); // All characters are equal.1047bind(Ldone);10481049BLOCK_COMMENT("} array_equals");10501051return offset() - block_start;1052}10531054// kill: haycnt, needlecnt, odd_reg, even_reg; early clobber: result1055unsigned int C2_MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt,1056Register needle, Register needlecnt, int needlecntval,1057Register odd_reg, Register even_reg, int ae) {1058int block_start = offset();10591060// Ensure 0<needlecnt<=haycnt in ideal graph as prerequisite!1061assert(ae != StrIntrinsicNode::LU, "Invalid encoding");1062const int h_csize = (ae == StrIntrinsicNode::LL) ? 1 : 2;1063const int n_csize = (ae == StrIntrinsicNode::UU) ? 
2 : 1;1064Label L_needle1, L_Found, L_NotFound;10651066BLOCK_COMMENT("string_indexof {");10671068if (needle == haystack) {1069z_lhi(result, 0);1070} else {10711072// Load first character of needle (R0 used by search_string instructions).1073if (n_csize == 2) { z_llgh(Z_R0, Address(needle)); } else { z_llgc(Z_R0, Address(needle)); }10741075// Compute last haystack addr to use if no match gets found.1076if (needlecnt != noreg) { // variable needlecnt1077z_ahi(needlecnt, -1); // Remaining characters after first one.1078z_sr(haycnt, needlecnt); // Compute index succeeding last element to compare.1079if (n_csize == 2) { z_sll(needlecnt, 1); } // In bytes.1080} else { // constant needlecnt1081assert((needlecntval & 0x7fff) == needlecntval, "must be positive simm16 immediate");1082// Compute index succeeding last element to compare.1083if (needlecntval != 1) { z_ahi(haycnt, 1 - needlecntval); }1084}10851086z_llgfr(haycnt, haycnt); // Clear high half.1087z_lgr(result, haystack); // Final result will be computed from needle start pointer.1088if (h_csize == 2) { z_sll(haycnt, 1); } // Scale to number of bytes.1089z_agr(haycnt, haystack); // Point to address succeeding last element (haystack+scale*(haycnt-needlecnt+1)).10901091if (h_csize != n_csize) {1092assert(ae == StrIntrinsicNode::UL, "Invalid encoding");10931094if (needlecnt != noreg || needlecntval != 1) {1095if (needlecnt != noreg) {1096compare32_and_branch(needlecnt, (intptr_t)0, Assembler::bcondEqual, L_needle1);1097}10981099// Main Loop: UL version (now we have at least 2 characters).1100Label L_OuterLoop, L_InnerLoop, L_Skip;1101bind(L_OuterLoop); // Search for 1st 2 characters.1102z_lgr(Z_R1, haycnt);1103MacroAssembler::search_string_uni(Z_R1, result);1104z_brc(Assembler::bcondNotFound, L_NotFound);1105z_lgr(result, Z_R1);11061107z_lghi(Z_R1, n_csize);1108z_lghi(even_reg, h_csize);1109bind(L_InnerLoop);1110z_llgc(odd_reg, Address(needle, Z_R1));1111z_ch(odd_reg, Address(result, 
even_reg));1112z_brne(L_Skip);1113if (needlecnt != noreg) { z_cr(Z_R1, needlecnt); } else { z_chi(Z_R1, needlecntval - 1); }1114z_brnl(L_Found);1115z_aghi(Z_R1, n_csize);1116z_aghi(even_reg, h_csize);1117z_bru(L_InnerLoop);11181119bind(L_Skip);1120z_aghi(result, h_csize); // This is the new address we want to use for comparing.1121z_bru(L_OuterLoop);1122}11231124} else {1125const intptr_t needle_bytes = (n_csize == 2) ? ((needlecntval - 1) << 1) : (needlecntval - 1);1126Label L_clcle;11271128if (needlecnt != noreg || (needlecntval != 1 && needle_bytes <= 256)) {1129if (needlecnt != noreg) {1130compare32_and_branch(needlecnt, 256, Assembler::bcondHigh, L_clcle);1131z_ahi(needlecnt, -1); // remaining bytes -1 (for CLC)1132z_brl(L_needle1);1133}11341135// Main Loop: clc version (now we have at least 2 characters).1136Label L_OuterLoop, CLC_template;1137bind(L_OuterLoop); // Search for 1st 2 characters.1138z_lgr(Z_R1, haycnt);1139if (h_csize == 1) {1140MacroAssembler::search_string(Z_R1, result);1141} else {1142MacroAssembler::search_string_uni(Z_R1, result);1143}1144z_brc(Assembler::bcondNotFound, L_NotFound);1145z_lgr(result, Z_R1);11461147if (needlecnt != noreg) {1148assert(VM_Version::has_ExecuteExtensions(), "unsupported hardware");1149z_exrl(needlecnt, CLC_template);1150} else {1151z_clc(h_csize, needle_bytes -1, Z_R1, n_csize, needle);1152}1153z_bre(L_Found);1154z_aghi(result, h_csize); // This is the new address we want to use for comparing.1155z_bru(L_OuterLoop);11561157if (needlecnt != noreg) {1158bind(CLC_template);1159z_clc(h_csize, 0, Z_R1, n_csize, needle);1160}1161}11621163if (needlecnt != noreg || needle_bytes > 256) {1164bind(L_clcle);11651166// Main Loop: clcle version (now we have at least 256 bytes).1167Label L_OuterLoop, CLC_template;1168bind(L_OuterLoop); // Search for 1st 2 characters.1169z_lgr(Z_R1, haycnt);1170if (h_csize == 1) {1171MacroAssembler::search_string(Z_R1, result);1172} else {1173MacroAssembler::search_string_uni(Z_R1, 
result);1174}1175z_brc(Assembler::bcondNotFound, L_NotFound);11761177add2reg(Z_R0, n_csize, needle);1178add2reg(even_reg, h_csize, Z_R1);1179z_lgr(result, Z_R1);1180if (needlecnt != noreg) {1181z_llgfr(Z_R1, needlecnt); // needle len in bytes (left operand)1182z_llgfr(odd_reg, needlecnt);1183} else {1184load_const_optimized(Z_R1, needle_bytes);1185if (Immediate::is_simm16(needle_bytes)) { z_lghi(odd_reg, needle_bytes); } else { z_lgr(odd_reg, Z_R1); }1186}1187if (h_csize == 1) {1188compare_long_ext(Z_R0, even_reg, 0);1189} else {1190compare_long_uni(Z_R0, even_reg, 0);1191}1192z_bre(L_Found);11931194if (n_csize == 2) { z_llgh(Z_R0, Address(needle)); } else { z_llgc(Z_R0, Address(needle)); } // Reload.1195z_aghi(result, h_csize); // This is the new address we want to use for comparing.1196z_bru(L_OuterLoop);1197}1198}11991200if (needlecnt != noreg || needlecntval == 1) {1201bind(L_needle1);12021203// Single needle character version.1204if (h_csize == 1) {1205MacroAssembler::search_string(haycnt, result);1206} else {1207MacroAssembler::search_string_uni(haycnt, result);1208}1209z_lgr(result, haycnt);1210z_brc(Assembler::bcondFound, L_Found);1211}12121213bind(L_NotFound);1214add2reg(result, -1, haystack); // Return -1.12151216bind(L_Found); // Return index (or -1 in fallthrough case).1217z_sgr(result, haystack);1218if (h_csize == 2) { z_srag(result, result, exact_log2(sizeof(jchar))); }1219}1220BLOCK_COMMENT("} string_indexof");12211222return offset() - block_start;1223}12241225// early clobber: result1226unsigned int C2_MacroAssembler::string_indexof_char(Register result, Register haystack, Register haycnt,1227Register needle, jchar needleChar, Register odd_reg, Register even_reg, bool is_byte) {1228int block_start = offset();12291230BLOCK_COMMENT("string_indexof_char {");12311232if (needle == haystack) {1233z_lhi(result, 0);1234} else {12351236Label Ldone;12371238z_llgfr(odd_reg, haycnt); // Preset loop ctr/searchrange end.1239if (needle == noreg) 
{1240load_const_optimized(Z_R0, (unsigned long)needleChar);1241} else {1242if (is_byte) {1243z_llgcr(Z_R0, needle); // First (and only) needle char.1244} else {1245z_llghr(Z_R0, needle); // First (and only) needle char.1246}1247}12481249if (!is_byte) {1250z_agr(odd_reg, odd_reg); // Calc #bytes to be processed with SRSTU.1251}12521253z_lgr(even_reg, haystack); // haystack addr1254z_agr(odd_reg, haystack); // First char after range end.1255z_lghi(result, -1);12561257if (is_byte) {1258MacroAssembler::search_string(odd_reg, even_reg);1259} else {1260MacroAssembler::search_string_uni(odd_reg, even_reg);1261}1262z_brc(Assembler::bcondNotFound, Ldone);1263if (is_byte) {1264if (VM_Version::has_DistinctOpnds()) {1265z_sgrk(result, odd_reg, haystack);1266} else {1267z_sgr(odd_reg, haystack);1268z_lgr(result, odd_reg);1269}1270} else {1271z_slgr(odd_reg, haystack);1272z_srlg(result, odd_reg, exact_log2(sizeof(jchar)));1273}12741275bind(Ldone);1276}1277BLOCK_COMMENT("} string_indexof_char");12781279return offset() - block_start;1280}1281128212831284