Path: blob/master/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.cpp
40930 views
/*
 * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "opto/c2_MacroAssembler.hpp"
#include "opto/intrinsicnode.hpp"
#include "runtime/vm_version.hpp"

// BLOCK_COMMENT expands to nothing in PRODUCT builds and to block_comment(str) otherwise.
// BIND binds a label and additionally emits the label name as a block comment.
#ifdef PRODUCT
#define BLOCK_COMMENT(str) // nothing
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Intrinsics for CompactStrings

// Compress char[] to byte[] by compressing 16 bytes at once.
//
// Emits a loop that runs (cnt >> 3) times. Each iteration loads 16 bytes (8 chars)
// from src, stores 8 bytes to dst and advances src by 16 and dst by 8.
// If any loaded char has bits set outside the 0x00FF mask, the code branches to
// Lfailure before src/dst are advanced for that iteration.
// cnt is only read. If cnt < 8 nothing is processed. In both cases control falls
// through at the end, so the remaining (cnt & 7) chars are left for the code that
// follows (the "slow version").
// Writes R0, tmp1..tmp5, CCR0 and CTR.
void C2_MacroAssembler::string_compress_16(Register src, Register dst, Register cnt,
                                           Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
                                           Label& Lfailure) {

  const Register tmp0 = R0;
  assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
  Label Lloop, Lslow;

  // Check if cnt >= 8 (= 16 bytes)
  // The mask construction is interleaved with the check; the mask is complete after rldimi.
  lis(tmp1, 0xFF);                // tmp1 = 0x00FF00FF00FF00FF
  srwi_(tmp2, cnt, 3);            // tmp2 = number of 8-char iterations, sets CCR0.
  beq(CCR0, Lslow);
  ori(tmp1, tmp1, 0xFF);
  rldimi(tmp1, tmp1, 32, 0);
  mtctr(tmp2);

  // 2x unrolled loop
  bind(Lloop);
  ld(tmp2, 0, src);               // _0_1_2_3 (Big Endian)
  ld(tmp4, 8, src);               // _4_5_6_7

  orr(tmp0, tmp2, tmp4);          // Combine both doublewords for a single latin1 check below.
  rldicl(tmp3, tmp2, 6*8, 64-24); // _____1_2
  rldimi(tmp2, tmp2, 2*8, 2*8);   // _0_2_3_3
  rldicl(tmp5, tmp4, 6*8, 64-24); // _____5_6
  rldimi(tmp4, tmp4, 2*8, 2*8);   // _4_6_7_7

  andc_(tmp0, tmp0, tmp1);        // Non-zero if any bit outside the 0x00FF mask is set.
  bne(CCR0, Lfailure);            // Not latin1.
  addi(src, src, 16);

  rlwimi(tmp3, tmp2, 0*8, 24, 31);// _____1_3
  srdi(tmp2, tmp2, 3*8);          // ____0_2_
  rlwimi(tmp5, tmp4, 0*8, 24, 31);// _____5_7
  srdi(tmp4, tmp4, 3*8);          // ____4_6_

  orr(tmp2, tmp2, tmp3);          // ____0123
  orr(tmp4, tmp4, tmp5);          // ____4567

  stw(tmp2, 0, dst);
  stw(tmp4, 4, dst);
  addi(dst, dst, 8);
  bdnz(Lloop);

  bind(Lslow);                    // Fallback to slow version
}

// Compress char[] to byte[]. cnt must be positive int.
//
// Emits a loop that runs cnt times: loads one 16-bit char from src, branches to
// Lfailure if it is greater than 0xff (unsigned compare), otherwise stores its
// low byte to dst. src advances by 2 and dst by 1 per stored char.
// Writes tmp, CCR0 and CTR.
void C2_MacroAssembler::string_compress(Register src, Register dst, Register cnt, Register tmp, Label& Lfailure) {
  Label Lloop;
  mtctr(cnt);

  bind(Lloop);
  lhz(tmp, 0, src);
  cmplwi(CCR0, tmp, 0xff);
  bgt(CCR0, Lfailure);            // Not latin1.
  addi(src, src, 2);
  stb(tmp, 0, dst);
  addi(dst, dst, 1);
  bdnz(Lloop);
}

// Inflate byte[] to char[] by inflating 16 bytes at once.
//
// Emits a loop that runs (cnt >> 3) times. Each iteration loads 8 bytes from src,
// stores 16 bytes (8 zero-extended chars) to dst and advances src by 8 and dst by 16.
// cnt is only read. If cnt < 8 nothing is processed. Control falls through at the
// end, so the remaining (cnt & 7) bytes are left for the code that follows.
// Writes R0, tmp1..tmp5, CCR0 and CTR.
void C2_MacroAssembler::string_inflate_16(Register src, Register dst, Register cnt,
                                          Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5) {
  const Register tmp0 = R0;
  assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
  Label Lloop, Lslow;

  // Check if cnt >= 8
  srwi_(tmp2, cnt, 3);            // tmp2 = number of 8-byte iterations, sets CCR0.
  beq(CCR0, Lslow);
  lis(tmp1, 0xFF);                // tmp1 = 0x00FF00FF
  ori(tmp1, tmp1, 0xFF);
  mtctr(tmp2);

  // 2x unrolled loop
  bind(Lloop);
  lwz(tmp2, 0, src);              // ____0123 (Big Endian)
  lwz(tmp4, 4, src);              // ____4567
  addi(src, src, 8);

  rldicl(tmp3, tmp2, 7*8, 64-8);  // _______2
  rlwimi(tmp2, tmp2, 3*8, 16, 23);// ____0113
  rldicl(tmp5, tmp4, 7*8, 64-8);  // _______6
  rlwimi(tmp4, tmp4, 3*8, 16, 23);// ____4557

  andc(tmp0, tmp2, tmp1);         // ____0_1_
  rlwimi(tmp2, tmp3, 2*8, 0, 23); // _____2_3
  andc(tmp3, tmp4, tmp1);         // ____4_5_
  rlwimi(tmp4, tmp5, 2*8, 0, 23); // _____6_7

  rldimi(tmp2, tmp0, 3*8, 0*8);   // _0_1_2_3
  rldimi(tmp4, tmp3, 3*8, 0*8);   // _4_5_6_7

  std(tmp2, 0, dst);
  std(tmp4, 8, dst);
  addi(dst, dst, 16);
  bdnz(Lloop);

  bind(Lslow);                    // Fallback to slow version
}

// Inflate byte[] to char[]. cnt must be positive int.
//
// Emits a loop that runs cnt times: loads one byte from src (zero-extended) and
// stores it as a 16-bit char to dst. src advances by 1 and dst by 2 per iteration.
// Writes tmp and CTR.
void C2_MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp) {
  Label Lloop;
  mtctr(cnt);

  bind(Lloop);
  lbz(tmp, 0, src);
  addi(src, src, 1);
  sth(tmp, 0, dst);
  addi(dst, dst, 2);
  bdnz(Lloop);
}

// Emit code comparing two strings.
//
// result receives the difference (chr1 - chr2) of the first mismatching characters,
// or (cnt1 - cnt2) in characters if the strings are equal up to the shorter length.
// In the UL case the result is negated at the end (see note in the body).
// ae selects the encoding pair (StrIntrinsicNode::LL, UU, LU, UL).
// The counts of UTF16 strings are shifted right by 1 on entry, so they appear to be
// passed in as byte lengths -- NOTE(review): confirm against the caller.
// str1, str2, cnt1, cnt2, tmp1, result, R0, CCR0 and CTR are written.
void C2_MacroAssembler::string_compare(Register str1, Register str2,
                                       Register cnt1, Register cnt2,
                                       Register tmp1, Register result, int ae) {
  const Register tmp0 = R0,
                 diff = tmp1;   // Alias: tmp1 holds the length difference for the whole function.

  assert_different_registers(str1, str2, cnt1, cnt2, tmp0, tmp1, result);
  Label Ldone, Lslow, Lloop, Lreturn_diff;

  // Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
  // we interchange str1 and str2 in the UL case and negate the result.
  // Like this, str1 is always latin1 encoded, except for the UU case.
  // In addition, we need 0 (or sign which is 0) extend.

  if (ae == StrIntrinsicNode::UU) {
    srwi(cnt1, cnt1, 1);
  } else {
    clrldi(cnt1, cnt1, 32);
  }

  if (ae != StrIntrinsicNode::LL) {
    srwi(cnt2, cnt2, 1);
  } else {
    clrldi(cnt2, cnt2, 32);
  }

  // See if the lengths are different, and calculate min in cnt1.
  // Save diff in case we need it for a tie-breaker.
  subf_(diff, cnt2, cnt1); // diff = cnt1 - cnt2
  // if (diff > 0) { cnt1 = cnt2; }
  if (VM_Version::has_isel()) {
    isel(cnt1, CCR0, Assembler::greater, /*invert*/ false, cnt2);
  } else {
    // Branching variant: the move is also executed for diff == 0, where cnt1 == cnt2 anyway.
    Label Lskip;
    blt(CCR0, Lskip);
    mr(cnt1, cnt2);
    bind(Lskip);
  }

  // Rename registers
  Register chr1 = result;
  Register chr2 = tmp0;

  // Compare multiple characters in fast loop (only implemented for same encoding).
  int stride1 = 8, stride2 = 8;
  if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
    // 8 bytes per iteration: 8 latin1 chars or 4 UTF16 chars.
    int log2_chars_per_iter = (ae == StrIntrinsicNode::LL) ? 3 : 2;
    Label Lfastloop, Lskipfast;

    srwi_(tmp0, cnt1, log2_chars_per_iter);
    beq(CCR0, Lskipfast);
    rldicl(cnt2, cnt1, 0, 64 - log2_chars_per_iter); // Remaining characters.
    li(cnt1, 1 << log2_chars_per_iter); // Initialize for failure case: Rescan characters from current iteration.
    mtctr(tmp0);

    bind(Lfastloop);
    ld(chr1, 0, str1);
    ld(chr2, 0, str2);
    cmpd(CCR0, chr1, chr2);
    bne(CCR0, Lslow);          // Mismatch within these 8 bytes: locate it char by char (str1/str2 not yet advanced).
    addi(str1, str1, stride1);
    addi(str2, str2, stride2);
    bdnz(Lfastloop);
    mr(cnt1, cnt2); // Remaining characters.
    bind(Lskipfast);
  }

  // Loop which searches the first difference character by character.
  cmpwi(CCR0, cnt1, 0);
  beq(CCR0, Lreturn_diff);
  bind(Lslow);
  mtctr(cnt1);

  // From here on the strides are the per-character sizes of str1 and str2.
  switch (ae) {
    case StrIntrinsicNode::LL: stride1 = 1; stride2 = 1; break;
    case StrIntrinsicNode::UL: // fallthru (see comment above)
    case StrIntrinsicNode::LU: stride1 = 1; stride2 = 2; break;
    case StrIntrinsicNode::UU: stride1 = 2; stride2 = 2; break;
    default: ShouldNotReachHere(); break;
  }

  bind(Lloop);
  if (stride1 == 1) { lbz(chr1, 0, str1); } else { lhz(chr1, 0, str1); }
  if (stride2 == 1) { lbz(chr2, 0, str2); } else { lhz(chr2, 0, str2); }
  subf_(result, chr2, chr1); // result = chr1 - chr2
  bne(CCR0, Ldone);
  addi(str1, str1, stride1);
  addi(str2, str2, stride2);
  bdnz(Lloop);

  // If strings are equal up to min length, return the length difference.
  bind(Lreturn_diff);
  mr(result, diff);

  // Otherwise, return the difference between the first mismatched chars.
  bind(Ldone);
  if (ae == StrIntrinsicNode::UL) {
    neg(result, result); // Negate result (see note above).
  }
}

// Emit code comparing two arrays (or two raw character ranges) for equality.
//
// result is set to 1 if all elements are equal and to 0 otherwise.
// is_array_equ == true:  ary1/ary2 are treated as array references. Identical
//                        references yield 1, a NULL reference or differing lengths
//                        yield 0. limit is overwritten with the length loaded from
//                        ary1, and ary1/ary2 are advanced to the array base.
// is_array_equ == false: ary1/ary2 are used directly as data addresses and limit is
//                        an input. For !is_byte the shift amounts below drop one
//                        extra bit, so limit appears to be a byte count in that case
//                        -- NOTE(review): confirm against the caller.
// is_byte selects 1-byte (true) or 2-byte (false) elements.
// ary1, ary2, limit, tmp1, R0, CCR0, CTR (and CCR1 when is_array_equ) are written.
void C2_MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
                                     Register limit, Register tmp1, Register result, bool is_byte) {
  const Register tmp0 = R0;
  assert_different_registers(ary1, ary2, limit, tmp0, tmp1, result);
  Label Ldone, Lskiploop, Lloop, Lfastloop, Lskipfast;
  bool limit_needs_shift = false;

  if (is_array_equ) {
    const int length_offset = arrayOopDesc::length_offset_in_bytes();
    const int base_offset = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);

    // Return true if the same array.
    cmpd(CCR0, ary1, ary2);
    beq(CCR0, Lskiploop);

    // Return false if one of them is NULL.
    cmpdi(CCR0, ary1, 0);
    cmpdi(CCR1, ary2, 0);
    li(result, 0);
    cror(CCR0, Assembler::equal, CCR1, Assembler::equal); // CCR0.eq = (ary1 == NULL) || (ary2 == NULL)
    beq(CCR0, Ldone);

    // Load the lengths of arrays.
    lwz(limit, length_offset, ary1);
    lwz(tmp0, length_offset, ary2);

    // Return false if the two arrays are not equal length.
    cmpw(CCR0, limit, tmp0);
    bne(CCR0, Ldone);

    // Load array addresses.
    addi(ary1, ary1, base_offset);
    addi(ary2, ary2, base_offset);
  } else {
    limit_needs_shift = !is_byte;
    li(result, 0); // Assume not equal.
  }

  // Rename registers
  Register chr1 = tmp0;
  Register chr2 = tmp1;

  // Compare 8 bytes per iteration in fast loop.
  const int log2_chars_per_iter = is_byte ? 3 : 2;

  srwi_(tmp0, limit, log2_chars_per_iter + (limit_needs_shift ? 1 : 0));
  beq(CCR0, Lskipfast);
  mtctr(tmp0);

  bind(Lfastloop);
  ld(chr1, 0, ary1);
  ld(chr2, 0, ary2);
  addi(ary1, ary1, 8);
  addi(ary2, ary2, 8);
  cmpd(CCR0, chr1, chr2);
  bne(CCR0, Ldone);            // result is still 0 here.
  bdnz(Lfastloop);

  bind(Lskipfast);
  // Rotating left by 63 is a rotate right by 1; the mask then keeps the low log2_chars_per_iter bits.
  rldicl_(limit, limit, limit_needs_shift ? 64 - 1 : 0, 64 - log2_chars_per_iter); // Remaining characters.
  beq(CCR0, Lskiploop);
  mtctr(limit);

  // Character by character.
  bind(Lloop);
  if (is_byte) {
    lbz(chr1, 0, ary1);
    lbz(chr2, 0, ary2);
    addi(ary1, ary1, 1);
    addi(ary2, ary2, 1);
  } else {
    lhz(chr1, 0, ary1);
    lhz(chr2, 0, ary2);
    addi(ary1, ary1, 2);
    addi(ary2, ary2, 2);
  }
  cmpw(CCR0, chr1, chr2);
  bne(CCR0, Ldone);
  bdnz(Lloop);

  bind(Lskiploop);
  li(result, 1); // All characters are equal.
  bind(Ldone);
}

// Emit code searching for needle in haystack.
//
// result receives the index (in characters) of the first match relative to
// haystack, or -1 if there is no match.
// ae selects the encodings: LL -> 1-byte haystack and needle characters,
// UU -> 2-byte haystack and needle characters, otherwise (UL) 2-byte haystack and
// 1-byte needle characters. LU is rejected by an assert.
// needlecntval == 0 means the needle length is variable and taken from the
// needlecnt register; otherwise needlecntval is the constant needle length
// (1 is rejected by a guarantee).
// needle_values is not referenced by this implementation.
// haycnt is overwritten (it is reused as last_addr) and needlecnt is modified.
// tmp1..tmp4, R0, CCR0, CCR1, CTR (and CCR6 for variable needlecnt) are written.
void C2_MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt,
                                       Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
                                       Register tmp1, Register tmp2, Register tmp3, Register tmp4, int ae) {

  // Ensure 0<needlecnt<=haycnt in ideal graph as prerequisite!
  Label L_TooShort, L_Found, L_NotFound, L_End;
  Register last_addr = haycnt, // Kill haycnt at the beginning.
           addr = tmp1,
           n_start = tmp2,
           ch1 = tmp3,
           ch2 = R0;

  assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
  const int h_csize = (ae == StrIntrinsicNode::LL) ? 1 : 2; // Haystack character size in bytes.
  const int n_csize = (ae == StrIntrinsicNode::UU) ? 2 : 1; // Needle character size in bytes.

  // **************************************************************************************************
  // Prepare for main loop: optimized for needle count >=2, bail out otherwise.
  // **************************************************************************************************

  // Compute last haystack addr to use if no match gets found.
  clrldi(haycnt, haycnt, 32); // Ensure positive int is valid as 64 bit value.
  addi(addr, haystack, -h_csize); // Accesses use pre-increment.
  if (needlecntval == 0) { // variable needlecnt
    cmpwi(CCR6, needlecnt, 2); // CCR6 is tested again at L_Comp1 (needlecnt == 2?).
    clrldi(needlecnt, needlecnt, 32); // Ensure positive int is valid as 64 bit value.
    blt(CCR6, L_TooShort); // Variable needlecnt: handle short needle separately.
  }

  if (n_csize == 2) { lwz(n_start, 0, needle); } else { lhz(n_start, 0, needle); } // Load first 2 characters of needle.

  if (needlecntval == 0) { // variable needlecnt
    subf(ch1, needlecnt, haycnt); // Last character index to compare is haycnt-needlecnt.
    addi(needlecnt, needlecnt, -2); // Rest of needle.
  } else { // constant needlecnt
    guarantee(needlecntval != 1, "IndexOf with single-character needle must be handled separately");
    assert((needlecntval & 0x7fff) == needlecntval, "wrong immediate");
    addi(ch1, haycnt, -needlecntval); // Last character index to compare is haycnt-needlecnt.
    if (needlecntval > 3) { li(needlecnt, needlecntval - 2); } // Rest of needle.
  }

  if (h_csize == 2) { slwi(ch1, ch1, 1); } // Scale to number of bytes.

  if (ae ==StrIntrinsicNode::UL) {
    // Spread the two 1-byte needle characters into two 2-byte characters to match the haystack layout.
    srwi(tmp4, n_start, 1*8); // ___0
    rlwimi(n_start, tmp4, 2*8, 0, 23); // _0_1
  }

  add(last_addr, haystack, ch1); // Point to last address to compare (haystack+2*(haycnt-needlecnt)).

  // Main Loop (now we have at least 2 characters).
  Label L_OuterLoop, L_InnerLoop, L_FinalCheck, L_Comp1, L_Comp2;
  bind(L_OuterLoop); // Search for 1st 2 characters.
  Register addr_diff = tmp4;
  subf(addr_diff, addr, last_addr); // Difference between already checked address and last address to check.
  addi(addr, addr, h_csize); // This is the new address we want to use for comparing.
  // Shifting the byte difference by h_csize (1 or 2) yields (number of characters) / 2 for both character sizes.
  srdi_(ch2, addr_diff, h_csize);
  beq(CCR0, L_FinalCheck); // 2 characters left?
  mtctr(ch2); // num of characters / 2
  bind(L_InnerLoop); // Main work horse (2x unrolled search loop)
  if (h_csize == 2) { // Load 2 characters of haystack (ignore alignment).
    lwz(ch1, 0, addr);
    lwz(ch2, 2, addr);
  } else {
    lhz(ch1, 0, addr);
    lhz(ch2, 1, addr);
  }
  cmpw(CCR0, ch1, n_start); // Compare 2 characters (1 would be sufficient but try to reduce branches to CompLoop).
  cmpw(CCR1, ch2, n_start);
  beq(CCR0, L_Comp1); // Did we find the needle start?
  beq(CCR1, L_Comp2);
  addi(addr, addr, 2 * h_csize);
  bdnz(L_InnerLoop);
  bind(L_FinalCheck);
  andi_(addr_diff, addr_diff, h_csize); // Remaining characters not covered by InnerLoop: (num of characters) & 1.
  beq(CCR0, L_NotFound);
  if (h_csize == 2) { lwz(ch1, 0, addr); } else { lhz(ch1, 0, addr); } // One position left at which we have to compare.
  cmpw(CCR1, ch1, n_start);
  beq(CCR1, L_Comp1);
  bind(L_NotFound);
  li(result, -1); // not found
  b(L_End);

  // **************************************************************************************************
  // Special Case: unfortunately, the variable needle case can be called with needlecnt<2
  // **************************************************************************************************
  if (needlecntval == 0) { // We have to handle these cases separately.
    Label L_OneCharLoop;
    bind(L_TooShort);
    mtctr(haycnt); // haycnt still holds the character count here: last_addr is only written after the branch to L_TooShort.
    if (n_csize == 2) { lhz(n_start, 0, needle); } else { lbz(n_start, 0, needle); } // First character of needle
    bind(L_OneCharLoop);
    if (h_csize == 2) { lhzu(ch1, 2, addr); } else { lbzu(ch1, 1, addr); } // Load with update: addr is advanced to the loaded character.
    cmpw(CCR1, ch1, n_start);
    beq(CCR1, L_Found); // Did we find the one character needle?
    bdnz(L_OneCharLoop);
    li(result, -1); // Not found.
    b(L_End);
  }

  // **************************************************************************************************
  // Regular Case Part II: compare rest of needle (first 2 characters have been compared already)
  // **************************************************************************************************

  // Compare the rest
  bind(L_Comp2);
  addi(addr, addr, h_csize); // First comparison has failed, 2nd one hit.
  bind(L_Comp1); // Addr points to possible needle start.
  if (needlecntval != 2) { // Const needlecnt==2?
    if (needlecntval != 3) {
      if (needlecntval == 0) { beq(CCR6, L_Found); } // Variable needlecnt==2?
      Register n_ind = tmp4,
               h_ind = n_ind;
      li(n_ind, 2 * n_csize); // First 2 characters are already compared, use index 2.
      mtctr(needlecnt); // Decremented by 2, still > 0.
      Label L_CompLoop;
      bind(L_CompLoop);
      if (ae ==StrIntrinsicNode::UL) {
        // Needle and haystack character sizes differ: the haystack byte index is twice the needle byte index.
        h_ind = ch1;
        sldi(h_ind, n_ind, 1);
      }
      if (n_csize == 2) { lhzx(ch2, needle, n_ind); } else { lbzx(ch2, needle, n_ind); }
      if (h_csize == 2) { lhzx(ch1, addr, h_ind); } else { lbzx(ch1, addr, h_ind); }
      cmpw(CCR1, ch1, ch2);
      bne(CCR1, L_OuterLoop); // Mismatch: continue the search behind addr.
      addi(n_ind, n_ind, n_csize);
      bdnz(L_CompLoop);
    } else { // No loop required if there's only one needle character left.
      if (n_csize == 2) { lhz(ch2, 2 * 2, needle); } else { lbz(ch2, 2 * 1, needle); }
      if (h_csize == 2) { lhz(ch1, 2 * 2, addr); } else { lbz(ch1, 2 * 1, addr); }
      cmpw(CCR1, ch1, ch2);
      bne(CCR1, L_OuterLoop);
    }
  }
  // Return index ...
  bind(L_Found);
  subf(result, haystack, addr); // relative to haystack, ...
  if (h_csize == 2) { srdi(result, result, 1); } // in characters.
  bind(L_End);
} // string_indexof

// Emit code searching for a single character in haystack.
//
// result receives the index (in characters) of the first match relative to
// haystack, or -1 if there is no match.
// If needle is R0, the haystack characters are compared against the immediate
// needleChar; otherwise they are compared against the needle register.
// is_byte selects 1-byte (true) or 2-byte (false) haystack characters.
// haycnt is the number of characters to search; it is only read.
// tmp1, tmp2, R0, CCR0, CCR1 and CTR are written.
// The "//N:" markers look like running counts of emitted instructions
// -- NOTE(review): presumed meaning, confirm before relying on them.
void C2_MacroAssembler::string_indexof_char(Register result, Register haystack, Register haycnt,
                                            Register needle, jchar needleChar, Register tmp1, Register tmp2, bool is_byte) {
  assert_different_registers(haystack, haycnt, needle, tmp1, tmp2);

  Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_NotFound, L_End;
  Register addr = tmp1,
           ch1 = tmp2,
           ch2 = R0;

  const int h_csize = is_byte ? 1 : 2; // Haystack character size in bytes.

//4:
  srwi_(tmp2, haycnt, 1); // Shift right by exact_log2(UNROLL_FACTOR).
  mr(addr, haystack);
  beq(CCR0, L_FinalCheck);
  mtctr(tmp2); // Move to count register.
//8:
  bind(L_InnerLoop); // Main work horse (2x unrolled search loop).
  if (!is_byte) {
    lhz(ch1, 0, addr);
    lhz(ch2, 2, addr);
  } else {
    lbz(ch1, 0, addr);
    lbz(ch2, 1, addr);
  }
  (needle != R0) ? cmpw(CCR0, ch1, needle) : cmplwi(CCR0, ch1, (unsigned int)needleChar);
  (needle != R0) ? cmpw(CCR1, ch2, needle) : cmplwi(CCR1, ch2, (unsigned int)needleChar);
  beq(CCR0, L_Found1); // Did we find the needle?
  beq(CCR1, L_Found2);
  addi(addr, addr, 2 * h_csize);
  bdnz(L_InnerLoop);
//16:
  bind(L_FinalCheck);
  andi_(R0, haycnt, 1); // Odd character count: one character is left after the unrolled loop.
  beq(CCR0, L_NotFound);
  if (!is_byte) { lhz(ch1, 0, addr); } else { lbz(ch1, 0, addr); } // One position left at which we have to compare.
  (needle != R0) ? cmpw(CCR1, ch1, needle) : cmplwi(CCR1, ch1, (unsigned int)needleChar);
  beq(CCR1, L_Found1);
//21:
  bind(L_NotFound);
  li(result, -1); // Not found.
  b(L_End);

  bind(L_Found2);
  addi(addr, addr, h_csize); // The second character of the pair matched.
//24:
  bind(L_Found1); // Return index ...
  subf(result, haystack, addr); // relative to haystack, ...
  if (!is_byte) { srdi(result, result, 1); } // in characters.
  bind(L_End);
} // string_indexof_char


// Emit code checking whether any of the cnt bytes at src has its most significant
// bit (0x80) set.
//
// result is set to 1 if such a byte is found and to 0 otherwise.
// A fast loop checks 16 bytes per iteration ((cnt >> 4) iterations); the
// remaining (cnt & 15) bytes are checked one by one.
// cnt is only read. src is advanced past the bytes that were checked without a hit.
// tmp1, tmp2, R0, CCR0 and CTR are written.
void C2_MacroAssembler::has_negatives(Register src, Register cnt, Register result,
                                      Register tmp1, Register tmp2) {
  const Register tmp0 = R0;
  assert_different_registers(src, result, cnt, tmp0, tmp1, tmp2);
  Label Lfastloop, Lslow, Lloop, Lnoneg, Ldone;

  // Check if cnt >= 16 (= 16 bytes, the amount handled per fast loop iteration)
  // The mask construction is interleaved with the check; the mask is complete after rldimi.
  lis(tmp1, (int)(short)0x8080); // tmp1 = 0x8080808080808080
  srwi_(tmp2, cnt, 4);           // tmp2 = number of 16-byte iterations, sets CCR0.
  li(result, 1); // Assume there's a negative byte.
  beq(CCR0, Lslow);
  ori(tmp1, tmp1, 0x8080);
  rldimi(tmp1, tmp1, 32, 0);
  mtctr(tmp2);

  // 2x unrolled loop
  bind(Lfastloop);
  ld(tmp2, 0, src);
  ld(tmp0, 8, src);

  orr(tmp0, tmp2, tmp0);         // Combine both doublewords for a single check.

  and_(tmp0, tmp0, tmp1);
  bne(CCR0, Ldone); // Found negative byte.
  addi(src, src, 16);

  bdnz(Lfastloop);

  bind(Lslow); // Fallback to slow version
  rldicl_(tmp0, cnt, 0, 64-4);   // tmp0 = cnt & 15 (bytes not covered by the fast loop).
  beq(CCR0, Lnoneg);
  mtctr(tmp0);
  bind(Lloop);
  lbz(tmp0, 0, src);
  addi(src, src, 1);
  andi_(tmp0, tmp0, 0x80);
  bne(CCR0, Ldone); // Found negative byte.
  bdnz(Lloop);
  bind(Lnoneg);
  li(result, 0);

  bind(Ldone);
}