# Path: blob/master/src/hotspot/os_cpu/linux_x86/linux_x86_32.S
# 40951 views
#
# Copyright (c) 2004, 2017, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#


        # File conventions (as used by the code below):
        #   * GNU as, AT&T operand order (source, destination), 32-bit x86.
        #   * All arguments are read from the stack relative to %esp.
        #   * %esi, %edi and %ebx are pushed/popped around any use;
        #     %eax, %ecx and %edx are used as scratch.
        #   * Every "copy from high to low" path sets the direction flag
        #     (std) and clears it again (cld) before returning.
        #   * Numeric labels (0: .. 7:) are local; "Nb"/"Nf" refer to the
        #     nearest such label backward/forward.

        # NOTE WELL!  The _Copy functions are called directly
        # from server-compiler-generated code via CallLeafNoFP,
        # which means that they *must* either not use floating
        # point or use it in the same manner as does the server
        # compiler.

        .globl _Copy_conjoint_bytes
        .globl _Copy_arrayof_conjoint_bytes
        .globl _Copy_conjoint_jshorts_atomic
        .globl _Copy_arrayof_conjoint_jshorts
        .globl _Copy_conjoint_jints_atomic
        .globl _Copy_arrayof_conjoint_jints
        .globl _Copy_conjoint_jlongs_atomic
        .globl _mmx_Copy_arrayof_conjoint_jshorts

        .globl _Atomic_cmpxchg_long
        .globl _Atomic_move_long

        .text

        # SpinPause
        #
        # Executes "rep; nop" (the encoding of the PAUSE spin-wait hint)
        # and returns 1 in %eax.  Takes no arguments, touches no memory.
        .globl SpinPause
        .type  SpinPause,@function
        .p2align 4,,15
SpinPause:
        rep
        nop
        movl    $1, %eax
        ret

        # Support for void Copy::conjoint_bytes(void* from,
        #                                       void* to,
        #                                       size_t count)
        #
        # Overlap-safe byte copy.
        #   %esi = from, %edi = to, %ecx = count (bytes)
        #   %eax = from + count - 1 (address of the last source byte)
        # Direction choice (unsigned compares):
        #   to <= from                 -> copy low to high
        #   from < to <= from+count-1  -> copy high to low (regions overlap)
        #   otherwise                  -> copy low to high
        # Runs of up to 32 dwords use an explicit load/store loop; longer
        # runs use "rep; smovl".
        .p2align 4,,15
        .type    _Copy_conjoint_bytes,@function
_Copy_conjoint_bytes:
        pushl    %esi
        movl     4+12(%esp),%ecx        # count
        pushl    %edi
        movl     8+ 4(%esp),%esi        # from
        movl     8+ 8(%esp),%edi        # to
        cmpl     %esi,%edi
        leal     -1(%esi,%ecx),%eax     # from + count - 1 (leal keeps flags)
        jbe      cb_CopyRight
        cmpl     %eax,%edi
        jbe      cb_CopyLeft
        # copy from low to high
cb_CopyRight:
        cmpl     $3,%ecx
        jbe      5f                     # <= 3 bytes
        # align source address at dword address boundary
        movl     %ecx,%eax              # original count
        movl     $4,%ecx
        subl     %esi,%ecx
        andl     $3,%ecx                # prefix byte count
        jz       1f                     # no prefix
        subl     %ecx,%eax              # byte count less prefix
        # copy prefix
        subl     %esi,%edi              # %edi = to - from, so (%edi,%esi) == to
0:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        addl     $1,%esi
        subl     $1,%ecx
        jnz      0b
        addl     %esi,%edi              # restore %edi to an absolute address
1:      movl     %eax,%ecx              # byte count less prefix
        shrl     $2,%ecx                # dword count
        jz       4f                     # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                     # <= 32 dwords
        # copy aligned dwords
        rep;     smovl
        jmp      4f
        # copy aligned dwords
2:      subl     %esi,%edi              # %edi = to - from
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi              # restore %edi to an absolute address
4:      movl     %eax,%ecx              # byte count less prefix
5:      andl     $3,%ecx                # suffix byte count
        jz       7f                     # no suffix
        # copy suffix
        xorl     %eax,%eax              # %eax = byte index into the suffix
6:      movb     (%esi,%eax,1),%dl
        movb     %dl,(%edi,%eax,1)
        addl     $1,%eax
        subl     $1,%ecx
        jnz      6b
7:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
cb_CopyLeft:
        std                             # string ops now decrement %esi/%edi
        leal     -4(%edi,%ecx),%edi     # to + count - 4
        movl     %eax,%esi              # from + count - 1
        movl     %ecx,%eax              # keep byte count for the suffix
        subl     $3,%esi                # from + count - 4
        cmpl     $3,%ecx
        jbe      5f                     # <= 3 bytes
1:      shrl     $2,%ecx                # dword count
        jz       4f                     # no dwords to move
        cmpl     $32,%ecx
        ja       3f                     # > 32 dwords
        # copy dwords, aligned or not
        subl     %esi,%edi              # %edi = to - from
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi              # restore %edi to an absolute address
        jmp      4f
        # copy dwords, aligned or not
3:      rep;     smovl
4:      movl     %eax,%ecx              # byte count
5:      andl     $3,%ecx                # suffix byte count
        jz       7f                     # no suffix
        # copy suffix
        subl     %esi,%edi              # %edi = to - from
        addl     $3,%esi                # last remaining source byte
6:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        subl     $1,%esi
        subl     $1,%ecx
        jnz      6b
7:      cld                             # restore the direction flag
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::arrayof_conjoint_bytes(void* from,
        #                                               void* to,
        #                                               size_t count)
        #
        # Same as _Copy_conjoint_bytes, except no source alignment check.
        #   %esi = from, %edi = to, %ecx = count (bytes)
        #   %eax = from + count - 1, later the saved byte count
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_bytes,@function
_Copy_arrayof_conjoint_bytes:
        pushl    %esi
        movl     4+12(%esp),%ecx        # count
        pushl    %edi
        movl     8+ 4(%esp),%esi        # from
        movl     8+ 8(%esp),%edi        # to
        cmpl     %esi,%edi
        leal     -1(%esi,%ecx),%eax     # from + count - 1
        jbe      acb_CopyRight
        cmpl     %eax,%edi
        jbe      acb_CopyLeft
        # copy from low to high
acb_CopyRight:
        cmpl     $3,%ecx
        jbe      5f                     # <= 3 bytes
1:      movl     %ecx,%eax              # keep byte count for the suffix
        shrl     $2,%ecx                # dword count
        jz       4f                     # no dwords to move
        cmpl     $32,%ecx
        ja       3f                     # > 32 dwords
        # copy aligned dwords
        subl     %esi,%edi              # %edi = to - from
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi              # restore %edi to an absolute address
        jmp      4f
        # copy aligned dwords
3:      rep;     smovl
4:      movl     %eax,%ecx              # byte count
5:      andl     $3,%ecx                # suffix byte count
        jz       7f                     # no suffix
        # copy suffix
        xorl     %eax,%eax              # %eax = byte index into the suffix
6:      movb     (%esi,%eax,1),%dl
        movb     %dl,(%edi,%eax,1)
        addl     $1,%eax
        subl     $1,%ecx
        jnz      6b
7:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
acb_CopyLeft:
        std                             # string ops now decrement %esi/%edi
        leal     -4(%edi,%ecx),%edi     # to + count - 4
        movl     %eax,%esi              # from + count - 1
        movl     %ecx,%eax              # keep byte count for the suffix
        subl     $3,%esi                # from + count - 4
        cmpl     $3,%ecx
        jbe      5f                     # <= 3 bytes
1:      shrl     $2,%ecx                # dword count
        jz       4f                     # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                     # <= 32 dwords
        rep;     smovl
        jmp      4f
        # 8 filler bytes, never executed (they follow an unconditional jmp);
        # presumably tunes the padding emitted by the .p2align below.
        .space   8
2:      subl     %esi,%edi              # %edi = to - from
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi              # restore %edi to an absolute address
4:      movl     %eax,%ecx              # byte count
5:      andl     $3,%ecx                # suffix byte count
        jz       7f                     # no suffix
        subl     %esi,%edi              # %edi = to - from
        addl     $3,%esi                # last remaining source byte
6:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        subl     $1,%esi
        subl     $1,%ecx
        jnz      6b
7:      cld                             # restore the direction flag
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::conjoint_jshorts_atomic(void* from,
        #                                                void* to,
        #                                                size_t count)
        #
        # Overlap-safe copy of 'count' 16-bit elements.
        #   %esi = from, %edi = to, %ecx = count (words)
        #   %eax = from + count*2 - 2 (address of the last source word)
        # The low-to-high path first copies one word if 'from' is not
        # dword-aligned, then moves dwords, then an optional trailing word.
        .p2align 4,,15
        .type    _Copy_conjoint_jshorts_atomic,@function
_Copy_conjoint_jshorts_atomic:
        pushl    %esi
        movl     4+12(%esp),%ecx        # count
        pushl    %edi
        movl     8+ 4(%esp),%esi        # from
        movl     8+ 8(%esp),%edi        # to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax   # from + count*2 - 2
        jbe      cs_CopyRight
        cmpl     %eax,%edi
        jbe      cs_CopyLeft
        # copy from low to high
cs_CopyRight:
        # align source address at dword address boundary
        movl     %esi,%eax              # original from
        andl     $3,%eax                # either 0 or 2
        jz       1f                     # no prefix
        # copy prefix
        subl     $1,%ecx
        jl       5f                     # zero count
        movw     (%esi),%dx
        movw     %dx,(%edi)
        addl     %eax,%esi              # %eax == 2
        addl     %eax,%edi
1:      movl     %ecx,%eax              # word count less prefix
        sarl     %ecx                   # dword count
        jz       4f                     # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                     # <= 32 dwords
        # copy aligned dwords
        rep;     smovl
        jmp      4f
        # copy aligned dwords
2:      subl     %esi,%edi              # %edi = to - from
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi              # restore %edi to an absolute address
4:      andl     $1,%eax                # suffix count
        jz       5f                     # no suffix
        # copy suffix
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
cs_CopyLeft:
        std                             # string ops now decrement %esi/%edi
        leal     -4(%edi,%ecx,2),%edi   # to + count*2 - 4
        movl     %eax,%esi              # from + count*2 - 2
        movl     %ecx,%eax              # keep word count for the suffix
        subl     $2,%esi                # from + count*2 - 4
1:      sarl     %ecx                   # dword count
        jz       4f                     # no dwords to move
        cmpl     $32,%ecx
        ja       3f                     # > 32 dwords
        subl     %esi,%edi              # %edi = to - from
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi              # restore %edi to an absolute address
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax                # suffix count
        jz       5f                     # no suffix
        # copy suffix
        addl     $2,%esi                # step from "dword below" to the word
        addl     $2,%edi
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      cld                             # restore the direction flag
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
        #                                                 void* to,
        #                                                 size_t count)
        #
        # Like _Copy_conjoint_jshorts_atomic but without the leading
        # source-alignment step on the low-to-high path.
        #   %esi = from, %edi = to, %ecx = count (words)
        #   %eax = from + count*2 - 2, later the saved word count
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_jshorts,@function
_Copy_arrayof_conjoint_jshorts:
        pushl    %esi
        movl     4+12(%esp),%ecx        # count
        pushl    %edi
        movl     8+ 4(%esp),%esi        # from
        movl     8+ 8(%esp),%edi        # to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax   # from + count*2 - 2
        jbe      acs_CopyRight
        cmpl     %eax,%edi
        jbe      acs_CopyLeft
acs_CopyRight:
        movl     %ecx,%eax              # word count
        sarl     %ecx                   # dword count
        jz       4f                     # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                     # <= 32 dwords
        # copy aligned dwords
        rep;     smovl
        jmp      4f
        # copy aligned dwords
        # 5 filler bytes, never executed (they follow an unconditional jmp);
        # presumably tunes the padding emitted by the .p2align below.
        .space   5
2:      subl     %esi,%edi              # %edi = to - from
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi              # restore %edi to an absolute address
4:      andl     $1,%eax                # suffix count
        jz       5f                     # no suffix
        # copy suffix
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      popl     %edi
        popl     %esi
        ret
acs_CopyLeft:
        std                             # string ops now decrement %esi/%edi
        leal     -4(%edi,%ecx,2),%edi   # to + count*2 - 4
        movl     %eax,%esi              # from + count*2 - 2
        movl     %ecx,%eax              # keep word count for the suffix
        subl     $2,%esi                # from + count*2 - 4
        sarl     %ecx                   # dword count
        jz       4f                     # no dwords to move
        cmpl     $32,%ecx
        ja       3f                     # > 32 dwords
        subl     %esi,%edi              # %edi = to - from
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi              # restore %edi to an absolute address
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax                # suffix count
        jz       5f                     # no suffix
        # copy suffix
        addl     $2,%esi
        addl     $2,%edi
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      cld                             # restore the direction flag
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::conjoint_jints_atomic(void* from,
        #                                              void* to,
        #                                              size_t count)
        # Equivalent to
        #   arrayof_conjoint_jints
        #
        # Both symbols label the same entry point.
        #   %esi = from, %edi = to, %ecx = count (dwords)
        #   %eax = from + count*4 - 4 (address of the last source dword)
        # The short loops test with "subl $1 / jge", so a zero count
        # copies nothing.
        .p2align 4,,15
        .type    _Copy_conjoint_jints_atomic,@function
        .type    _Copy_arrayof_conjoint_jints,@function
_Copy_conjoint_jints_atomic:
_Copy_arrayof_conjoint_jints:
        pushl    %esi
        movl     4+12(%esp),%ecx        # count
        pushl    %edi
        movl     8+ 4(%esp),%esi        # from
        movl     8+ 8(%esp),%edi        # to
        cmpl     %esi,%edi
        leal     -4(%esi,%ecx,4),%eax   # from + count*4 - 4
        jbe      ci_CopyRight
        cmpl     %eax,%edi
        jbe      ci_CopyLeft
ci_CopyRight:
        cmpl     $32,%ecx
        jbe      2f                     # <= 32 dwords
        rep;     smovl
        popl     %edi
        popl     %esi
        ret
        # 10 filler bytes, never executed (they follow a ret);
        # presumably tunes the padding emitted by the .p2align below.
        .space   10
2:      subl     %esi,%edi              # %edi = to - from
        jmp      4f
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
4:      subl     $1,%ecx
        jge      3b
        popl     %edi
        popl     %esi
        ret
ci_CopyLeft:
        std                             # string ops now decrement %esi/%edi
        leal     -4(%edi,%ecx,4),%edi   # to + count*4 - 4
        cmpl     $32,%ecx
        ja       4f                     # > 32 dwords
        subl     %eax,%edi              # eax == from + count*4 - 4
        jmp      3f
        .p2align 4,,15
2:      movl     (%eax),%edx
        movl     %edx,(%edi,%eax,1)
        subl     $4,%eax
3:      subl     $1,%ecx
        jge      2b
        cld                             # restore the direction flag
        popl     %edi
        popl     %esi
        ret
4:      movl     %eax,%esi              # from + count*4 - 4
        rep;     smovl
        cld                             # restore the direction flag
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::conjoint_jlongs_atomic(jlong* from,
        #                                               jlong* to,
        #                                               size_t count)
        #
        # 32-bit
        #
        # count treated as signed
        /*
        #
        # if (from > to) {
        #   while (--count >= 0) {
        #     *to++ = *from++;
        #   }
        # } else {
        #   while (--count >= 0) {
        #     to[count] = from[count];
        #   }
        # }
        */
        #   %eax = from, %edx = to, %ecx = count (qwords)
        # Each element is moved with one 64-bit x87 integer load (fildll)
        # followed by one 64-bit x87 integer store-and-pop (fistpll).
        # No registers are saved: only %eax, %ecx and %edx are written.
        .p2align 4,,15
        .type    _Copy_conjoint_jlongs_atomic,@function
_Copy_conjoint_jlongs_atomic:
        movl     4+8(%esp),%ecx         # count
        movl     4+0(%esp),%eax         # from
        movl     4+4(%esp),%edx         # to
        cmpl     %eax,%edx
        jae      cla_CopyLeft           # to >= from: copy high to low
cla_CopyRight:
        subl     %eax,%edx              # %edx = to - from
        jmp      2f
        .p2align 4,,15
1:      fildll   (%eax)
        fistpll  (%edx,%eax,1)
        addl     $8,%eax
2:      subl     $1,%ecx
        jge      1b
        ret
        .p2align 4,,15
3:      fildll   (%eax,%ecx,8)
        fistpll  (%edx,%ecx,8)
cla_CopyLeft:
        subl     $1,%ecx
        jge      3b
        ret

        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
        #                                                 void* to,
        #                                                 size_t count)
        #
        # MMX variant.
        #   %esi = from, %edi = to, %ecx = count (words)
        #   %eax = from + count*2 - 2, later the saved word count
        # Low-to-high copies of 33 or more dwords move one dword with
        # smovl and then 64 bytes (16 dwords) per iteration through
        # %mm0-%mm2; "emms" is executed when that loop finishes.  Any
        # remaining dwords go through the plain dword loop at 1:.  The
        # high-to-low path does not use MMX.
        .p2align 4,,15
        .type    _mmx_Copy_arrayof_conjoint_jshorts,@function
_mmx_Copy_arrayof_conjoint_jshorts:
        pushl    %esi
        movl     4+12(%esp),%ecx        # count
        pushl    %edi
        movl     8+ 4(%esp),%esi        # from
        movl     8+ 8(%esp),%edi        # to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax   # from + count*2 - 2
        jbe      mmx_acs_CopyRight
        cmpl     %eax,%edi
        jbe      mmx_acs_CopyLeft
mmx_acs_CopyRight:
        movl     %ecx,%eax              # word count
        sarl     %ecx                   # dword count
        je       5f                     # no dwords to move
        cmpl     $33,%ecx
        jae      3f                     # >= 33 dwords: MMX path
1:      subl     %esi,%edi              # %edi = to - from
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi              # restore %edi to an absolute address
        jmp      5f
3:      smovl    # align to 8 bytes, we know we are 4 byte aligned to start
        subl     $1,%ecx                # account for the dword just moved
4:      .p2align 4,,15
        movq     0(%esi),%mm0
        addl     $64,%edi
        movq     8(%esi),%mm1
        subl     $16,%ecx               # 16 dwords per iteration
        movq     16(%esi),%mm2
        movq     %mm0,-64(%edi)
        movq     24(%esi),%mm0
        movq     %mm1,-56(%edi)
        movq     32(%esi),%mm1
        movq     %mm2,-48(%edi)
        movq     40(%esi),%mm2
        movq     %mm0,-40(%edi)
        movq     48(%esi),%mm0
        movq     %mm1,-32(%edi)
        movq     56(%esi),%mm1
        movq     %mm2,-24(%edi)
        movq     %mm0,-16(%edi)
        addl     $64,%esi
        movq     %mm1,-8(%edi)
        cmpl     $16,%ecx
        jge      4b
        emms
        testl    %ecx,%ecx
        ja       1b                     # leftover dwords: plain dword loop
5:      andl     $1,%eax                # suffix count
        je       7f                     # no suffix
6:      movw     (%esi),%dx
        movw     %dx,(%edi)
7:      popl     %edi
        popl     %esi
        ret
mmx_acs_CopyLeft:
        std                             # string ops now decrement %esi/%edi
        leal     -4(%edi,%ecx,2),%edi   # to + count*2 - 4
        movl     %eax,%esi              # from + count*2 - 2
        movl     %ecx,%eax              # keep word count for the suffix
        subl     $2,%esi                # from + count*2 - 4
        sarl     %ecx                   # dword count
        je       4f                     # no dwords to move
        cmpl     $32,%ecx
        ja       3f                     # > 32 dwords
        subl     %esi,%edi              # %edi = to - from
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi              # restore %edi to an absolute address
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax                # suffix count
        je       6f                     # no suffix
        addl     $2,%esi
        addl     $2,%edi
5:      movw     (%esi),%dx
        movw     %dx,(%edi)
6:      cld                             # restore the direction flag
        popl     %edi
        popl     %esi
        ret


        # Support for jlong Atomic::cmpxchg on a 64-bit value.
        #
        # Stack argument order, as read by the code below:
        #     jlong           exchange_value
        #     volatile jlong* dest
        #     jlong           compare_value
        #
        # "lock cmpxchg8b" compares %edx:%eax (compare_value) with the
        # 8 bytes at dest; if they are equal it stores %ecx:%ebx
        # (exchange_value) there, otherwise it loads the current contents
        # of dest into %edx:%eax.  Either way %edx:%eax holds the value
        # that was found at dest when the function returns.
        # %ebx and %edi are saved and restored; %ecx is clobbered.
        .p2align 4,,15
        .type    _Atomic_cmpxchg_long,@function
_Atomic_cmpxchg_long:
                                   #  8(%esp) : return PC
        pushl    %ebx              #  4(%esp) : old %ebx
        pushl    %edi              #  0(%esp) : old %edi
        movl     12(%esp), %ebx    # 12(%esp) : exchange_value (low)
        movl     16(%esp), %ecx    # 16(%esp) : exchange_value (high)
        movl     24(%esp), %eax    # 24(%esp) : compare_value (low)
        movl     28(%esp), %edx    # 28(%esp) : compare_value (high)
        movl     20(%esp), %edi    # 20(%esp) : dest
        lock cmpxchg8b (%edi)
        popl     %edi
        popl     %ebx
        ret


        # Support for jlong Atomic::load and Atomic::store.
        # void _Atomic_move_long(const volatile jlong* src, volatile jlong* dst)
        #
        # Moves 8 bytes from *src to *dst with one 64-bit x87 integer load
        # (fildll) and one 64-bit x87 integer store-and-pop (fistpll).
        # Only %eax is written.
        .p2align 4,,15
        .type    _Atomic_move_long,@function
_Atomic_move_long:
        movl     4(%esp), %eax   # src
        fildll    (%eax)
        movl     8(%esp), %eax   # dest
        fistpll   (%eax)
        ret

        # Mark this object as not needing an executable stack, so that a
        # link without an explicit "-z noexecstack" does not fall back to
        # an executable stack because of this hand-written file.
        .section .note.GNU-stack,"",@progbits