# Path: blob/master/src/hotspot/os_cpu/linux_x86/linux_x86_64.S
# 40951 views
#
# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#

# ----------------------------------------------------------------------
# Syntax : GNU as, AT&T operand order (source, destination).
# Target : x86-64 ELF.
# Args   : every _Copy_* routine receives from=%rdi, to=%rsi, count=%rdx
#          and returns nothing.  None of them touches the stack or any
#          register outside %rax, %rcx, %rdx, %rsi, %r8 and the flags.
#
# Common shape of the _Copy_* routines
#   * "conjoint" means source and destination may overlap, so the copy
#     direction is picked at entry:
#         to <= from                  -> xxx_CopyRight (ascending addresses)
#         from < to <= last element   -> xxx_CopyLeft  (descending addresses)
#         to beyond the last element  -> xxx_CopyRight (no overlap)
#     Both comparisons are unsigned (jbe).
#   * The bulk is moved as 8-byte qwords, four per iteration in an
#     unrolled loop, then 1..3 leftover qwords one at a time; the
#     sub-qword tail is moved with dword/word/byte accesses.
#   * Numeric labels are GNU as local labels: "Nb" is the nearest N:
#     above, "Nf" the nearest N: below.  The same digit is reused many
#     times in this file.
# ----------------------------------------------------------------------

        # NOTE WELL! The _Copy functions are called directly
        # from server-compiler-generated code via CallLeafNoFP,
        # which means that they *must* either not use floating
        # point or use it in the same manner as does the server
        # compiler.

        .globl _Copy_arrayof_conjoint_bytes
        .globl _Copy_arrayof_conjoint_jshorts
        .globl _Copy_conjoint_jshorts_atomic
        .globl _Copy_arrayof_conjoint_jints
        .globl _Copy_conjoint_jints_atomic
        .globl _Copy_arrayof_conjoint_jlongs
        .globl _Copy_conjoint_jlongs_atomic

        .text

        # SpinPause
        #   Executes one spin-wait hint and returns 1 in %rax.
        #   Takes no arguments; modifies only %rax.
        .globl SpinPause
        .align 16
        .type    SpinPause,@function
SpinPause:
        rep                           # rep + nop is the F3 90 encoding,
        nop                           #  i.e. the PAUSE instruction
        movq     $1, %rax             # return value
        ret

        # Support for void Copy::arrayof_conjoint_bytes(void* from,
        #                                               void* to,
        #                                               size_t count)
        # rdi - from
        # rsi - to
        # rdx - count, treated as ssize_t
        #
        # Register roles after the first two instructions:
        #   r8  - byte count (bits 2..0 select the dword/word/byte tail)
        #   rdx - qword count (count >> 3)
        # Clobbers rax, rcx, rdx, rsi, r8 and the flags; rdi is preserved.
        #
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_bytes,@function
_Copy_arrayof_conjoint_bytes:
        movq     %rdx,%r8             # byte count
        shrq     $3,%rdx              # qword count
        cmpq     %rdi,%rsi            # flags = to - from (leaq keeps them)
        leaq     -1(%rdi,%r8,1),%rax  # from + bcount*1 - 1
        jbe      acb_CopyRight        # to <= from: ascending copy is safe
        cmpq     %rax,%rsi
        jbe      acb_CopyLeft         # to lands inside the source range
acb_CopyRight:
        # rax/rcx point at the last whole qword of source/destination and
        # rdx runs from -qcount up to 0, so the loops end on a zero test.
        leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
        negq     %rdx
        jmp      7f
        .p2align 4,,15
1:      movq     8(%rax,%rdx,8),%rsi  # 1..3 leftover qwords, one at a time
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
2:      testq    $4,%r8               # check for trailing dword
        jz       3f
        movl     8(%rax),%esi         # copy trailing dword
        movl     %esi,8(%rcx)
        addq     $4,%rax
        addq     $4,%rcx              # original %rsi is trashed, so we
                                      #  can't use it as a base register
3:      testq    $2,%r8               # check for trailing word
        jz       4f
        movw     8(%rax),%si          # copy trailing word
        movw     %si,8(%rcx)
        addq     $2,%rcx
4:      testq    $1,%r8               # check for trailing byte
        jz       5f
        movb     -1(%rdi,%r8,1),%al   # copy trailing byte
        movb     %al,8(%rcx)
5:      ret
        .p2align 4,,15
6:      movq     -24(%rax,%rdx,8),%rsi # unrolled: four qwords, ascending
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
7:      addq     $4,%rdx              # at least four qwords left?
        jle      6b
        subq     $4,%rdx              # undo; rdx = -(leftover qwords)
        jl       1b
        jmp      2b
acb_CopyLeft:
        # Descending copy: the tail (highest addresses) goes first, then
        # the qwords from the top down.  rdi/rsi stay intact here, so
        # they serve directly as base registers.
        testq    $1,%r8               # check for trailing byte
        jz       1f
        movb     -1(%rdi,%r8,1),%cl   # copy trailing byte
        movb     %cl,-1(%rsi,%r8,1)
        subq     $1,%r8               # adjust for possible trailing word
1:      testq    $2,%r8               # check for trailing word
        jz       2f
        movw     -2(%rdi,%r8,1),%cx   # copy trailing word
        movw     %cx,-2(%rsi,%r8,1)
2:      testq    $4,%r8               # check for trailing dword
        jz       5f
        movl     (%rdi,%rdx,8),%ecx   # copy trailing dword
        movl     %ecx,(%rsi,%rdx,8)
        jmp      5f
        .p2align 4,,15
3:      movq     -8(%rdi,%rdx,8),%rcx # 1..3 leftover qwords, descending
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      3b
        ret
        .p2align 4,,15
4:      movq     24(%rdi,%rdx,8),%rcx # unrolled: four qwords, descending
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
5:      subq     $4,%rdx              # at least four qwords left?
        jge      4b
        addq     $4,%rdx              # undo; rdx = leftover qwords (0..3)
        jg       3b
        ret

        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
        #                                                 void* to,
        #                                                 size_t count)
        # Equivalent to
        #   conjoint_jshorts_atomic
        #
        # If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
        # let the hardware handle it. The two or four words within dwords
        # or qwords that span cache line boundaries will still be loaded
        # and stored atomically.
        #
        # rdi - from
        # rsi - to
        # rdx - count, treated as ssize_t
        #
        # Register roles after the first two instructions:
        #   r8  - word count (bit 1 selects a tail dword, bit 0 a tail word)
        #   rdx - qword count (count >> 2)
        # Clobbers rax, rcx, rdx, rsi, r8 and the flags; rdi is preserved.
        #
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_jshorts,@function
        .type    _Copy_conjoint_jshorts_atomic,@function
_Copy_arrayof_conjoint_jshorts:
_Copy_conjoint_jshorts_atomic:
        movq     %rdx,%r8             # word count
        shrq     $2,%rdx              # qword count
        cmpq     %rdi,%rsi            # flags = to - from (leaq keeps them)
        leaq     -2(%rdi,%r8,2),%rax  # from + wcount*2 - 2
        jbe      acs_CopyRight        # to <= from: ascending copy is safe
        cmpq     %rax,%rsi
        jbe      acs_CopyLeft         # to lands inside the source range
acs_CopyRight:
        leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
        negq     %rdx                 # rdx counts up from -qcount to 0
        jmp      6f
1:      movq     8(%rax,%rdx,8),%rsi  # 1..3 leftover qwords, one at a time
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
2:      testq    $2,%r8               # check for trailing dword
        jz       3f
        movl     8(%rax),%esi         # copy trailing dword
        movl     %esi,8(%rcx)
        addq     $4,%rcx              # original %rsi is trashed, so we
                                      #  can't use it as a base register
3:      testq    $1,%r8               # check for trailing word
        jz       4f
        movw     -2(%rdi,%r8,2),%si   # copy trailing word
        movw     %si,8(%rcx)
4:      ret
        .p2align 4,,15
5:      movq     -24(%rax,%rdx,8),%rsi # unrolled: four qwords, ascending
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
6:      addq     $4,%rdx              # at least four qwords left?
        jle      5b
        subq     $4,%rdx              # undo; rdx = -(leftover qwords)
        jl       1b
        jmp      2b
acs_CopyLeft:
        # Descending copy: tail first, then qwords from the top down.
        testq    $1,%r8               # check for trailing word
        jz       1f
        movw     -2(%rdi,%r8,2),%cx   # copy trailing word
        movw     %cx,-2(%rsi,%r8,2)
1:      testq    $2,%r8               # check for trailing dword
        jz       4f
        movl     (%rdi,%rdx,8),%ecx   # copy trailing dword
        movl     %ecx,(%rsi,%rdx,8)
        jmp      4f
2:      movq     -8(%rdi,%rdx,8),%rcx # 1..3 leftover qwords, descending
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      2b
        ret
        .p2align 4,,15
3:      movq     24(%rdi,%rdx,8),%rcx # unrolled: four qwords, descending
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
4:      subq     $4,%rdx              # at least four qwords left?
        jge      3b
        addq     $4,%rdx              # undo; rdx = leftover qwords (0..3)
        jg       2b
        ret

        # Support for void Copy::arrayof_conjoint_jints(jint* from,
        #                                               jint* to,
        #                                               size_t count)
        # Equivalent to
        #   conjoint_jints_atomic
        #
        # If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
        # the hardware handle it. The two dwords within qwords that span
        # cache line boundaries will still be loaded and stored atomically.
        #
        # rdi - from
        # rsi - to
        # rdx - count, treated as ssize_t
        #
        # Register roles after the first two instructions:
        #   r8  - dword count (bit 0 selects a tail dword)
        #   rdx - qword count (count >> 1)
        # Clobbers rax, rcx, rdx, rsi, r8 and the flags; rdi is preserved.
        #
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_jints,@function
        .type    _Copy_conjoint_jints_atomic,@function
_Copy_arrayof_conjoint_jints:
_Copy_conjoint_jints_atomic:
        movq     %rdx,%r8             # dword count
        shrq     %rdx                 # qword count
        cmpq     %rdi,%rsi            # flags = to - from (leaq keeps them)
        leaq     -4(%rdi,%r8,4),%rax  # from + dcount*4 - 4
        jbe      aci_CopyRight        # to <= from: ascending copy is safe
        cmpq     %rax,%rsi
        jbe      aci_CopyLeft         # to lands inside the source range
aci_CopyRight:
        leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
        negq     %rdx                 # rdx counts up from -qcount to 0
        jmp      5f
        .p2align 4,,15
1:      movq     8(%rax,%rdx,8),%rsi  # 1..3 leftover qwords, one at a time
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
2:      testq    $1,%r8               # check for trailing dword
        jz       3f
        movl     8(%rax),%esi         # copy trailing dword
        movl     %esi,8(%rcx)
3:      ret
        .p2align 4,,15
4:      movq     -24(%rax,%rdx,8),%rsi # unrolled: four qwords, ascending
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
5:      addq     $4,%rdx              # at least four qwords left?
        jle      4b
        subq     $4,%rdx              # undo; rdx = -(leftover qwords)
        jl       1b
        jmp      2b
aci_CopyLeft:
        # Descending copy: tail dword first, then qwords from the top down.
        testq    $1,%r8               # check for trailing dword
        jz       3f
        movl     -4(%rdi,%r8,4),%ecx  # copy trailing dword
        movl     %ecx,-4(%rsi,%r8,4)
        jmp      3f
1:      movq     -8(%rdi,%rdx,8),%rcx # 1..3 leftover qwords, descending
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      1b
        ret
        .p2align 4,,15
2:      movq     24(%rdi,%rdx,8),%rcx # unrolled: four qwords, descending
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
3:      subq     $4,%rdx              # at least four qwords left?
        jge      2b
        addq     $4,%rdx              # undo; rdx = leftover qwords (0..3)
        jg       1b
        ret

        # Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
        #                                                jlong* to,
        #                                                size_t count)
        # Equivalent to
        #   conjoint_jlongs_atomic
        #   arrayof_conjoint_oops
        #   conjoint_oops_atomic
        #
        # rdi - from
        # rsi - to
        # rdx - count, treated as ssize_t
        #
        # The element is already a qword, so there is no tail handling and
        # rdx is used directly as the qword count.
        # Clobbers rax, rcx, rdx, rsi and the flags; rdi is preserved.
        #
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_jlongs,@function
        .type    _Copy_conjoint_jlongs_atomic,@function
_Copy_arrayof_conjoint_jlongs:
_Copy_conjoint_jlongs_atomic:
        cmpq     %rdi,%rsi            # flags = to - from (leaq keeps them)
        leaq     -8(%rdi,%rdx,8),%rax # from + count*8 - 8
        jbe      acl_CopyRight        # to <= from: ascending copy is safe
        cmpq     %rax,%rsi
        jbe      acl_CopyLeft         # to lands inside the source range
acl_CopyRight:
        leaq     -8(%rsi,%rdx,8),%rcx # to + count*8 - 8
        negq     %rdx                 # rdx counts up from -count to 0
        jmp      3f
1:      movq     8(%rax,%rdx,8),%rsi  # 1..3 leftover qwords, one at a time
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
        ret
        .p2align 4,,15
2:      movq     -24(%rax,%rdx,8),%rsi # unrolled: four qwords, ascending
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
3:      addq     $4,%rdx              # at least four qwords left?
        jle      2b
        subq     $4,%rdx              # undo; rdx = -(leftover qwords)
        jl       1b
        ret
4:      movq     -8(%rdi,%rdx,8),%rcx # 1..3 leftover qwords, descending
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      4b
        ret
        .p2align 4,,15
5:      movq     24(%rdi,%rdx,8),%rcx # unrolled: four qwords, descending
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
acl_CopyLeft:
        # Entry point of the descending copy: it sits on the loop test so
        # that the first pass decides between the unrolled and single loop.
        subq     $4,%rdx              # at least four qwords left?
        jge      5b
        addq     $4,%rdx              # undo; rdx = leftover qwords (0..3)
        jg       4b
        ret