Path: blob/master/src/hotspot/os_cpu/bsd_x86/bsd_x86_64.S
40930 views
#
# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#

# Syntax used throughout: AT&T-style operands (source first, destination
# last), percent-prefixed registers, dollar-prefixed immediates.
#
# The two helper macros below hide the platform differences in symbol
# naming: under the Apple branch the symbol macro prepends an underscore
# and the type macro expands to nothing; otherwise the symbol macro is the
# identity and the type macro emits a .type directive.

#ifdef __APPLE__
# Darwin uses _ prefixed global symbols
#define SYMBOL(s) _ ## s
#define ELF_TYPE(name, description)
#else
#define SYMBOL(s) s
#define ELF_TYPE(name, description) .type name,description
#endif

        # NOTE WELL!  The _Copy functions are called directly
        # from server-compiler-generated code via CallLeafNoFP,
        # which means that they *must* either not use floating
        # point or use it in the same manner as does the server
        # compiler.
        #
        # The routines in this file use general-purpose registers only.

        .globl SYMBOL(_Copy_arrayof_conjoint_bytes)
        .globl SYMBOL(_Copy_arrayof_conjoint_jshorts)
        .globl SYMBOL(_Copy_conjoint_jshorts_atomic)
        .globl SYMBOL(_Copy_arrayof_conjoint_jints)
        .globl SYMBOL(_Copy_conjoint_jints_atomic)
        .globl SYMBOL(_Copy_arrayof_conjoint_jlongs)
        .globl SYMBOL(_Copy_conjoint_jlongs_atomic)

        .text

        # SpinPause
        #
        # Reads no arguments.  Executes rep followed by nop (the byte
        # sequence of the x86 pause spin-wait hint) and returns 1 in rax.
        #
        .globl SYMBOL(SpinPause)
        .p2align 4,,15
        ELF_TYPE(SpinPause,@function)
SYMBOL(SpinPause):
        rep
        nop
        movq     $1, %rax             # return value
        ret

        # Support for void Copy::arrayof_conjoint_bytes(void* from,
        #                                               void* to,
        #                                               size_t count)
        # rdi - from
        # rsi - to
        # rdx - count, treated as ssize_t
        #
        # Copy direction (all comparisons are unsigned):
        #   to <= from                  -> acb_CopyRight (ascending addresses)
        #   from < to <= from+count-1   -> acb_CopyLeft  (descending addresses)
        #   to beyond the last byte     -> acb_CopyRight
        #
        # Whole qwords are moved four at a time in an unrolled loop, then
        # one at a time; the remaining 0..7 bytes are moved as at most one
        # dword, one word and one byte, selected by the low three bits of
        # the byte count.
        #
        # Registers written: rax, rcx, rdx, rsi, r8 and the flags.
        # The from pointer in rdi is only read.
        #
        .p2align 4,,15
        ELF_TYPE(_Copy_arrayof_conjoint_bytes,@function)
SYMBOL(_Copy_arrayof_conjoint_bytes):
        movq     %rdx,%r8             # byte count
        shrq     $3,%rdx              # qword count
        cmpq     %rdi,%rsi            # flags from to - from; leaq leaves them intact
        leaq     -1(%rdi,%r8,1),%rax  # from + bcount*1 - 1
        jbe      acb_CopyRight        # to <= from
        cmpq     %rax,%rsi
        jbe      acb_CopyLeft         # to lies inside the source range
acb_CopyRight:
        leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
        negq     %rdx                 # index runs upward from -qcount to 0
        jmp      7f
        .p2align 4,,15
        # Single-qword loop: entered with one to three qwords remaining.
1:      movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
        # Tail: 8(rax) and 8(rcx) address the first byte after the qwords.
2:      testq    $4,%r8               # check for trailing dword
        jz       3f
        movl     8(%rax),%esi         # copy trailing dword
        movl     %esi,8(%rcx)
        addq     $4,%rax
        addq     $4,%rcx              # original %rsi is trashed, so we
                                      #  can't use it as a base register
3:      testq    $2,%r8               # check for trailing word
        jz       4f
        movw     8(%rax),%si          # copy trailing word
        movw     %si,8(%rcx)
        addq     $2,%rcx
4:      testq    $1,%r8               # check for trailing byte
        jz       5f
        movb     -1(%rdi,%r8,1),%al   # copy trailing byte
        movb     %al,8(%rcx)          # rcx was advanced past any dword and word
5:      ret
        .p2align 4,,15
        # Unrolled loop: four qwords per iteration, lowest address first.
6:      movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
7:      addq     $4,%rdx              # claim the next four qwords
        jle      6b                   # at least four were left
        subq     $4,%rdx              # fewer than four: undo the claim
        jl       1b                   # some qwords left
        jmp      2b                   # no qwords left, go to the tail
acb_CopyLeft:
        # Descending copy: the tail at the high end goes first, then the
        # qwords from the highest index down to index zero.
        testq    $1,%r8               # check for trailing byte
        jz       1f
        movb     -1(%rdi,%r8,1),%cl   # copy trailing byte
        movb     %cl,-1(%rsi,%r8,1)
        subq     $1,%r8               # adjust for possible trailing word
1:      testq    $2,%r8               # check for trailing word
        jz       2f
        movw     -2(%rdi,%r8,1),%cx   # copy trailing word
        movw     %cx,-2(%rsi,%r8,1)
2:      testq    $4,%r8               # check for trailing dword
        jz       5f
        movl     (%rdi,%rdx,8),%ecx   # copy trailing dword
        movl     %ecx,(%rsi,%rdx,8)
        jmp      5f
        .p2align 4,,15
        # Single-qword loop: rdx counts the qwords still to be copied.
3:      movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      3b
        ret
        .p2align 4,,15
        # Unrolled loop: four qwords per iteration, highest address first.
4:      movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
5:      subq     $4,%rdx              # claim the next four qwords
        jge      4b                   # at least four were left
        addq     $4,%rdx              # fewer than four: undo the claim
        jg       3b                   # some qwords left
        ret

        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
        #                                                 void* to,
        #                                                 size_t count)
        # Equivalent to
        #   conjoint_jshorts_atomic
        #
        # If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
        # let the hardware handle it.  The two or four words within dwords
        # or qwords that span cache line boundaries will still be loaded
        # and stored atomically.
        #
        # rdi - from
        # rsi - to
        # rdx - count, treated as ssize_t
        #
        # The count is in 16-bit words.  Direction selection and loop
        # structure mirror the byte routine above; the tail is at most one
        # dword and one word, selected by the low two bits of the count.
        #
        # Registers written: rax, rcx, rdx, rsi, r8 and the flags.
        # The from pointer in rdi is only read.
        #
        .p2align 4,,15
        ELF_TYPE(_Copy_arrayof_conjoint_jshorts,@function)
        ELF_TYPE(_Copy_conjoint_jshorts_atomic,@function)
SYMBOL(_Copy_arrayof_conjoint_jshorts):
SYMBOL(_Copy_conjoint_jshorts_atomic):
        movq     %rdx,%r8             # word count
        shrq     $2,%rdx              # qword count
        cmpq     %rdi,%rsi            # flags from to - from; leaq leaves them intact
        leaq     -2(%rdi,%r8,2),%rax  # from + wcount*2 - 2
        jbe      acs_CopyRight        # to <= from
        cmpq     %rax,%rsi
        jbe      acs_CopyLeft         # to lies inside the source range
acs_CopyRight:
        leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
        negq     %rdx                 # index runs upward from -qcount to 0
        jmp      6f
        # Single-qword loop: entered with one to three qwords remaining.
1:      movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
2:      testq    $2,%r8               # check for trailing dword
        jz       3f
        movl     8(%rax),%esi         # copy trailing dword
        movl     %esi,8(%rcx)
        addq     $4,%rcx              # original %rsi is trashed, so we
                                      #  can't use it as a base register
3:      testq    $1,%r8               # check for trailing word
        jz       4f
        movw     -2(%rdi,%r8,2),%si   # copy trailing word
        movw     %si,8(%rcx)
4:      ret
        .p2align 4,,15
        # Unrolled loop: four qwords per iteration, lowest address first.
5:      movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
6:      addq     $4,%rdx              # claim the next four qwords
        jle      5b                   # at least four were left
        subq     $4,%rdx              # fewer than four: undo the claim
        jl       1b                   # some qwords left
        jmp      2b                   # no qwords left, go to the tail
acs_CopyLeft:
        # Descending copy: tail first, then qwords from high to low.
        testq    $1,%r8               # check for trailing word
        jz       1f
        movw     -2(%rdi,%r8,2),%cx   # copy trailing word
        movw     %cx,-2(%rsi,%r8,2)
1:      testq    $2,%r8               # check for trailing dword
        jz       4f
        movl     (%rdi,%rdx,8),%ecx   # copy trailing dword
        movl     %ecx,(%rsi,%rdx,8)
        jmp      4f
        # Single-qword loop: rdx counts the qwords still to be copied.
2:      movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      2b
        ret
        .p2align 4,,15
        # Unrolled loop: four qwords per iteration, highest address first.
3:      movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
4:      subq     $4,%rdx              # claim the next four qwords
        jge      3b                   # at least four were left
        addq     $4,%rdx              # fewer than four: undo the claim
        jg       2b                   # some qwords left
        ret

        # Support for void Copy::arrayof_conjoint_jints(jint* from,
        #                                               jint* to,
        #                                               size_t count)
        # Equivalent to
        #   conjoint_jints_atomic
        #
        # If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
        # the hardware handle it.  The two dwords within qwords that span
        # cache line boundaries will still be loaded and stored atomically.
        #
        # rdi - from
        # rsi - to
        # rdx - count, treated as ssize_t
        #
        # The count is in 32-bit dwords.  Direction selection and loop
        # structure mirror the byte routine above; the tail is at most one
        # dword, selected by the low bit of the count.
        #
        # Registers written: rax, rcx, rdx, rsi, r8 and the flags.
        # The from pointer in rdi is only read.
        #
        .p2align 4,,15
        ELF_TYPE(_Copy_arrayof_conjoint_jints,@function)
        ELF_TYPE(_Copy_conjoint_jints_atomic,@function)
SYMBOL(_Copy_arrayof_conjoint_jints):
SYMBOL(_Copy_conjoint_jints_atomic):
        movq     %rdx,%r8             # dword count
        shrq     %rdx                 # qword count
        cmpq     %rdi,%rsi            # flags from to - from; leaq leaves them intact
        leaq     -4(%rdi,%r8,4),%rax  # from + dcount*4 - 4
        jbe      aci_CopyRight        # to <= from
        cmpq     %rax,%rsi
        jbe      aci_CopyLeft         # to lies inside the source range
aci_CopyRight:
        leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
        negq     %rdx                 # index runs upward from -qcount to 0
        jmp      5f
        .p2align 4,,15
        # Single-qword loop: entered with one to three qwords remaining.
1:      movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
2:      testq    $1,%r8               # check for trailing dword
        jz       3f
        movl     8(%rax),%esi         # copy trailing dword
        movl     %esi,8(%rcx)
3:      ret
        .p2align 4,,15
        # Unrolled loop: four qwords per iteration, lowest address first.
4:      movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
5:      addq     $4,%rdx              # claim the next four qwords
        jle      4b                   # at least four were left
        subq     $4,%rdx              # fewer than four: undo the claim
        jl       1b                   # some qwords left
        jmp      2b                   # no qwords left, go to the tail
aci_CopyLeft:
        # Descending copy: tail first, then qwords from high to low.
        testq    $1,%r8               # check for trailing dword
        jz       3f
        movl     -4(%rdi,%r8,4),%ecx  # copy trailing dword
        movl     %ecx,-4(%rsi,%r8,4)
        jmp      3f
        # Single-qword loop: rdx counts the qwords still to be copied.
1:      movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      1b
        ret
        .p2align 4,,15
        # Unrolled loop: four qwords per iteration, highest address first.
2:      movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
3:      subq     $4,%rdx              # claim the next four qwords
        jge      2b                   # at least four were left
        addq     $4,%rdx              # fewer than four: undo the claim
        jg       1b                   # some qwords left
        ret

        # Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
        #                                                jlong* to,
        #                                                size_t count)
        # Equivalent to
        #   conjoint_jlongs_atomic
        #   arrayof_conjoint_oops
        #   conjoint_oops_atomic
        #
        # rdi - from
        # rsi - to
        # rdx - count, treated as ssize_t
        #
        # The count is already in qwords, so there is no tail handling:
        # every access is a 64-bit movq.  Direction selection matches the
        # routines above.
        #
        # Registers written: rax, rcx, rdx, rsi and the flags.
        # The from pointer in rdi is only read.
        #
        .p2align 4,,15
        ELF_TYPE(_Copy_arrayof_conjoint_jlongs,@function)
        ELF_TYPE(_Copy_conjoint_jlongs_atomic,@function)
SYMBOL(_Copy_arrayof_conjoint_jlongs):
SYMBOL(_Copy_conjoint_jlongs_atomic):
        cmpq     %rdi,%rsi            # flags from to - from; leaq leaves them intact
        leaq     -8(%rdi,%rdx,8),%rax # from + count*8 - 8
        jbe      acl_CopyRight        # to <= from
        cmpq     %rax,%rsi
        jbe      acl_CopyLeft         # to lies inside the source range
acl_CopyRight:
        leaq     -8(%rsi,%rdx,8),%rcx # to + count*8 - 8
        negq     %rdx                 # index runs upward from -count to 0
        jmp      3f
        # Single-qword loop: entered with one to three qwords remaining.
1:      movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
        ret
        .p2align 4,,15
        # Unrolled loop: four qwords per iteration, lowest address first.
2:      movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
3:      addq     $4,%rdx              # claim the next four qwords
        jle      2b                   # at least four were left
        subq     $4,%rdx              # fewer than four: undo the claim
        jl       1b                   # some qwords left
        ret
        # Descending single-qword loop: rdx counts the qwords still to copy.
4:      movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      4b
        ret
        .p2align 4,,15
        # Descending unrolled loop: four qwords per iteration.
5:      movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
acl_CopyLeft:
        # Entry point of the descending copy; the loops it uses sit above.
        subq     $4,%rdx              # claim the next four qwords
        jge      5b                   # at least four were left
        addq     $4,%rdx              # fewer than four: undo the claim
        jg       4b                   # some qwords left
        ret
