Path: blob/main/sys/amd64/linux32/linux32_machdep.c
39507 views
/*-1* SPDX-License-Identifier: BSD-3-Clause2*3* Copyright (c) 2004 Tim J. Robbins4* Copyright (c) 2002 Doug Rabson5* Copyright (c) 2000 Marcel Moolenaar6* All rights reserved.7*8* Redistribution and use in source and binary forms, with or without9* modification, are permitted provided that the following conditions10* are met:11* 1. Redistributions of source code must retain the above copyright12* notice, this list of conditions and the following disclaimer13* in this position and unchanged.14* 2. Redistributions in binary form must reproduce the above copyright15* notice, this list of conditions and the following disclaimer in the16* documentation and/or other materials provided with the distribution.17* 3. The name of the author may not be used to endorse or promote products18* derived from this software without specific prior written permission.19*20* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR21* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES22* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.23* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,24* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT25* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,26* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY27* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT28* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF29* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.30*/3132#include <sys/param.h>33#include <sys/lock.h>34#include <sys/mutex.h>35#include <sys/priv.h>36#include <sys/proc.h>37#include <sys/reg.h>38#include <sys/syscallsubr.h>3940#include <machine/md_var.h>41#include <machine/specialreg.h>42#include <x86/ifunc.h>4344#include <compat/freebsd32/freebsd32_util.h>45#include <amd64/linux32/linux.h>46#include <amd64/linux32/linux32_proto.h>47#include <compat/linux/linux_emul.h>48#include <compat/linux/linux_fork.h>49#include <compat/linux/linux_ipc.h>50#include <compat/linux/linux_mmap.h>51#include <compat/linux/linux_signal.h>52#include <compat/linux/linux_util.h>5354static void bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru);5556struct l_old_select_argv {57l_int nfds;58l_uintptr_t readfds;59l_uintptr_t writefds;60l_uintptr_t exceptfds;61l_uintptr_t timeout;62} __packed;6364static void65bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru)66{6768lru->ru_utime.tv_sec = ru->ru_utime.tv_sec;69lru->ru_utime.tv_usec = ru->ru_utime.tv_usec;70lru->ru_stime.tv_sec = ru->ru_stime.tv_sec;71lru->ru_stime.tv_usec = ru->ru_stime.tv_usec;72lru->ru_maxrss = ru->ru_maxrss;73lru->ru_ixrss = ru->ru_ixrss;74lru->ru_idrss = ru->ru_idrss;75lru->ru_isrss = ru->ru_isrss;76lru->ru_minflt = ru->ru_minflt;77lru->ru_majflt = ru->ru_majflt;78lru->ru_nswap = ru->ru_nswap;79lru->ru_inblock = ru->ru_inblock;80lru->ru_oublock = ru->ru_oublock;81lru->ru_msgsnd = ru->ru_msgsnd;82lru->ru_msgrcv = ru->ru_msgrcv;83lru->ru_nsignals = ru->ru_nsignals;84lru->ru_nvcsw = ru->ru_nvcsw;85lru->ru_nivcsw = ru->ru_nivcsw;86}8788int89linux_copyout_rusage(struct rusage *ru, void *uaddr)90{91struct l_rusage lru;9293bsd_to_linux_rusage(ru, &lru);9495return (copyout(&lru, uaddr, sizeof(struct l_rusage)));96}9798int99linux_readv(struct thread *td, struct linux_readv_args *uap)100{101struct uio *auio;102int error;103104error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio);105if (error)106return (error);107error = kern_readv(td, uap->fd, auio);108freeuio(auio);109return (error);110}111112struct l_ipc_kludge {113l_uintptr_t msgp;114l_long msgtyp;115} __packed;116117int118linux_ipc(struct thread *td, struct linux_ipc_args *args)119{120121switch (args->what & 0xFFFF) {122case LINUX_SEMOP: {123124return (kern_semop(td, args->arg1, PTRIN(args->ptr),125args->arg2, NULL));126}127case LINUX_SEMGET: {128struct linux_semget_args a;129130a.key = args->arg1;131a.nsems = args->arg2;132a.semflg = args->arg3;133return (linux_semget(td, &a));134}135case LINUX_SEMCTL: {136struct linux_semctl_args a;137int error;138139a.semid = args->arg1;140a.semnum = args->arg2;141a.cmd = args->arg3;142error = copyin(PTRIN(args->ptr), &a.arg, sizeof(a.arg));143if (error)144return (error);145return (linux_semctl(td, &a));146}147case LINUX_SEMTIMEDOP: {148struct linux_semtimedop_args a;149150a.semid = args->arg1;151a.tsops = PTRIN(args->ptr);152a.nsops = args->arg2;153a.timeout = PTRIN(args->arg5);154return (linux_semtimedop(td, &a));155}156case LINUX_MSGSND: {157struct linux_msgsnd_args a;158159a.msqid = args->arg1;160a.msgp = PTRIN(args->ptr);161a.msgsz = args->arg2;162a.msgflg = args->arg3;163return (linux_msgsnd(td, &a));164}165case LINUX_MSGRCV: {166struct linux_msgrcv_args a;167168a.msqid = args->arg1;169a.msgsz = args->arg2;170a.msgflg = args->arg3;171if ((args->what >> 16) == 0) {172struct l_ipc_kludge tmp;173int error;174175if (args->ptr == 0)176return (EINVAL);177error = copyin(PTRIN(args->ptr), &tmp, sizeof(tmp));178if (error)179return (error);180a.msgp = PTRIN(tmp.msgp);181a.msgtyp = tmp.msgtyp;182} else {183a.msgp = PTRIN(args->ptr);184a.msgtyp = args->arg5;185}186return (linux_msgrcv(td, &a));187}188case LINUX_MSGGET: {189struct linux_msgget_args a;190191a.key = args->arg1;192a.msgflg = args->arg2;193return (linux_msgget(td, &a));194}195case LINUX_MSGCTL: {196struct linux_msgctl_args a;197198a.msqid = args->arg1;199a.cmd = args->arg2;200a.buf = PTRIN(args->ptr);201return (linux_msgctl(td, &a));202}203case LINUX_SHMAT: {204struct linux_shmat_args a;205l_uintptr_t addr;206int error;207208a.shmid = args->arg1;209a.shmaddr = PTRIN(args->ptr);210a.shmflg = args->arg2;211error = linux_shmat(td, &a);212if (error != 0)213return (error);214addr = td->td_retval[0];215error = copyout(&addr, PTRIN(args->arg3), sizeof(addr));216td->td_retval[0] = 0;217return (error);218}219case LINUX_SHMDT: {220struct linux_shmdt_args a;221222a.shmaddr = PTRIN(args->ptr);223return (linux_shmdt(td, &a));224}225case LINUX_SHMGET: {226struct linux_shmget_args a;227228a.key = args->arg1;229a.size = args->arg2;230a.shmflg = args->arg3;231return (linux_shmget(td, &a));232}233case LINUX_SHMCTL: {234struct linux_shmctl_args a;235236a.shmid = args->arg1;237a.cmd = args->arg2;238a.buf = PTRIN(args->ptr);239return (linux_shmctl(td, &a));240}241default:242break;243}244245return (EINVAL);246}247248int249linux_old_select(struct thread *td, struct linux_old_select_args *args)250{251struct l_old_select_argv linux_args;252struct linux_select_args newsel;253int error;254255error = copyin(args->ptr, &linux_args, sizeof(linux_args));256if (error)257return (error);258259newsel.nfds = linux_args.nfds;260newsel.readfds = PTRIN(linux_args.readfds);261newsel.writefds = PTRIN(linux_args.writefds);262newsel.exceptfds = PTRIN(linux_args.exceptfds);263newsel.timeout = PTRIN(linux_args.timeout);264return (linux_select(td, &newsel));265}266267int268linux_set_cloned_tls(struct thread *td, void *desc)269{270struct l_user_desc info;271struct pcb *pcb;272int error;273274error = copyin(desc, &info, sizeof(struct l_user_desc));275if (error) {276linux_msg(td, "set_cloned_tls copyin info failed!");277} else {278/* We might copy out the entry_number as GUGS32_SEL. */279info.entry_number = GUGS32_SEL;280error = copyout(&info, desc, sizeof(struct l_user_desc));281if (error)282linux_msg(td, "set_cloned_tls copyout info failed!");283284pcb = td->td_pcb;285update_pcb_bases(pcb);286pcb->pcb_gsbase = (register_t)info.base_addr;287td->td_frame->tf_gs = GSEL(GUGS32_SEL, SEL_UPL);288}289290return (error);291}292293int294linux_set_upcall(struct thread *td, register_t stack)295{296297if (stack)298td->td_frame->tf_rsp = stack;299300/*301* The newly created Linux thread returns302* to the user space by the same path that a parent do.303*/304td->td_frame->tf_rax = 0;305return (0);306}307308int309linux_mmap(struct thread *td, struct linux_mmap_args *args)310{311int error;312struct l_mmap_argv linux_args;313314error = copyin(args->ptr, &linux_args, sizeof(linux_args));315if (error)316return (error);317318return (linux_mmap_common(td, linux_args.addr, linux_args.len,319linux_args.prot, linux_args.flags, linux_args.fd,320(uint32_t)linux_args.pgoff));321}322323int324linux_iopl(struct thread *td, struct linux_iopl_args *args)325{326int error;327328if (args->level < 0 || args->level > 3)329return (EINVAL);330if ((error = priv_check(td, PRIV_IO)) != 0)331return (error);332if ((error = securelevel_gt(td->td_ucred, 0)) != 0)333return (error);334td->td_frame->tf_rflags = (td->td_frame->tf_rflags & ~PSL_IOPL) |335(args->level * (PSL_IOPL / 3));336337return (0);338}339340int341linux_sigaction(struct thread *td, struct linux_sigaction_args *args)342{343l_osigaction_t osa;344l_sigaction_t act, oact;345int error;346347if (args->nsa != NULL) {348error = copyin(args->nsa, &osa, sizeof(l_osigaction_t));349if (error)350return (error);351act.lsa_handler = osa.lsa_handler;352act.lsa_flags = osa.lsa_flags;353act.lsa_restorer = osa.lsa_restorer;354LINUX_SIGEMPTYSET(act.lsa_mask);355act.lsa_mask.__mask = osa.lsa_mask;356}357358error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL,359args->osa ? &oact : NULL);360361if (args->osa != NULL && !error) {362osa.lsa_handler = oact.lsa_handler;363osa.lsa_flags = oact.lsa_flags;364osa.lsa_restorer = oact.lsa_restorer;365osa.lsa_mask = oact.lsa_mask.__mask;366error = copyout(&osa, args->osa, sizeof(l_osigaction_t));367}368369return (error);370}371372/*373* Linux has two extra args, restart and oldmask. We don't use these,374* but it seems that "restart" is actually a context pointer that375* enables the signal to happen with a different register set.376*/377int378linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args)379{380sigset_t sigmask;381l_sigset_t mask;382383LINUX_SIGEMPTYSET(mask);384mask.__mask = args->mask;385linux_to_bsd_sigset(&mask, &sigmask);386return (kern_sigsuspend(td, sigmask));387}388389int390linux_pause(struct thread *td, struct linux_pause_args *args)391{392struct proc *p = td->td_proc;393sigset_t sigmask;394395PROC_LOCK(p);396sigmask = td->td_sigmask;397PROC_UNLOCK(p);398return (kern_sigsuspend(td, sigmask));399}400401int402linux_gettimeofday(struct thread *td, struct linux_gettimeofday_args *uap)403{404struct timeval atv;405l_timeval atv32;406struct timezone rtz;407int error = 0;408409if (uap->tp) {410microtime(&atv);411atv32.tv_sec = atv.tv_sec;412atv32.tv_usec = atv.tv_usec;413error = copyout(&atv32, uap->tp, sizeof(atv32));414}415if (error == 0 && uap->tzp != NULL) {416rtz.tz_minuteswest = 0;417rtz.tz_dsttime = 0;418error = copyout(&rtz, uap->tzp, sizeof(rtz));419}420return (error);421}422423int424linux_settimeofday(struct thread *td, struct linux_settimeofday_args *uap)425{426l_timeval atv32;427struct timeval atv, *tvp;428struct timezone atz, *tzp;429int error;430431if (uap->tp) {432error = copyin(uap->tp, &atv32, sizeof(atv32));433if (error)434return (error);435atv.tv_sec = atv32.tv_sec;436atv.tv_usec = atv32.tv_usec;437tvp = &atv;438} else439tvp = NULL;440if (uap->tzp) {441error = copyin(uap->tzp, &atz, sizeof(atz));442if (error)443return (error);444tzp = &atz;445} else446tzp = NULL;447return (kern_settimeofday(td, tvp, tzp));448}449450int451linux_getrusage(struct thread *td, struct linux_getrusage_args *uap)452{453struct rusage s;454int error;455456error = kern_getrusage(td, uap->who, &s);457if (error != 0)458return (error);459if (uap->rusage != NULL)460error = linux_copyout_rusage(&s, uap->rusage);461return (error);462}463464int465linux_set_thread_area(struct thread *td,466struct linux_set_thread_area_args *args)467{468struct l_user_desc info;469struct pcb *pcb;470int error;471472error = copyin(args->desc, &info, sizeof(struct l_user_desc));473if (error)474return (error);475476/*477* Semantics of Linux version: every thread in the system has array478* of three TLS descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown.479* This syscall loads one of the selected TLS descriptors with a value480* and also loads GDT descriptors 6, 7 and 8 with the content of481* the per-thread descriptors.482*483* Semantics of FreeBSD version: I think we can ignore that Linux has484* three per-thread descriptors and use just the first one.485* The tls_array[] is used only in [gs]et_thread_area() syscalls and486* for loading the GDT descriptors. We use just one GDT descriptor487* for TLS, so we will load just one.488*489* XXX: This doesn't work when a user space process tries to use more490* than one TLS segment. Comment in the Linux source says wine might491* do this.492*/493494/*495* GLIBC reads current %gs and call set_thread_area() with it.496* We should let GUDATA_SEL and GUGS32_SEL proceed as well because497* we use these segments.498*/499switch (info.entry_number) {500case GUGS32_SEL:501case GUDATA_SEL:502case 6:503case -1:504info.entry_number = GUGS32_SEL;505break;506default:507return (EINVAL);508}509510/*511* We have to copy out the GDT entry we use.512*513* XXX: What if a user space program does not check the return value514* and tries to use 6, 7 or 8?515*/516error = copyout(&info, args->desc, sizeof(struct l_user_desc));517if (error)518return (error);519520pcb = td->td_pcb;521update_pcb_bases(pcb);522pcb->pcb_gsbase = (register_t)info.base_addr;523update_gdt_gsbase(td, info.base_addr);524525return (0);526}527528void529bsd_to_linux_regset32(const struct reg32 *b_reg,530struct linux_pt_regset32 *l_regset)531{532533l_regset->ebx = b_reg->r_ebx;534l_regset->ecx = b_reg->r_ecx;535l_regset->edx = b_reg->r_edx;536l_regset->esi = b_reg->r_esi;537l_regset->edi = b_reg->r_edi;538l_regset->ebp = b_reg->r_ebp;539l_regset->eax = b_reg->r_eax;540l_regset->ds = b_reg->r_ds;541l_regset->es = b_reg->r_es;542l_regset->fs = b_reg->r_fs;543l_regset->gs = b_reg->r_gs;544l_regset->orig_eax = b_reg->r_eax;545l_regset->eip = b_reg->r_eip;546l_regset->cs = b_reg->r_cs;547l_regset->eflags = b_reg->r_eflags;548l_regset->esp = b_reg->r_esp;549l_regset->ss = b_reg->r_ss;550}551552int futex_xchgl_nosmap(int oparg, uint32_t *uaddr, int *oldval);553int futex_xchgl_smap(int oparg, uint32_t *uaddr, int *oldval);554DEFINE_IFUNC(, int, futex_xchgl, (int, uint32_t *, int *))555{556557return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?558futex_xchgl_smap : futex_xchgl_nosmap);559}560561int futex_addl_nosmap(int oparg, uint32_t *uaddr, int *oldval);562int futex_addl_smap(int oparg, uint32_t *uaddr, int *oldval);563DEFINE_IFUNC(, int, futex_addl, (int, uint32_t *, int *))564{565566return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?567futex_addl_smap : futex_addl_nosmap);568}569570int futex_orl_nosmap(int oparg, uint32_t *uaddr, int *oldval);571int futex_orl_smap(int oparg, uint32_t *uaddr, int *oldval);572DEFINE_IFUNC(, int, futex_orl, (int, uint32_t *, int *))573{574575return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?576futex_orl_smap : futex_orl_nosmap);577}578579int futex_andl_nosmap(int oparg, uint32_t *uaddr, int *oldval);580int futex_andl_smap(int oparg, uint32_t *uaddr, int *oldval);581DEFINE_IFUNC(, int, futex_andl, (int, uint32_t *, int *))582{583584return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?585futex_andl_smap : futex_andl_nosmap);586}587588int futex_xorl_nosmap(int oparg, uint32_t *uaddr, int *oldval);589int futex_xorl_smap(int oparg, uint32_t *uaddr, int *oldval);590DEFINE_IFUNC(, int, futex_xorl, (int, uint32_t *, int *))591{592593return ((cpu_stdext_feature & CPUID_STDEXT_SMAP) != 0 ?594futex_xorl_smap : futex_xorl_nosmap);595}596597int598linux_ptrace_peekuser(struct thread *td, pid_t pid, void *addr, void *data)599{600601LINUX_RATELIMIT_MSG_OPT1("PTRACE_PEEKUSER offset %ld not implemented; "602"returning EINVAL", (uintptr_t)addr);603return (EINVAL);604}605606int607linux_ptrace_pokeuser(struct thread *td, pid_t pid, void *addr, void *data)608{609610LINUX_RATELIMIT_MSG_OPT1("PTRACE_POKEUSER offset %ld "611"not implemented; returning EINVAL", (uintptr_t)addr);612return (EINVAL);613}614615616