2019-02-17

Introduction

glibc uses two methods to create wrappers for system calls: one uses the ‘make-syscalls.sh’ script to generate the wrapper, and the other uses a C function and some macros.

After we configure and build the glibc source code, we can find a ‘sysd-syscalls’ file in the ‘~/glibc-2.27/build’ directory. In this file, a system call wrapper that is generated by the script has the following shape:

    #### CALL=dup NUMBER=32 ARGS=i:i SOURCE=-
    ifeq (,$(filter dup,$(unix-syscalls)))
    unix-syscalls += dup
    $(foreach p,$(sysd-rules-targets),$(foreach o,$(object-suffixes),$(objpfx)$(patsubst %,$p,dup)$o)): \
                    $(..)sysdeps/unix/make-syscalls.sh
            $(make-target-directory)
            (echo '#define SYSCALL_NAME dup'; \
            echo '#define SYSCALL_NARGS 1'; \
            echo '#define SYSCALL_SYMBOL __dup'; \
            echo '#define SYSCALL_NOERRNO 0'; \
            echo '#define SYSCALL_ERRVAL 0'; \
            echo '#include <syscall-template.S>'; \
            echo 'weak_alias (__dup, dup)'; \
            echo 'hidden_weak (dup)'; \
            ) | $(compile-syscall) $(foreach p,$(patsubst %dup,%,$(basename $(@F))),$($(p)CPPFLAGS))
    endif

A system call wrapper that is implemented in a C file has the following shape:

    #### CALL=open NUMBER=2 ARGS=Ci:siv SOURCE=sysdeps/unix/sysv/linux/open.c

    #### CALL=profil NUMBER=- ARGS=i:piii SOURCE=sysdeps/unix/sysv/linux/profil.c

    #### CALL=ptrace NUMBER=101 ARGS=i:iiii SOURCE=sysdeps/unix/sysv/linux/ptrace.c

    #### CALL=read NUMBER=0 ARGS=Ci:ibn SOURCE=sysdeps/unix/sysv/linux/read.c

Script wrapper

There are three kinds of files related to the script wrapper: one ‘make-syscalls.sh’ file, one ‘syscall-template.S’ file, and several ‘syscalls.list’ files.

‘glibc-2.27/sysdeps/unix/make-syscalls.sh’ is a script that reads the ‘syscalls.list’ files and parses every line to generate a wrapper for each system call.

A ‘syscalls.list’ file has the following shape:

    # File name	Caller	Syscall name	Args	Strong name	Weak names

    accept		-	accept		Ci:iBN	__libc_accept	accept
    access		-	access		i:si	__access	access
    acct		-	acct		i:S	acct
    adjtime		-	adjtime		i:pp	__adjtime	adjtime
    bind		-	bind		i:ipi	__bind		bind
    chdir		-	chdir		i:s	__chdir		chdir
    chmod		-	chmod		i:si	__chmod		chmod

This file specifies each system call’s name, arguments, symbol names, etc.

There are several ‘syscalls.list’ files:

    sysdeps/unix/syscalls.list
    sysdeps/unix/sysv/linux/syscalls.list
    sysdeps/unix/sysv/linux/generic/syscalls.list
    sysdeps/unix/sysv/linux/x86_64/syscalls.list

‘syscall-template.S’ is a template file used by every script-generated system call wrapper.

    #include <sysdep.h>

    /* This indirection is needed so that SYMBOL gets macro-expanded.  */
    #define syscall_hidden_def(SYMBOL)		hidden_def (SYMBOL)

    #define T_PSEUDO(SYMBOL, NAME, N)		PSEUDO (SYMBOL, NAME, N)
    #define T_PSEUDO_NOERRNO(SYMBOL, NAME, N)	PSEUDO_NOERRNO (SYMBOL, NAME, N)
    #define T_PSEUDO_ERRVAL(SYMBOL, NAME, N)	PSEUDO_ERRVAL (SYMBOL, NAME, N)
    #define T_PSEUDO_END(SYMBOL)			PSEUDO_END (SYMBOL)
    #define T_PSEUDO_END_NOERRNO(SYMBOL)		PSEUDO_END_NOERRNO (SYMBOL)
    #define T_PSEUDO_END_ERRVAL(SYMBOL)		PSEUDO_END_ERRVAL (SYMBOL)

    #if SYSCALL_NOERRNO

    /* This kind of system call stub never returns an error.
    We return the return value register to the caller unexamined.  */

    T_PSEUDO_NOERRNO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS)
        ret_NOERRNO
    T_PSEUDO_END_NOERRNO (SYSCALL_SYMBOL)

    #elif SYSCALL_ERRVAL

    /* This kind of system call stub returns the errno code as its return
    value, or zero for success.  We may massage the kernel's return value
    to meet that ABI, but we never set errno here.  */

    T_PSEUDO_ERRVAL (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS)
        ret_ERRVAL
    T_PSEUDO_END_ERRVAL (SYSCALL_SYMBOL)

    #else

    /* This is a "normal" system call stub: if there is an error,
    it returns -1 and sets errno.  */

    T_PSEUDO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS)
        ret
    T_PSEUDO_END (SYSCALL_SYMBOL)

    #endif

    syscall_hidden_def (SYSCALL_SYMBOL)

There are three kinds of system call wrappers, defined by ‘T_PSEUDO’, ‘T_PSEUDO_NOERRNO’ and ‘T_PSEUDO_ERRVAL’. If ‘SYSCALL_NOERRNO’ is non-zero, the system call is wrapped by ‘T_PSEUDO_NOERRNO’; this means the system call never returns an error, for example the ‘getpid’ and ‘umask’ system calls. If ‘SYSCALL_ERRVAL’ is non-zero, the system call is wrapped by ‘T_PSEUDO_ERRVAL’; this means the wrapper returns the error code directly as its return value (zero on success) and never sets errno. If both ‘SYSCALL_NOERRNO’ and ‘SYSCALL_ERRVAL’ are zero (as in the ‘dup’ example above), the system call is wrapped by ‘T_PSEUDO’; this means the wrapper returns -1 on error and stores the error code in the errno variable.

‘T_PSEUDO’, ‘T_PSEUDO_NOERRNO’ and ‘T_PSEUDO_ERRVAL’ expand to ‘PSEUDO’, ‘PSEUDO_NOERRNO’ and ‘PSEUDO_ERRVAL’, which are defined in ‘sysdep.h’, located at ‘glibc-2.27/sysdeps/unix/sysv/linux/x86_64/sysdep.h’.

First, let’s look at ‘PSEUDO_NOERRNO’:

    # undef	PSEUDO_NOERRNO
    # define PSEUDO_NOERRNO(name, syscall_name, args) \
    .text;								      \
    ENTRY (name)								      \
        DO_CALL (syscall_name, args)

    # undef	PSEUDO_END_NOERRNO
    # define PSEUDO_END_NOERRNO(name) \
    END (name)

The following is the definition of ‘DO_CALL’:

    # undef	DO_CALL
    # define DO_CALL(syscall_name, args)		\
        DOARGS_##args				\
        movl $SYS_ify (syscall_name), %eax;		\
        syscall;

‘DOARGS_##args’ sets up the arguments of the system call. The kernel expects the system call parameters in the following registers:

    syscall number	rax
    arg 1		rdi
    arg 2		rsi
    arg 3		rdx
    arg 4		r10
    arg 5		r8
    arg 6		r9

However, a normal function call in userspace, including a call to a system call stub, passes its parameters in the following registers:

    system call number	in the DO_CALL macro
    arg 1		rdi
    arg 2		rsi
    arg 3		rdx
    arg 4		rcx
    arg 5		r8
    arg 6		r9

So the ‘DOARGS_x’ macros have the following definitions:

    # define DOARGS_0 /* nothing */
    # define DOARGS_1 /* nothing */
    # define DOARGS_2 /* nothing */
    # define DOARGS_3 /* nothing */
    # define DOARGS_4 movq %rcx, %r10;
    # define DOARGS_5 DOARGS_4
    # define DOARGS_6 DOARGS_5

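This means that only when the system call has four or more arguments does the wrapper need to move the fourth argument from %rcx to %r10. (The ‘syscall’ instruction itself overwrites %rcx with the return address, which is why the kernel takes the fourth argument in %r10 instead.)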

‘SYS_ify’ is defined as follows:

    #undef SYS_ify
    #define SYS_ify(syscall_name)	__NR_##syscall_name

So the ‘DO_CALL’ macro sets up the system call’s arguments, moves the system call number into %eax, and executes the ‘syscall’ instruction.

The ‘ENTRY’ and ‘END’ macros used by ‘PSEUDO_NOERRNO’ are defined as follows:

    /* Define an entry point visible from C.  */
    #define	ENTRY(name)							      \
    .globl C_SYMBOL_NAME(name);						      \
    .type C_SYMBOL_NAME(name),@function;					      \
    .align ALIGNARG(4);							      \
    C_LABEL(name)								      \
    cfi_startproc;							      \
    CALL_MCOUNT

    #undef	END
    #define END(name)							      \
    cfi_endproc;								      \
    ASM_SIZE_DIRECTIVE(name)

Nothing special here, just some standard definitions.

So for ‘PSEUDO_NOERRNO’, the system call doesn’t return an error, so glibc doesn’t need to do anything with the return value.

‘PSEUDO_ERRVAL’ just negates the kernel’s return value, so the wrapper returns the positive error code (or zero on success).

    # undef	PSEUDO_ERRVAL
    # define PSEUDO_ERRVAL(name, syscall_name, args) \
    .text;								      \
    ENTRY (name)								      \
        DO_CALL (syscall_name, args);					      \
        negq %rax

    # undef	PSEUDO_END_ERRVAL
    # define PSEUDO_END_ERRVAL(name) \
    END (name)

‘PSEUDO’ compares the return value with -4095 as an unsigned number: if the return value is >= -4095 (0xfffffffffffff001), i.e. in the range [-4095, -1], the system call returned an error in the kernel. In that case glibc jumps to ‘SYSCALL_ERROR_LABEL’ to handle it.

    # undef	PSEUDO
    # define PSEUDO(name, syscall_name, args)				      \
    .text;								      \
    ENTRY (name)								      \
        DO_CALL (syscall_name, args);					      \
        cmpq $-4095, %rax;							      \
        jae SYSCALL_ERROR_LABEL

    # undef	PSEUDO_END
    # define PSEUDO_END(name)						      \
    SYSCALL_ERROR_HANDLER							      \
    END (name)

‘SYSCALL_ERROR_LABEL’ is defined as follows:

    # ifdef PIC
    #  define SYSCALL_ERROR_LABEL 0f
    # else
    #  define SYSCALL_ERROR_LABEL syscall_error
    # endif

When PIC is not defined:

    #define	syscall_error	__syscall_error
    int
    __attribute__ ((__regparm__ (1)))
    __syscall_error (int error)
    {
    __set_errno (-error);
    return -1;
    }

So for ‘PSEUDO’, glibc sets errno to the error returned by the kernel and returns -1 from the wrapper function.

The following is the disassembly of the ‘dup’ system call wrapper as an example.

    00000000000e4ea0 <dup>:
    e4ea0:       b8 20 00 00 00          mov    $0x20,%eax
    e4ea5:       0f 05                   syscall
    e4ea7:       48 3d 01 f0 ff ff       cmp    $0xfffffffffffff001,%rax
    e4ead:       73 01                   jae    e4eb0 <dup+0x10>
    e4eaf:       c3                      retq
    e4eb0:       48 8b 0d b1 8f 2c 00    mov    0x2c8fb1(%rip),%rcx        # 3ade68 <.got+0x108>
    e4eb7:       f7 d8                   neg    %eax
    e4eb9:       64 89 01                mov    %eax,%fs:(%rcx)
    e4ebc:       48 83 c8 ff             or     $0xffffffffffffffff,%rax
    e4ec0:       c3                      retq
    e4ec1:       66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
    e4ec8:       00 00 00
    e4ecb:       0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)

C file wrapper

As mentioned above, the other kind is the C file wrapper, which defines the system call wrapper in a C file. For example, ‘sysd-syscalls’ has the following line:

    #### CALL=read NUMBER=0 ARGS=Ci:ibn SOURCE=sysdeps/unix/sysv/linux/read.c

Both ‘__libc_read’ and ‘__read_nocancel’ will call:

    INLINE_SYSCALL_CALL (read, fd, buf, nbytes);

    #define INLINE_SYSCALL_CALL(...) \
    __INLINE_SYSCALL_DISP (__INLINE_SYSCALL, __VA_ARGS__)

So ‘INLINE_SYSCALL_CALL’ expands to:

    __INLINE_SYSCALL_DISP (__INLINE_SYSCALL, read, fd, buf, nbytes)

    #define __INLINE_SYSCALL_DISP(b,...) \
    __SYSCALL_CONCAT (b,__INLINE_SYSCALL_NARGS(__VA_ARGS__))(__VA_ARGS__)

The macro ‘__INLINE_SYSCALL_NARGS(read, fd, buf, nbytes)’ expands to:

    __INLINE_SYSCALL_NARGS_X (read, fd, buf, nbytes,7,6,5,4,3,2,1,0,)

This finally evaluates to 3, the number of arguments passed to the system call.

So ‘__INLINE_SYSCALL_DISP (__INLINE_SYSCALL, read, fd, buf, nbytes)’ expands to:

    __INLINE_SYSCALL3(read, fd, buf, nbytes)

Since ‘__INLINE_SYSCALL3’ is defined as:

    #define __INLINE_SYSCALL3(name, a1, a2, a3) \
    INLINE_SYSCALL (name, 3, a1, a2, a3)

this expands to:

    INLINE_SYSCALL(read, 3, fd, buf, nbytes)

This macro is defined as follows:

    # undef INLINE_SYSCALL
    # define INLINE_SYSCALL(name, nr, args...) \
    ({									      \
        unsigned long int resultvar = INTERNAL_SYSCALL (name, , nr, args);	      \
        if (__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (resultvar, )))	      \
        {									      \
        __set_errno (INTERNAL_SYSCALL_ERRNO (resultvar, ));		      \
        resultvar = (unsigned long int) -1;				      \
        }									      \
        (long int) resultvar; })

Now let’s look at the ‘INTERNAL_SYSCALL’ macro:

    #undef INTERNAL_SYSCALL
    #define INTERNAL_SYSCALL(name, err, nr, args...)			\
        internal_syscall##nr (SYS_ify (name), err, args)

For ‘internal_syscall3’:

    #undef internal_syscall3
    #define internal_syscall3(number, err, arg1, arg2, arg3)		\
    ({									\
        unsigned long int resultvar;					\
        TYPEFY (arg3, __arg3) = ARGIFY (arg3);			 	\
        TYPEFY (arg2, __arg2) = ARGIFY (arg2);			 	\
        TYPEFY (arg1, __arg1) = ARGIFY (arg1);			 	\
        register TYPEFY (arg3, _a3) asm ("rdx") = __arg3;			\
        register TYPEFY (arg2, _a2) asm ("rsi") = __arg2;			\
        register TYPEFY (arg1, _a1) asm ("rdi") = __arg1;			\
        asm volatile (							\
        "syscall\n\t"							\
        : "=a" (resultvar)							\
        : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3)			\
        : "memory", REGISTERS_CLOBBERED_BY_SYSCALL);			\
        (long int) resultvar;						\
    })

So this macro loads the three arguments into their registers, executes the ‘syscall’ instruction, and returns ‘resultvar’.

Going back to the ‘INLINE_SYSCALL’ macro:

If ‘resultvar’ indicates an error, glibc assigns -resultvar to errno and returns -1 as the wrapper’s return value.



blog comments powered by Disqus