14 Linking

14 minute read

Published:

Linker

seald

Minimal ELF

Just generate the ELF header (Ehdr), filling out all of its fields

./gen-exe
# creates a.out, a binary file with *just* the header (64b)
readelf -a a.out

No data section, no linking with standard library

Has a “section to segment mapping”

Minimal exe

ELF header, program header, .text, section header, but no strings. Assemble hello.s into an object file:

cpp hello.c hello.i
cc1 hello.i -Og -o hello.s
as -o hello.o hello.s      # assemble hello.s
ld -o hello hello.o 

hello.s

  • Push 4-byte integer “\nPSA” (written backwards) onto the stack
  • Setup syscall registers to write(1, "ASP\n", 4)
  • exit(0). _start: declares a function; .globl gives it a symbol (makes it a global function)
    readelf hello
    # a few more symbols from real linker
    ./cld hello.o
    # creates a.out
    readelf ./a.out
    

    Sections will be defined, but their names show as <no-strings>, since we don’t have a .shstrtab

  • Put .text at offset 0x1000 (dec 4096, a page)
  • Each segment must start at a page boundary (since mmap()ed)
  • If not a new segment, can start anywhere. There may still be alignment requirements

Relocation

Link single object file, compiled from C echo.c

char newline[] = "\n";
// shorthand newline[2] = { '\n', '\0' };
// STACK allocated array

char *newline = "\n";      
// the string literal lives in a read-only section; only the pointer to it is stack-allocated
// can't use this form, since we don't yet have a .rodata section

Need to write my_libc.h to handle write() as a system call, adapted from the Linux kernel

  1. Define constants
     #ifndef __MY_LIBC_H__
     #define __MY_LIBC_H__
    
     /*
     * Header file for minimal C standard library.
     */
    
     // Standard stream file descriptors
     #define STDIN_FILENO  0
     #define STDOUT_FILENO 1
     #define STDERR_FILENO 2
    
     // System call numbers for x86_64 Linux
     #define __NR_write 1
     #define __NR_exit 60
    
  2. syscall() is implemented as an inline assembly function (mixing assembly inside of C code)
     /*
      * Issue a raw system call.
      *
      * n is the system call number and a1..a6 are its arguments; each one is
      * pinned to the register named in its declaration below before the
      * `syscall` instruction executes. Returns the value left in rax by the
      * instruction, unmodified.
      */
     static inline long syscall(long n, long a1, long a2, long a3, long a4, long a5, long a6) {
         long ret;
    
         // pin each value to the specific register it must occupy
         register long _num  asm("rax") = n;
         register long _arg1 asm("rdi") = a1;
         register long _arg2 asm("rsi") = a2;
         register long _arg3 asm("rdx") = a3;
         register long _arg4 asm("r10") = a4;
         register long _arg5 asm("r8")  = a5;
         register long _arg6 asm("r9")  = a6;
    
         // Operand lists, in order:
         //   outputs : "=a" writes rax into ret
         //   inputs  : "0" places _num in the same register as operand 0 (rax);
         //             the "r" operands keep the registers chosen above
         //   clobbers: memory, condition codes, r11 and rcx may be modified
         asm volatile (
             "syscall\n\t"
             : "=a" (ret)
             : "0" (_num), "r" (_arg1), "r" (_arg2), "r" (_arg3), "r" (_arg4),
             "r" (_arg5), "r" (_arg6)
             : "memory", "cc", "r11", "rcx"
         );
    
         return ret;
     }
    
  3. Now write exit() and write() syscalls
    • Defined as static, only usable in this compilation unit (only C files that #include this header)
    • In the ELF files, those symbols will be defined as local, won’t get linked
     /*
      * Terminate the process with status `code` through the exit system call.
      * Declared noreturn: control never comes back to the caller.
      */
     static inline __attribute__((noreturn)) void exit(int code) {
         const long status = code;

         (void)syscall(__NR_exit, status, 0, 0, 0, 0, 0);

         // exit() is not supposed to return; tell the compiler this point is never reached
         __builtin_unreachable();
     }
    
     /*
      * Write `len` bytes starting at `buf` to file descriptor `fd` using the
      * write system call.
      *
      * Returns the raw result of the system call so that callers can detect
      * errors and short writes. Callers that do not care may simply ignore
      * the value, exactly as they did when this function returned void.
      */
     static inline long write(int fd, const char *buf, unsigned long len) {
         return syscall(__NR_write, fd, (long)buf, (long)len, 0, 0, 0);
     }
    
  4. Loader jumps to _start for entry point

     /*
      * C-level entry point: unpacks the initial stack, runs main() and exits
      * with main()'s return value.
      *
      * `sp` is the stack pointer as it was on entry to the program. The
      * memory it points at is laid out as follows:
      *
      * sp  :  argc          <-- argument count, required by main()
      * argv:  argv[0]       <-- argument vector, required by main()
      *        argv[1]
      *        ...
      *        argv[argc-1]
      *        NULL
      * envp:  envp[0]       <-- environment variables, required by main()/getenv()
      *        envp[1]
      *        ...
      *        NULL
      *
      * NOT IMPLEMENTED:
      * _auxv: _auxv[0]      <-- auxiliary vector, required by getauxval()
      *        _auxv[1]
      *        ...
      *        NULL
      */
     // global function
     void _start_c(long *sp) {
         // Declare main() under a private C name bound to the assembler symbol
         // "main"; this silences warnings about different types for main().
         //               (argc, argv, envp)
         int _mylib_main(int, char **, char **) __asm__ ("main");
    
         long argc = *sp;                    // first slot holds the argument count
         char **argv = (void *)(sp + 1);     // argv[] starts right after argc
         char **envp = argv + argc + 1;      // skip argv[0..argc-1] and its NULL terminator
    
         int ret = _mylib_main(argc, argv, envp);
    
         exit(ret);
     }
    
     /*
      * Program entry point. Start-up code inspired by the Linux kernel's
      * nolibc header library.
      *
      * It hands the original stack pointer to _start_c() and never returns.
      *
      * x86-64 System V ABI requires:
      * - %rsp must be 16-byte aligned before calling a function
      * - Deepest stack frame should be zero (%rbp)
      *
      * Requires -fomit-frame-pointer to work (requested for this function via
      * the optimize attribute below).
      * NOTE(review): presumably because a frame-pointer prologue would change
      * %rsp before the asm reads it -- confirm.
      */
     void __attribute__((noreturn, optimize("omit-frame-pointer"))) _start() {
         __asm__ volatile (
                 "xor  %ebp, %ebp\n"  // zero the frame pointer to mark the deepest frame
                 "mov  %rsp, %rdi\n"  // pass the original stack pointer in %rdi, as arg1 of _start_c
                 "and  $-16, %rsp\n"  // %rsp must be 16-byte aligned before call
                 "call _start_c\n"    // transfer to the C runtime (_start_c)
                 "hlt\n"              // reached only if _start_c returned; ensure we go no further
                 );
         __builtin_unreachable();     // tell the compiler control never gets here
     }
    
     #endif // __MY_LIBC_H__
    

Object file relocation

readelf -a echo.o
# elf headers
# section headers
#...
# symbol table

Symbol table:

  • Global (from my_libc.h)
    • _start_c, _start
  • Global (from main)
    • main, my_strlen
  • Compiler adds local symbols (static)
    • syscall, exit, write
    • echo.c (filename), UND (first element is NULL). When linking, we don’t care about local symbols
objdump -a echo.o

main() calls my_strlen(), but the call’s target address (relative to the address of the next instruction, stored little-endian) is not yet filled in

  • Linker knows the presence of a function call (e8), how does it know which function to call?
  • It needs to mark it somewhere. Makes another relocation section .rela.text, containing array of the following structure.
    /* One entry of a relocation section such as .rela.text. */
    typedef struct {
      Elf64_Addr r_offset;   /* where the unresolved address has to be filled in */
      uint64_t   r_info;     /* symbol index and relocation type, packed together */
      int64_t    r_addend;   /* addend; readelf prints it under "Sym. Name + Addend" */
    } Elf64_Rela;
    
  • r_offset: the location of the unresolved address to be filled in (1 byte after the e8 opcode)
  • r_info: packed with symbol and type, by macros
    • Symbol ELF[32|64]_R_SYM: what symbol this entry is for (nth symbol in .symtab).
      • readelf goes to .symtab and figures out “Sym. Value”
    • Type ELF[32|64]_R_TYPE: type of address that you can put in
      • Absolute vs relative
      • Type 4 is R_X86_64_PLT32: a 32-bit PC-relative offset

Multiple objects

Really, really difficult. Somehow, multiple object files bring a whole new level of complexity

echo2

Separate the two global functions from my_libc.h.

  • _start_c() and _start() (two global functions) are separated to my_libc.c
  • You can start debugging with 2 object files, and extend it to the general case

sum

General case: sum, with 5 C files

  • main.c: main function declaring an array of 5 longs, declaring and calling sum_array() and my_strlen(), print_ulong()
  • sum.c: Defines sum_array() and sum()
  • print_ulong.c: print_ulong()
./cld *.o
./a.out

readelf -a main.o

The address of sum_array, my_strlen, print_ulong are undefined, but there are relocation entries (.rela.text)

  • my_strlen appears twice, since it’s called twice
  • Offset: where in the object file these calls happen
  • Info: symbol index. Only when you combine all object files into a single text section and rewrite all the symtab entries can you do relocation. When parsing main.o:
  • Ignore local functions. They don’t go to the executable.
  • For global:
    • main in section 1
    • sum_array UND
    • my_strlen UND
    • print_ulong UND. Symbol resolution: create a union of all symtabs, output_symtab. Determine which section each symbol belongs to
  • Need mapping from input symtab to output symtab
  • When you see the second object file that defines or calls my_strlen, you need to know that it’s collected already
  • If a symbol is never defined, linker error
  • If multiple definitions, linker error

For the executable, need to:

  • Fix all st_value.
  • Recalculate st_name (index into .strtab). Need to generate a new strtab that’s a union of all previous strtabs
  • Fix .rela.text entries with new offset and info index from the new .symtab

Static library

Library: a collection of object files, packaged into a single archive file that can be linked against. E.g., make a vector library:

ar rcs libvector.a addvec.o multivec.o
gcc main.o libvector.a -o main
gcc main.o -lvector -o main     # equivalent
  • Linking libvector.a doesn’t mean you link all the files. It only picks as needed.
  • Linker processes objects from left to right, so archives go last
    • Archives only resolve previously unmatched symbols
    • If circular reference, need to list libraries multiple times

Static: once you pull the objects, it becomes part of your executable main

Shared library

Static is wasteful. Every executable will have a copy of the object referenced. Shared: generate incomplete executable. When it’s run, link the objects shared on the disk by mmap()

  • Multiple processes can also mmap() to a shared library
    $ make
    gcc -Wall -fpic   -c -o main.o main.c
    gcc -Wall -fpic   -c -o sum.o sum.c
    gcc -shared -o libsum.so sum.o
    gcc -o main-dyn main.o libsum.so
    
  • Need -fpic to generate objects that will eventually go in a shared lib
    • (Forces every address to be relative, i.e. position independent)
  • -shared flag creates a shared library libsum.so from sum.o
  • main-dyn does not contain sum.o. It only contains the shell that helps accessing sum.o

Loading shared library

Now run ./main-dyn. Doesn’t work!

$ # Dynamic linker can't find our libsum shared library
$ ./main-dyn 
./main-dyn: error while loading shared libraries: libsum.so: cannot open shared object file: No such file or directory

Need to use ldd to find the libraries needed to be loaded at runtime

$ ldd main-dyn
        linux-vdso.so.1 (0x00007ffc951cf000)
        libsum.so => not found     # main-dyn *knows* about the shared lib
        libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007f279d400000)
        /lib64/ld-linux-x86-64.so.2 (0x00007f279d74f000)
  • It knows it needs libsum.so, but can’t find it
  • Also knows and finds libc.so.6. Shared libraries can be found in 3 ways:
    1. In predefined system directory: /lib, /lib64, …
    2. Routinely run ldconfig, which reads the configuration file /etc/ld.so.conf and maintains the library cache /etc/ld.so.cache
    3. (simple) Declare environment variable LD_LIBRARY_PATH:
      $ LD_LIBRARY_PATH=. ./main-dyn
      
  • This is temporary: the variable is set only for that one command
  • To make it last for the rest of the shell session, run export LD_LIBRARY_PATH=.

Load library with dlopen()

Do it at runtime: dynamically link against libsum.so, find sum_array(), and call it. main-dlopen.c:

#include <dlfcn.h>
#include <stdio.h>

/*
 * Load libsum.so at run time, look up sum_array() in it, and call it on a
 * small array.
 *
 * Returns 0 on success, or 1 if the library cannot be loaded or the symbol
 * cannot be found (the reason is printed on stderr).
 */
int main(void) {
    // lazily load dynamic library
    void *handle = dlopen("libsum.so", RTLD_LAZY);
    if (handle == NULL) {
        // e.g. libsum.so is not on the dynamic linker's search path
        fprintf(stderr, "dlopen: %s\n", dlerror());
        return 1;
    }

    // dlsym() hands back the function's address as a void *
    long (*f)(long *p, int n) = dlsym(handle, "sum_array");
    if (f == NULL) {
        // calling through a null function pointer would crash; report instead
        fprintf(stderr, "dlsym: %s\n", dlerror());
        dlclose(handle);
        return 1;
    }

    long a[5] = {0, 1, 2, 3, 4};
    long sum = f(a, 5);
    printf("sum=%ld\n", sum);

    dlclose(handle);
    return 0;
}

No linking at link time:

gcc -Wall main-dlopen.c
LD_LIBRARY_PATH=. ./a.out

Library interpositioning

mymalloc.c

#define _GNU_SOURCE   /* must precede every #include so that <dlfcn.h> provides RTLD_NEXT */
#include <stdio.h>
#include <stdlib.h>
#include <dlfcn.h>

/*
 * malloc wrapper function.
 *
 * Forwards the request to the next malloc in library search order and logs
 * each call on stderr. Returns whatever that malloc returns. If no other
 * malloc can be found, prints the reason and terminates the process.
 */
void *malloc(size_t size) {
    // look at *other* libraries, find its malloc
    void *(*mallocp)(size_t size) = dlsym(RTLD_NEXT, "malloc");
    if (mallocp == NULL) {
        // nothing to forward to: bail out rather than call through a null pointer
        const char *reason = dlerror();
        fputs(reason != NULL ? reason : "malloc wrapper: no underlying malloc", stderr);
        fputs("\n", stderr);
        exit(1);
    }

    void *ptr = mallocp(size);
    // printf doesn't work since printf calls malloc
    fputs("malloc wrapper called\n", stderr);
    return ptr;
}
gcc -fpic -shared -o mm.so mymalloc.c && LD_PRELOAD=./mm.so  cal
  • If a library is listed in LD_PRELOAD, it will be loaded first, before all other libraries
    • This will intercept standard library functions
  • Result: cal calls malloc() and free() a lot

Position Independent Code

PIC

  • Relocation: calculate PC-relative addresses to symbols once they are resolved at link time. What if linking dynamically? Then the fix-up is needed at runtime.
  • Don’t know symbol location until library is loaded, but at runtime, text segment already mmap()ed to the address space in read only mode. Can’t relocate
  • Text section needs to be writable, which can’t be shared among processes
  • 32-bit PC relative address can’t reach 64-bit shared library locations. Need 64-bit absolute addressing

Data

Simpler. Create a section in the data segment, the global offset table (GOT): an array of pointers to the symbols you have referenced

  • When calling, direct from code to GOT table’s location in data
  • At runtime, when libraries are loaded, GOT is filled in
  • Assumes the relative location of GOT and code section is a constant

alt

mov 0x2008b9(%rip), %rax    # refer to the GOT
addl $0x1, (%rax)           # deref rax again for addcnt

Function

2 levels of redirection: GOT and PLT (procedure linkage table)

alt

Before the first call, GOT[4] has a fake address, since we don’t want to resolve every symbol up front

First call

  1. Jump to the PLT entry for “call addvec()”
  2. Grab GOT[4], unfilled to begin with. (proceed to next instruction)
    • Push 1 onto the stack (relocation entry number for addvec())
  3. Jump to PLT[0], a special entry in PLT that will jump to the dynamic linker section
    • Push address of relocation entries GOT[1] (also preparing arguments for the dynamic linker)
  4. Jump to dynamic linker GOT[2]
    • Dynamic linker finds addvec(), does the relocation, and puts the address into GOT[4]. Subsequent calls:
  5. Call addvec(). Jump to PLT section (array of code snippets)
  6. Grab actual (*)addvec() from GOT

Demo

  • objdump -d main-dyn
    • main() executes call 1070 <sum_array@plt>
    • PLT entry at 1070: bnd jmp *0x2f45(%rip) # 3fc0 <sum_array@Base>
  • readelf -a main-dyn:
    • Relocation entry in .rela.plt for GOT entry (3fc0):
      Offset          Info           Type           Sym. Value    Sym. Name + Addend
      000000003fc0  000300000007 R_X86_64_JUMP_SLO 0000000000000000 sum_array + 0
      
  • readelf -x .got main-dyn
    • The hexdump shows the fallthrough address stored at 0x3fc0: 0x1030
      Hex dump of section '.got':
    
      0x00003fb8 00000000 00000000 30100000 00000000 ........0.......
    
  • objdump -d main-dyn:
    • See fallthrough address in .plt section:
      0000000000001020 <.plt>:
      1020:       ff 35 8a 2f 00 00       push   0x2f8a(%rip)        # 3fb0 <_GLOBAL_OFFSET_TABLE_+0x8>
      1026:       f2 ff 25 8b 2f 00 00    bnd jmp *0x2f8b(%rip)        # 3fb8 <_GLOBAL_OFFSET_TABLE_+0x10>
      102d:       0f 1f 00                nopl   (%rax)
      1030:       f3 0f 1e fa             endbr64 
      1034:       68 00 00 00 00          push   $0x0
      1039:       f2 e9 e1 ff ff ff       bnd jmp 1020 <_init+0x20>
      103f:       90        
    

Debugger

Change the code to call sum_array() twice

  • Need to make another executable, with -no-pie (regular executable)
    • Shared library can come from anywhere
    • Code at a fixed address
      make main-no-pie
      ./main-no-pie
      ldb ./main-no-pie
      (ldb) b 401060           # where plt section for sum_array begins
      # observe GOT entry
      (ldb) x 404018           # 401030, directly jump down
      (ldb) c
      # keep stepping
      # first time: reference GOT
      # eventually reach the dynamic linker address
      (ldb) c
      (ldb) x 404018           # (*)sum_array now!
      # will call sum_array()
      

Makefile

Position independence and dynamic linking are orthogonal concepts

  • main-dyn: PIE (position independent) and dynamically linked, the normal default
    • Linked with the C library and libsum
    • file main-dyn shows it’s an LSB pie executable, dynamically linked, interpreter ... (loader)
    • objdump -d main-dyn: all addresses start from 0 (actually 1000, skipping a page for header). Not real addresses. When running, starting address can be any place in the memory
  • main-no-pie
    • gcc -no-pie
    • Also dynamically linked
    • file main-no-pie: LSB executable, doesn’t say “pie”
    • objdump -d main-no-pie: Addresses fixed, needed by ldb
  • main-static-pie: statically linked with pie
    • gcc -static-pie
    • ldd main-static-pie: “statically linked”, has all code in itself, very large in size
    • file main-static-pie: “LSB pie executable”, “static-pie linked”
    • objdump -d main-static-pie: very long file, started from 0
  • main-static: statically linked without pie
    • gcc -static (what ldb and cld uses)
    • ldd main-static: “not a dynamic executable”
    • file main-static: “statically linked”, no “pie”
    • objdump -d: address starting randomly