14 Linking
Published:
Linker
seald
Minimal ELF
Just generating Ehdr
, fill out all the files
./gen-exe
# creates a.out, a binary file with *just* the header (64b)
readelf -a a.out
No data section, no linking with standard library
Has a “section to segment mapping”
Minimal exe
ELF header, program header, .text, section header, but no string table. Assemble hello.s
into object
cpp hello.c hello.i
cc1 hello.i -Og -o hello.s
as -o hello.o hello.s # assemble hello.s
ld -o hello hello.o
hello.s
- Push 4-byte integer “\nPSA” (written backwards) onto the stack
- Setup
syscall
registers to write(1, "ASP\n", 4)
exit(0)
_start:
declares a function. .globl
makes it have a symbol (global function)
readelf hello # a few more symbols from real linker
./cld hello.o # creates a.out
readelf ./a.out
Sections will be defined, but
<no-strings>
, since we don’t have .shstrtab
- Put
.text
at offset 0x1000 (dec 4096, a page)
- Each segment must start at a page boundary (since
mmap()
ed) - If not a new segment, can start anywhere. There may still be alignment requirements
Relocation
Link single object file, compiled from C echo.c
char newline[] = "\n";
// shorthand newline[2] = { '\n', '\0' };
// STACK allocated array
char *newline = "\n";
// in read-only section, declare a stack-allocated pointer
// can't have, since we don't yet have a .rodata section
Need to write my_libc.h
to handle write()
as a system call, adapted from the Linux kernel
- Define constants
#ifndef __MY_LIBC_H__
#define __MY_LIBC_H__

/*
 * Header file for minimal C standard library.
 */

// Standard stream file descriptors
#define STDIN_FILENO 0
#define STDOUT_FILENO 1
#define STDERR_FILENO 2

// System call numbers for x86_64 Linux
#define __NR_write 1
#define __NR_exit 60
syscall()
is implemented as an inline assembly function (mixing assembly inside of C code)
/*
 * Issue a raw system call with up to six arguments.
 *
 * n      - system call number, placed in rax
 * a1..a6 - arguments, placed in rdi, rsi, rdx, r10, r8, r9 respectively
 *
 * Returns whatever the kernel leaves in rax after the `syscall` instruction.
 */
static inline long syscall(long n, long a1, long a2, long a3, long a4, long a5, long a6) {
    long ret;

    // map variables to specific registers
    register long _num asm("rax") = n;
    register long _arg1 asm("rdi") = a1;
    register long _arg2 asm("rsi") = a2;
    register long _arg3 asm("rdx") = a3;
    register long _arg4 asm("r10") = a4;
    register long _arg5 asm("r8") = a5;
    register long _arg6 asm("r9") = a6;

    // map variables to registers, with readonly, writeonly, ...
    asm volatile (
        "syscall\n\t"
        : "=a" (ret)                      // output: ret is read back from rax
        : "0" (_num),                     // input: shares operand 0's register (rax)
          "r" (_arg1), "r" (_arg2), "r" (_arg3),
          "r" (_arg4), "r" (_arg5), "r" (_arg6)
        : "memory", "cc", "r11", "rcx"    // declared clobbers: compiler must not keep live values here
    );

    return ret;
}
- Now write
exit()
and write()
syscalls
- Defined as static, only usable in this compilation unit (only C files that
#include
this header) - In the ELF files, those symbols will be defined as local, won’t get linked
static inline __attribute__((noreturn)) void exit(int code) { syscall(__NR_exit, code, 0, 0, 0, 0, 0); __builtin_unreachable(); // exit() is not suppose to return } static inline void write(int fd, const char *buf, unsigned long len) { syscall(__NR_write, fd, (long)buf, len, 0, 0, 0); }
- Defined as static, only usable in this compilation unit (only C files that
Loader jumps to
_start
for entry point
/*
 * Sets up the environment for a C program and calls main().
 *
 * sp points at the initial process stack (layout shown below). argc, argv
 * and envp are derived from it, main() is called with them, and its return
 * value is passed to exit().
 */

// global functions
void _start_c(long *sp) {
    // setup the argv array
    long argc;
    char **argv;
    char **envp;
    int ret;

    // Silence warnings about different types for main()
    // (local declaration bound to the assembler-level symbol "main")
    int _mylib_main(int, char **, char **) __asm__ ("main"); // (argc, argv, envar)

    /*
     * sp  :  argc          <-- argument count, required by main()
     * argv:  argv[0]       <-- argument vector, required by main()
     *        argv[1]
     *        ...
     *        argv[argc-1]
     *        NULL
     * envp:  envp[0]       <-- environment variables, required by main()/getenv()
     *        envp[1]
     *        ...
     *        NULL
     *
     * NOT IMPLEMENTED:
     * _auxv: _auxv[0]      <-- auxiliary vector, required by getauxval()
     *        _auxv[1]
     *        ...
     *        NULL
     */
    argc = *sp;                  // first word on the stack
    argv = (void *)(sp + 1);     // argv array starts right after argc
    envp = argv + argc + 1;      // skip argc entries plus argv's NULL terminator

    ret = _mylib_main(argc, argv, envp);
    exit(ret);
}

/*
 * Start up code inspired by the Linux kernel's nolibc header library.
 * x86-64 System V ABI requires:
 * - %rsp must be 16-byte aligned before calling a function
 * - Deepest stack frame should be zero (%rbp)
 *
 * Requires -fomit-frame-pointer to work.
 */
void __attribute__((noreturn, optimize("omit-frame-pointer"))) _start() {
    __asm__ volatile (
        "xor %ebp, %ebp\n"    // zero the stack frame
        "mov %rsp, %rdi\n"    // save stack pointer to %rdi, as arg1 of _start_c
        "and $-16, %rsp\n"    // %rsp must be 16-byte aligned before call
        "call _start_c\n"     // transfer to c runtime
        "hlt\n"               // ensure it does not return
    );
    __builtin_unreachable();
}

#endif // __MY_LIBC_H__
Object file relocation
readelf -a echo.o
# elf headers
# section headers
#...
# symbol table
Symbol table:
- Global (from
my_libc.h
)_start_c
,_start
- Global (from main)
main
,my_strlen
- Compiler adds local symbols (static)
syscall
,exit
,write
echo.c
(filename),UND
(first element isNULL
) When linking, don’t care about local symbols
objdump -a echo.o
main()
calls my_strlen()
, but its address (relative to the address of the next instruction, little-endian) is not yet filled in
- Linker knows the presence of a function call (
e8
), how does it know which function to call? - It needs to mark it somewhere. Makes another relocation section
.rela.text
, containing array of the following structure.
/* One relocation entry of a .rela.* section. */
typedef struct {
    Elf64_Addr r_offset; /* location of the field to patch (for a call: the byte after e8) */
    uint64_t r_info;     /* symbol index and relocation type, packed; unpack with ELF64_R_SYM / ELF64_R_TYPE */
    int64_t r_addend;    /* the "+ Addend" value readelf prints next to the symbol name */
} Elf64_Rela;
r_offset
: the address of the unresolved function (1 byte after e8)
r_info
: packed with symbol and type, by macros
- Symbol
ELF[32|64]_R_SYM
: what symbol this entry is for (nth symbol in.symtab
).readelf
goes to.symtab
and figures out “Sym. Value”
- Type
ELF[32|64]_R_TYPE
: type of address that you can put in
- Absolute vs relative
- Type 4 is
R_X86_64_PLT32
: 32-bit (4-byte) PC-relative offset
- Symbol
Multiple objects
really really difficult. Somehow, multiple object files bring a whole new level of complexity
echo2
Separate two global functions my_libc.h
.
_start_c()
and _start()
(two global functions) are separated into my_libc.c
- You can start debugging with 2 object files, and extend it to the general case
sum
General case: sum
, with 5 C files
main.c
: main function declaring an array of 5 longs, declaring and calling sum_array()
and my_strlen()
, print_ulong()
sum.c
: Definessum_array()
andsum()
print_ulong.c
:print_ulong()
./cld *.o
./a.out
readelf -a main.o
The address of sum_array
, my_strlen
, print_ulong
are undefined, but there are relocation entries (.rela.text
)
my_strlen
appears twice, since it’s called twice
- Offset: where in the object file these calls happen
- Info: symbol index
Only when you combine all object files into a single text section and rewrite all the symtab entries can you do relocation.
When parsing
main.o
: - Ignore local functions. They don’t go to the executable.
- For global:
main
in section 1
sum_array
UND
my_strlen
UND
print_ulong
UND
Symbol resolution: create a union of all symtabs, output_symtab
. Determine which section they belong to
- Need mapping from input symtab to output symtab
- When you see the second object file that defines or calls
my_strlen
, you need to know that it’s collected already - If a symbol is never defined, linker error
- If multiple definitions, linker error
For the executable, need to:
- Fix all
st_value
. - Recalculate
st_name
(index into.strtab
). Need to generate a new strtab that’s a union of all previous strtabs - Fix
.rela.text
entries with new offset and info index from the new.symtab
Static library
Library: collection of object files, packaged into a single archive file that can be linked against. E.g., make a vector
library
ar rcs libvector.a addvec.o multivec.o
gcc main.o libvector.a -o main
gcc main.o -lvector -o main # equivalent
- Linking
libvector.a
doesn’t mean you link all the files. It only picks as needed. - Linker processes objects from left to right, so archives go last
- Archives only resolve previously unmatched symbols
- If circular reference, need to list libraries multiple times
Static: once you pull the objects, it becomes part of your executable main
Shared library
Static is wasteful. Every executable will have a copy of the object referenced. Shared: generate incomplete executable. When it’s run, link the objects shared on the disk by mmap()
- Multiple processes can also
mmap()
to a shared library
$ make
gcc -Wall -fpic -c -o main.o main.c
gcc -Wall -fpic -c -o sum.o sum.c
gcc -shared -o libsum.so sum.o
gcc -o main-dyn main.o libsum.so
- Need
-fpic
to generate objects that will eventually go in a shared lib
- (Forces every address to be relative, position independent)
-shared
flag creates a shared librarylibsum.so
fromsum.o
main-dyn
does not containsum.o
. It only contains the shell that helps accessingsum.o
Loading shared library
Now run ./main-dyn
. Doesn’t work!
$ # Dynamic linker can't find our libsum shared library
$ ./main-dyn
./main-dyn: error while loading shared libraries: libsum.so: cannot open shared object file: No such file or directory
Need to use ldd
to find the libraries needed to be loaded at runtime
$ ldd main-dyn
linux-vdso.so.1 (0x00007ffc951cf000)
libsum.so => not found # main-dyn *knows* about the shared lib
libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007f279d400000)
/lib64/ld-linux-x86-64.so.2 (0x00007f279d74f000)
- It knows it needs
libsum.so
, but can’t find it - Also knows and finds
libc.so.6
Shared libraries can be found in 3 ways:- In predefined system directory: /lib, /lib64, …
- Routinely run
ldconfig
that maintains a configuration file/etc/ld.so.conf
- (simple) Declare environment variable
LD_LIBRARY_PATH
:$ LD_LIBRARY_PATH=.
- This is temporary
- To make it permanent, run
export LD_LIBRARY_PATH=.
Load library with dlopen()
Do it at runtime: dynamically link against libsum.so
, find sum_array()
, and call it.
main-dlopen.c
#include <dlfcn.h>
#include <stdio.h>
/*
 * Load libsum.so at run time, look up sum_array() in it, and print the sum
 * of a small array.
 *
 * Returns 0 on success, 1 if the library cannot be loaded or the symbol
 * cannot be found (a diagnostic is written to stderr in both cases).
 */
int main() {
    // lazily load dynamic library
    void *handle = dlopen("libsum.so", RTLD_LAZY);
    if (handle == NULL) {
        // e.g. libsum.so is not on the dynamic loader's search path
        const char *err = dlerror();
        fprintf(stderr, "dlopen: %s\n", err ? err : "unknown error");
        return 1;
    }

    // dlsym() yields NULL when the symbol is missing; calling through it would crash
    long (*f)(long *p, int n) = dlsym(handle, "sum_array");
    if (f == NULL) {
        const char *err = dlerror();
        fprintf(stderr, "dlsym: %s\n", err ? err : "sum_array resolved to NULL");
        dlclose(handle);
        return 1;
    }

    long a[5] = {0, 1, 2, 3, 4};
    long sum = f(a, 5);
    printf("sum=%ld\n", sum);

    dlclose(handle);
    return 0;
}
No linking at link time:
gcc -Wall main-dlopen.c
LD_LIBRARY_PATH=.
./a.out
Library interpositioning
mymalloc.c
#include <stdio.h>
#include <stdlib.h>
#include <dlfcn.h>
/*
 * malloc wrapper function
 *
 * Interposes on malloc(): forwards the request to the next malloc definition
 * in library search order and reports each call on stderr.
 *
 * Returns the pointer produced by the underlying malloc, or NULL if no
 * underlying malloc could be located.
 *
 * NOTE: on glibc, RTLD_NEXT is only declared when _GNU_SOURCE is defined
 * before <dlfcn.h> is included.
 */
void *malloc(size_t size) {
    void *(*mallocp)(size_t size);

    // look at *other* libraries, find its malloc
    mallocp = dlsym(RTLD_NEXT, "malloc");
    if (mallocp == NULL) {
        // Fail the allocation rather than call through a NULL pointer.
        // A fixed message is used so that reporting cannot re-enter malloc.
        fputs("malloc wrapper: underlying malloc not found\n", stderr);
        return NULL;
    }

    void *ptr = mallocp(size);
    // printf doesn't work since printf calls malloc
    fputs("malloc wrapper called\n", stderr);
    return ptr;
}
gcc -fpic -shared -o mm.so mymalloc.c && LD_PRELOAD=./mm.so cal
- If library listed in
LD_PRELOAD
, it will also be loaded first- This will intercept standard library functions
- Result:
cal
calls malloc()
and free()
a lot
Position Independent Code
PIC
- Relocation: calculate PC-relative addresses to symbols once they are resolved at link time What if linking dynamically? Need fix at runtime.
- Don’t know symbol location until library is loaded, but at runtime, text segment already
mmap()
ed to the address space in read only mode. Can’t relocate - Text section needs to be writable, which can’t be shared among processes
- 32-bit PC relative address can’t reach 64-bit shared library locations. Need 64-bit absolute addressing
Data
Simpler Create a section in Data Segment: global offset table (GOT): array of pointers you have referenced
- When calling, direct from code to GOT table’s location in data
- At runtime, when libraries are loaded, GOT is filled in
- Assumes the relative location of GOT and code section is a constant
mov 0x2008b9(%rip), %rax # refer to the GOT
addl $0x1, (%rax) # deref rax again for addcnt
Function
2 levels of redirection: GOT and PLT (procedure link table)
Before first call, GOT[4]
has a fake address, since we don’t want to resolve every symbol up front
First call
- Jump to the PLT section of “call
addvec()
” - Grab
GOT[4]
, unfilled to begin with. (proceed to next instruction)- Push
1
onto the stack (relocation entry number foraddvec()
)
- Push
- Jump to
PLT[0]
, a special entry in PLT that will jump to the dynamic linker section- Push address of relocation entries
GOT[1]
(also preparing arguments for the dynamic linker)
- Push address of relocation entries
- Jump to dynamic linker
GOT[2]
- Dynamic linker finds
addvec()
, do relocation, and put intoGOT[4]
Subsequent calls
- Dynamic linker finds
- Call
addvec()
. Jump to PLT section (array of code snippets) - Grab actual
(*)addvec()
from GOT
Demo
objdump -d main-dyn
main()
executescall 1070 <sum_array@plt>
- PLT entry at
1070
:bnd jmp *0x2f45(%rip) # 3fc0 <sum_array@Base>
readelf -a main-dyn
:- Relocation entry in
.rela.plt
for GOT entry (3fc0):
Offset Info Type Sym. Value Sym. Name + Addend
000000003fc0 000300000007 R_X86_64_JUMP_SLO 0000000000000000 sum_array + 0
- Relocation entry in
readelf -x .got main-dyn
- The hexdump shows the fallthrough address at
0x3fc0
:0x1030
Hex dump of section '.got':
0x00003fb8 00000000 00000000 30100000 00000000 ........0.......
- The hexdump shows the fallthrough address at
objdump -d main-dyn
:- See fallthrough address in
.plt
section:
0000000000001020 <.plt>:
1020: ff 35 8a 2f 00 00 push 0x2f8a(%rip) # 3fb0 <_GLOBAL_OFFSET_TABLE_+0x8>
1026: f2 ff 25 8b 2f 00 00 bnd jmp *0x2f8b(%rip) # 3fb8 <_GLOBAL_OFFSET_TABLE_+0x10>
102d: 0f 1f 00 nopl (%rax)
1030: f3 0f 1e fa endbr64
1034: 68 00 00 00 00 push $0x0
1039: f2 e9 e1 ff ff ff bnd jmp 1020 <_init+0x20>
103f: 90
- See fallthrough address in
Debugger
Change the code to call sum_array()
twice
- Need to make another executable, with
-no-pie
(regular executable)- Shared library can come from anywhere
- Code at a fixed address
make main-no-pie
./main-no-pie
ldb ./main-no-pie
(ldb) b 401060 # where plt section for sum_array begins
# observe GOT entry
(ldb) x 404018 # 401030, directly jump down
(ldb) c # keep stepping
# first time: reference GOT
# eventually reach the dynamic linker address
(ldb) c
(ldb) x 404018 # (*)sum_array now!
# will call sum_array()
Makefile
Position independence and dynamic linking are orthogonal concepts
main-dyn
: Pie (position independent) and dynamically normally linked- Linked with the C library and libsum
file main-dyn
shows it’s an LSB pie executable
,dynamically linked, interpreter ...
(loader)objdump -d main-dyn
: all addresses start from 0 (actually 1000, skipping a page for header). Not real addresses. When running, starting address can be any place in the memory
main-no-pie
gcc -no-pie
- Also dynamically linked
file main-no-pie
:LSB executable
, doesn’t say “pie”objdump -d main-no-pie
: Addresses fixed, needed byldb
main-static-pie
: statically linked withpie
gcc -static-pie
ldd main-static-pie
: “statically linked”, has all code in itself, very large in sizefile main-static-pie
: “LSB pie executable”, “static-pie linked”objdump -d main-static-pie
: very long file, started from 0
main-static
: statically linked withoutpie
gcc -static
(what ldb
and cld
use)
ldd main-static
: “not a dynamic executable”file main-static
: “statically linked”, no “pie”objdump -d
: address starting randomly