| 1 | /* http://www.muppetlabs.com/~breadbox/software/elfkickers.html */ |
| 2 | |
| 3 | /* sstrip: Copyright (C) 1999-2001 by Brian Raiter, under the GNU |
| 4 | * General Public License. No warranty. See COPYING for details. |
| 5 | * |
| 6 | * Aug 23, 2004 Hacked by Manuel Novoa III <mjn3@codepoet.org> to |
| 7 | * handle targets of different endianness and/or elf class, making |
| 8 | * it more useful in a cross-devel environment. |
| 9 | */ |
| 10 | |
| 11 | /* ============== original README =================== |
| 12 | * |
| 13 | * sstrip is a small utility that removes the contents at the end of an |
| 14 | * ELF file that are not part of the program's memory image. |
| 15 | * |
| 16 | * Most ELF executables are built with both a program header table and a |
| 17 | * section header table. However, only the former is required in order |
| 18 | * for the OS to load, link and execute a program. sstrip attempts to |
| 19 | * extract the ELF header, the program header table, and its contents, |
| 20 | * leaving everything else in the bit bucket. It can only remove parts of |
| 21 | * the file that occur at the end, after the parts to be saved. However, |
| 22 | * this almost always includes the section header table, and occasionally |
| 23 | * a few random sections that are not used when running a program. |
| 24 | * |
| 25 | * It should be noted that the GNU bfd library is (understandably) |
| 26 | * dependent on the section header table as an index to the file's |
| 27 | * contents. Thus, an executable file that has no section header table |
| 28 | * cannot be used with gdb, objdump, or any other program based upon the |
| 29 | * bfd library, at all. In fact, the program will not even recognize the |
| 30 | * file as a valid executable. (This limitation is noted in the source |
| 31 | * code comments for bfd, and is marked "FIXME", so this may change at |
| 32 | * some future date. However, I would imagine that it is a pretty |
| 33 | * low-priority item, as executables without a section header table are |
| 34 | * rare in the extreme.) This probably also explains why strip doesn't |
| 35 | * offer the option to do this. |
| 36 | * |
| 37 | * Shared library files may also have their section header table removed. |
| 38 | * Such a library will still function; however, it will no longer be |
| 39 | * possible for a compiler to link a new program against it. |
| 40 | * |
| 41 | * As an added bonus, sstrip also tries to remove trailing zero bytes |
| 42 | * from the end of the file. (This normally cannot be done with an |
| 43 | * executable that has a section header table.) |
| 44 | * |
| 45 | * sstrip is a very simplistic program. It depends upon the common |
| 46 | * practice of putting the parts of the file that contribute to the |
| 47 | * memory image at the front, and the remaining material at the end. This |
| 48 | * permits it to discard the latter material without affecting file |
| 49 | * offsets and memory addresses in what remains. Of course, the ELF |
| 50 | * standard permits files to be organized in almost any order, so if a |
| 51 | * pathological linker decided to put its section headers at the top, |
| 52 | * sstrip would be useless on such executables. |
| 53 | */ |
| 54 | |
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
#include <elf.h>
#include <endian.h>
#include <byteswap.h>
| 62 | |
/* Boolean constants, supplied only when TRUE has not been defined yet.
 */
#if !defined(TRUE)
#define TRUE  1
#define FALSE 0
#endif
| 67 | |
/* Name of this program, printed as the first field of every
 * diagnostic.
 */
static const char *progname;

/* Name of the file currently being processed, printed as the second
 * field of every diagnostic.
 */
static const char *filename;
| 75 | |
| 76 | |
| 77 | /* A simple error-handling function. FALSE is always returned for the |
| 78 | * convenience of the caller. |
| 79 | */ |
| 80 | static int err(char const *errmsg) |
| 81 | { |
| 82 | fprintf(stderr, "%s: %s: %s\n", progname, filename, errmsg); |
| 83 | return FALSE; |
| 84 | } |
| 85 | |
/* Non-zero when multi-byte ELF fields must be byte-swapped on every
 * access (see EGET and ESET).
 */
static int do_reverse_endian = 0;
| 89 | |
/* Get a value from the elf header, compensating for endianness.
 *
 * The result is always widened to uint64_t. When do_reverse_endian is
 * clear, or the field is one byte wide, the field is returned as is;
 * otherwise its bytes are swapped according to its width (2, 4 or 8
 * bytes). A field of any other width is a programming error: a
 * message is printed and the program exits with EXIT_FAILURE.
 *
 * X is also used as a sizeof operand, so it should be a plain field
 * reference without side effects.
 */
#define EGET(X) \
    (__extension__ ({ \
        uint64_t eget_value_; \
        if (!do_reverse_endian) { \
            eget_value_ = (X); \
        } else if (sizeof(X) == 1) { \
            eget_value_ = (X); \
        } else if (sizeof(X) == 2) { \
            eget_value_ = bswap_16((X)); \
        } else if (sizeof(X) == 4) { \
            eget_value_ = bswap_32((X)); \
        } else if (sizeof(X) == 8) { \
            eget_value_ = bswap_64((X)); \
        } else { \
            /* sizeof yields a size_t, so it must be printed with %zu. */ \
            fprintf(stderr, "%s: %s: EGET failed for size %zu\n", \
                    progname, filename, sizeof(X)); \
            exit(EXIT_FAILURE); \
        } \
        eget_value_; \
    }))
| 112 | |
/* Set a value 'Y' in the elf header to 'X', compensating for endianness.
 *
 * When do_reverse_endian is clear, or Y is one byte wide, X is stored
 * unchanged; otherwise X is converted to the width of Y (2, 4 or 8
 * bytes) and stored byte-swapped. A destination of any other width is
 * a programming error: a message is printed and the program exits with
 * EXIT_FAILURE.
 *
 * Y must be an lvalue; it is also used as a sizeof operand.
 */
#define ESET(Y,X) \
    do { \
        if (!do_reverse_endian) { \
            Y = (X); \
        } else if (sizeof(Y) == 1) { \
            Y = (X); \
        } else if (sizeof(Y) == 2) { \
            Y = bswap_16((uint16_t)(X)); \
        } else if (sizeof(Y) == 4) { \
            Y = bswap_32((uint32_t)(X)); \
        } else if (sizeof(Y) == 8) { \
            Y = bswap_64((uint64_t)(X)); \
        } else { \
            /* sizeof yields a size_t, so it must be printed with %zu. */ \
            fprintf(stderr, "%s: %s: ESET failed for size %zu\n", \
                    progname, filename, sizeof(Y)); \
            exit(EXIT_FAILURE); \
        } \
    } while (0)
| 131 | |
| 132 | |
/* Report an I/O failure through err(). If errno is set, the system's
 * description of that error is printed; the supplied message is only
 * the fallback for the case where errno is zero.
 */
#define ferr(msg) (err((errno != 0) ? strerror(errno) : (msg)))
| 137 | |
| 138 | |
| 139 | |
/* HEADER_FUNCTIONS(CLASS) expands to the five worker functions for one
 * ELF class (CLASS is 32 or 64): readelfheaderCLASS, readphdrtableCLASS,
 * getmemorysizeCLASS, modifyheadersCLASS and commitchangesCLASS. All of
 * them return TRUE on success; the ones that can fail print a
 * diagnostic and return FALSE.
 */
#define HEADER_FUNCTIONS(CLASS) \
\
/* readelfheader() reads the part of the ELF header that follows the \
 * e_ident bytes into *ehdr (the e_ident bytes themselves are expected \
 * to be in place already), and checks to make sure that this is in \
 * fact a file that we should be munging. \
 */ \
static int readelfheader ## CLASS (int fd, Elf ## CLASS ## _Ehdr *ehdr) \
{ \
    size_t const remaining = sizeof(*ehdr) - EI_NIDENT; \
\
    /* Clear errno so that ferr() cannot report a stale error code \
     * when the read is merely short. \
     */ \
    errno = 0; \
    if (read(fd, (char *)ehdr + EI_NIDENT, remaining) \
        != (ssize_t)remaining) \
        return ferr("missing or incomplete ELF header."); \
\
    /* Verify the sizes of the ELF header and the program segment \
     * header table entries. \
     */ \
    if (EGET(ehdr->e_ehsize) != sizeof(Elf ## CLASS ## _Ehdr)) \
        return err("unrecognized ELF header size."); \
    if (EGET(ehdr->e_phentsize) != sizeof(Elf ## CLASS ## _Phdr)) \
        return err("unrecognized program segment header size."); \
\
    /* Finally, check the file type. \
     */ \
    if (EGET(ehdr->e_type) != ET_EXEC && EGET(ehdr->e_type) != ET_DYN) \
        return err("not an executable or shared-object library."); \
\
    return TRUE; \
} \
\
/* readphdrtable() loads the program segment header table into a \
 * newly allocated buffer and stores its address in *phdrs. On success \
 * the caller owns the buffer and must free() it; on failure *phdrs is \
 * left NULL. \
 */ \
static int readphdrtable ## CLASS (int fd, Elf ## CLASS ## _Ehdr const *ehdr, \
                                   Elf ## CLASS ## _Phdr **phdrs) \
{ \
    size_t size; \
\
    *phdrs = NULL; \
    if (!EGET(ehdr->e_phoff) || !EGET(ehdr->e_phnum)) \
        return err("ELF file has no program header table."); \
\
    size = EGET(ehdr->e_phnum) * sizeof **phdrs; \
    if (!(*phdrs = malloc(size))) \
        return err("Out of memory!"); \
\
    /* The table lives at e_phoff, which need not be directly behind \
     * the ELF header, so seek there explicitly before reading. \
     */ \
    errno = 0; \
    if (lseek(fd, EGET(ehdr->e_phoff), SEEK_SET) == (off_t)-1 || \
        read(fd, *phdrs, size) != (ssize_t)size) { \
        /* Report first: ferr() looks at errno, so nothing may \
         * disturb it before the message is printed. \
         */ \
        ferr("missing or incomplete program segment header table."); \
        free(*phdrs); \
        *phdrs = NULL; \
        return FALSE; \
    } \
\
    return TRUE; \
} \
\
/* getmemorysize() determines the offset of the last byte of the file \
 * that is referenced by an entry in the program segment header table. \
 * (Anything in the file after that point is not used when the program \
 * is executing, and thus can be safely discarded.) The result is \
 * stored in *newsize. \
 */ \
static int getmemorysize ## CLASS (Elf ## CLASS ## _Ehdr const *ehdr, \
                                   Elf ## CLASS ## _Phdr const *phdrs, \
                                   unsigned long *newsize) \
{ \
    Elf ## CLASS ## _Phdr const *phdr; \
    unsigned long size, n; \
    int i; \
\
    /* Start by setting the size to include the ELF header and the \
     * complete program segment header table. \
     */ \
    size = EGET(ehdr->e_phoff) + EGET(ehdr->e_phnum) * sizeof *phdrs; \
    if (size < sizeof *ehdr) \
        size = sizeof *ehdr; \
\
    /* Then keep extending the size to include whatever data the \
     * program segment header table references. Entries of type \
     * PT_NULL are ignored. \
     */ \
    for (i = 0, phdr = phdrs ; i < EGET(ehdr->e_phnum) ; ++i, ++phdr) { \
        if (EGET(phdr->p_type) == PT_NULL) \
            continue; \
        n = EGET(phdr->p_offset) + EGET(phdr->p_filesz); \
        if (n > size) \
            size = n; \
    } \
\
    *newsize = size; \
    return TRUE; \
} \
\
/* modifyheaders() removes references to the section header table if \
 * it was stripped, and reduces program header table entries that \
 * included truncated bytes at the end of the file. Only the in-memory \
 * copies are changed here. \
 */ \
static int modifyheaders ## CLASS (Elf ## CLASS ## _Ehdr *ehdr, \
                                   Elf ## CLASS ## _Phdr *phdrs, \
                                   unsigned long newsize) \
{ \
    Elf ## CLASS ## _Phdr *phdr; \
    int i; \
\
    /* If the section header table is gone, then remove all references \
     * to it in the ELF header. \
     */ \
    if (EGET(ehdr->e_shoff) >= newsize) { \
        ESET(ehdr->e_shoff,0); \
        ESET(ehdr->e_shnum,0); \
        ESET(ehdr->e_shentsize,0); \
        ESET(ehdr->e_shstrndx,0); \
    } \
\
    /* A segment that starts at or beyond the new end of the file has \
     * been stripped out entirely: it is moved to the new end and \
     * given a file size of zero. A segment that merely extends past \
     * the new end has its file size shortened to fit. \
     */ \
    for (i = 0, phdr = phdrs ; i < EGET(ehdr->e_phnum) ; ++i, ++phdr) { \
        if (EGET(phdr->p_offset) >= newsize) { \
            ESET(phdr->p_offset,newsize); \
            ESET(phdr->p_filesz,0); \
        } else if (EGET(phdr->p_offset) + EGET(phdr->p_filesz) > newsize) { \
            ESET(phdr->p_filesz, newsize - EGET(phdr->p_offset)); \
        } \
    } \
\
    return TRUE; \
} \
\
/* commitchanges() writes the new headers back to the original file \
 * and sets the file to its new size. Once the ELF header has been \
 * rewritten, any later failure is followed by a warning that the file \
 * may have been corrupted. \
 */ \
static int commitchanges ## CLASS (int fd, Elf ## CLASS ## _Ehdr const *ehdr, \
                                   Elf ## CLASS ## _Phdr *phdrs, \
                                   unsigned long newsize) \
{ \
    size_t n; \
\
    /* Save the changes to the ELF header, if any. errno is cleared \
     * before each system call so that ferr() reports the cause of \
     * that call's failure, or else the fallback message. \
     */ \
    errno = 0; \
    if (lseek(fd, 0, SEEK_SET)) \
        return ferr("could not rewind file"); \
    errno = 0; \
    if (write(fd, ehdr, sizeof *ehdr) != (ssize_t)sizeof *ehdr) \
        return ferr("could not modify file"); \
\
    /* Save the changes to the program segment header table, if any. \
     */ \
    errno = 0; \
    if (lseek(fd, EGET(ehdr->e_phoff), SEEK_SET) == (off_t)-1) { \
        ferr("could not seek in file."); \
        goto warning; \
    } \
    n = EGET(ehdr->e_phnum) * sizeof *phdrs; \
    errno = 0; \
    if (write(fd, phdrs, n) != (ssize_t)n) { \
        ferr("could not write to file"); \
        goto warning; \
    } \
\
    /* Eleventh-hour sanity check: don't truncate before the end of \
     * the program segment header table. \
     */ \
    if (newsize < EGET(ehdr->e_phoff) + n) \
        newsize = EGET(ehdr->e_phoff) + n; \
\
    /* Chop off the end of the file. \
     */ \
    errno = 0; \
    if (ftruncate(fd, newsize)) { \
        ferr("could not resize file"); \
        goto warning; \
    } \
\
    return TRUE; \
\
 warning: \
    return err("ELF file may have been corrupted!"); \
}
| 308 | |
| 309 | |
| 310 | /* First elements of Elf32_Ehdr and Elf64_Ehdr are common. |
| 311 | */ |
| 312 | static int readelfheaderident(int fd, Elf32_Ehdr *ehdr) |
| 313 | { |
| 314 | errno = 0; |
| 315 | if (read(fd, ehdr, EI_NIDENT) != EI_NIDENT) |
| 316 | return ferr("missing or incomplete ELF header."); |
| 317 | |
| 318 | /* Check the ELF signature. |
| 319 | */ |
| 320 | if (!(ehdr->e_ident[EI_MAG0] == ELFMAG0 && |
| 321 | ehdr->e_ident[EI_MAG1] == ELFMAG1 && |
| 322 | ehdr->e_ident[EI_MAG2] == ELFMAG2 && |
| 323 | ehdr->e_ident[EI_MAG3] == ELFMAG3)) |
| 324 | { |
| 325 | err("missing ELF signature."); |
| 326 | return -1; |
| 327 | } |
| 328 | |
| 329 | /* Compare the file's class and endianness with the program's. |
| 330 | */ |
| 331 | #if __BYTE_ORDER == __LITTLE_ENDIAN |
| 332 | if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB) { |
| 333 | do_reverse_endian = 0; |
| 334 | } else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) { |
| 335 | /* fprintf(stderr, "ELF file has different endianness.\n"); */ |
| 336 | do_reverse_endian = 1; |
| 337 | } |
| 338 | #elif __BYTE_ORDER == __BIG_ENDIAN |
| 339 | if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB) { |
| 340 | /* fprintf(stderr, "ELF file has different endianness.\n"); */ |
| 341 | do_reverse_endian = 1; |
| 342 | } else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) { |
| 343 | do_reverse_endian = 0; |
| 344 | } |
| 345 | #else |
| 346 | #error unkown endianness |
| 347 | #endif |
| 348 | else { |
| 349 | err("Unsupported endianness"); |
| 350 | return -1; |
| 351 | } |
| 352 | |
| 353 | /* Check the target architecture. |
| 354 | */ |
| 355 | /* if (EGET(ehdr->e_machine) != ELF_ARCH) { */ |
| 356 | /* /\* return err("ELF file created for different architecture."); *\/ */ |
| 357 | /* fprintf(stderr, "ELF file created for different architecture.\n"); */ |
| 358 | /* } */ |
| 359 | return ehdr->e_ident[EI_CLASS]; |
| 360 | } |
| 361 | |
| 362 | |
| 363 | HEADER_FUNCTIONS(32) |
| 364 | |
| 365 | HEADER_FUNCTIONS(64) |
| 366 | |
| 367 | /* truncatezeros() examines the bytes at the end of the file's |
| 368 | * size-to-be, and reduces the size to exclude any trailing zero |
| 369 | * bytes. |
| 370 | */ |
| 371 | static int truncatezeros(int fd, unsigned long *newsize) |
| 372 | { |
| 373 | unsigned char contents[1024]; |
| 374 | unsigned long size, n; |
| 375 | |
| 376 | size = *newsize; |
| 377 | do { |
| 378 | n = sizeof contents; |
| 379 | if (n > size) |
| 380 | n = size; |
| 381 | if (lseek(fd, size - n, SEEK_SET) == (off_t)-1) |
| 382 | return ferr("cannot seek in file."); |
| 383 | if (read(fd, contents, n) != (ssize_t)n) |
| 384 | return ferr("cannot read file contents"); |
| 385 | while (n && !contents[--n]) |
| 386 | --size; |
| 387 | } while (size && !n); |
| 388 | |
| 389 | /* Sanity check. |
| 390 | */ |
| 391 | if (!size) |
| 392 | return err("ELF file is completely blank!"); |
| 393 | |
| 394 | *newsize = size; |
| 395 | return TRUE; |
| 396 | } |
| 397 | |
| 398 | /* main() loops over the cmdline arguments, leaving all the real work |
| 399 | * to the other functions. |
| 400 | */ |
| 401 | int main(int argc, char *argv[]) |
| 402 | { |
| 403 | int fd; |
| 404 | union { |
| 405 | Elf32_Ehdr ehdr32; |
| 406 | Elf64_Ehdr ehdr64; |
| 407 | } e; |
| 408 | union { |
| 409 | Elf32_Phdr *phdrs32; |
| 410 | Elf64_Phdr *phdrs64; |
| 411 | } p; |
| 412 | unsigned long newsize; |
| 413 | char **arg; |
| 414 | int failures = 0; |
| 415 | |
| 416 | if (argc < 2 || argv[1][0] == '-') { |
| 417 | printf("Usage: sstrip FILE...\n" |
| 418 | "sstrip discards all nonessential bytes from an executable.\n\n" |
| 419 | "Version 2.0-X Copyright (C) 2000,2001 Brian Raiter.\n" |
| 420 | "Cross-devel hacks Copyright (C) 2004 Manuel Novoa III.\n" |
| 421 | "This program is free software, licensed under the GNU\n" |
| 422 | "General Public License. There is absolutely no warranty.\n"); |
| 423 | return EXIT_SUCCESS; |
| 424 | } |
| 425 | |
| 426 | progname = argv[0]; |
| 427 | |
| 428 | for (arg = argv + 1 ; *arg != NULL ; ++arg) { |
| 429 | filename = *arg; |
| 430 | |
| 431 | fd = open(*arg, O_RDWR); |
| 432 | if (fd < 0) { |
| 433 | ferr("can't open"); |
| 434 | ++failures; |
| 435 | continue; |
| 436 | } |
| 437 | |
| 438 | switch (readelfheaderident(fd, &e.ehdr32)) { |
| 439 | case ELFCLASS32: |
| 440 | if (!(readelfheader32(fd, &e.ehdr32) && |
| 441 | readphdrtable32(fd, &e.ehdr32, &p.phdrs32) && |
| 442 | getmemorysize32(&e.ehdr32, p.phdrs32, &newsize) && |
| 443 | truncatezeros(fd, &newsize) && |
| 444 | modifyheaders32(&e.ehdr32, p.phdrs32, newsize) && |
| 445 | commitchanges32(fd, &e.ehdr32, p.phdrs32, newsize))) |
| 446 | ++failures; |
| 447 | break; |
| 448 | case ELFCLASS64: |
| 449 | if (!(readelfheader64(fd, &e.ehdr64) && |
| 450 | readphdrtable64(fd, &e.ehdr64, &p.phdrs64) && |
| 451 | getmemorysize64(&e.ehdr64, p.phdrs64, &newsize) && |
| 452 | truncatezeros(fd, &newsize) && |
| 453 | modifyheaders64(&e.ehdr64, p.phdrs64, newsize) && |
| 454 | commitchanges64(fd, &e.ehdr64, p.phdrs64, newsize))) |
| 455 | ++failures; |
| 456 | break; |
| 457 | default: |
| 458 | ++failures; |
| 459 | break; |
| 460 | } |
| 461 | close(fd); |
| 462 | } |
| 463 | |
| 464 | return failures ? EXIT_FAILURE : EXIT_SUCCESS; |
| 465 | } |
| 466 | |