Last active
October 31, 2019 18:14
-
-
Save nezlooy/5a44f468ea19018bf0e2491ec886cd94 to your computer and use it in GitHub Desktop.
Binary Grep
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #define _XOPEN_SOURCE 1 /* Required under GLIBC for nftw() */ | |
| #define _XOPEN_SOURCE_EXTENDED 1 /* Same */ | |
| /* | |
| sudo gcc -O2 -x c -o /usr/bin/bgrep bgrep.c | |
| */ | |
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include <string.h> | |
| #include <sys/types.h> | |
| #include <dirent.h> | |
| #include <fcntl.h> | |
| #include <unistd.h> | |
| #include <sys/stat.h> | |
| #include <getopt.h> | |
| #include <ctype.h> | |
| #include <stdarg.h> | |
| #include <stdbool.h> | |
| #include <limits.h> | |
| #include <ftw.h> | |
| #include <regex.h> | |
| #include <errno.h> | |
| #include <libgen.h> | |
| #include <signal.h> | |
| #define BGREP_VERSION "0.5.1" | |
| #define SPARE_FDS 5 | |
| extern int process(const char *fpath, const struct stat *sb, int flag, struct FTW *ftwbuf); | |
| #ifndef O_BINARY | |
| #define O_BINARY 0 | |
| #endif | |
/*
 * ANSI SGR escape sequences used to colourise terminal output.
 * The CLR_N* macros switch the corresponding attribute back off;
 * CLR_BREAK resets every attribute at once.
 */
#define CLR_BOLD "\033[1m"
#define CLR_NBOLD "\033[22m"
#define CLR_UNDERLINE "\033[4m"
#define CLR_NUNDERLINE "\033[24m" /* SGR 24 = underline off (previously repeated SGR 4) */
#define CLR_INVERSE "\033[7m"
#define CLR_NINVERSE "\033[27m" /* SGR 27 = inverse off (previously repeated SGR 7) */
#define CLR_GRAY "\033[90m"
#define CLR_RED "\033[91m"
#define CLR_GREEN "\033[92m"
#define CLR_YELLOW "\033[93m"
#define CLR_BLUE "\033[94m"
#define CLR_PURPLE "\033[95m"
#define CLR_CYAN "\033[96m"
#define CLR_BREAK "\033[0m"
/* Number of context bytes to print before / after each match (-B, -A, -C). */
int bytes_before = 0, bytes_after = 0;

/* Offset filters (-g, -l, -e). A value is only honoured when the matching
 * use_* flag below has been set by the option parser. */
unsigned long long greater_or_equal_offset = 0,
                   less_or_equal_offset = 0,
                   equal_offset = 0;
bool use_greater_or_equal_offset = false,
     use_less_or_equal_offset = false,
     use_equal_offset = false,
     use_filename_regex = false;

/* Raised by the SIGUSR1 handler and cleared by searchfile() once the status
 * line has been printed. It is written from a signal handler, so it must be
 * a volatile sig_atomic_t rather than a plain bool. */
volatile sig_atomic_t print_status_by_sig = 0;

/* Search pattern: input byte b matches position i when
 * (b & grep_mask[i]) == grep_value[i]. grep_len is the pattern length. */
unsigned char grep_value[0x100], grep_mask[0x100];
int grep_len = 0;

/* Compiled filename filter (-P); valid only when use_filename_regex is set. */
regex_t r;

void die(const char* msg, ...);
void print_status_by_sig_fn(const char *msg, ...);
/*
 * Write one byte to stdout: printable characters are emitted verbatim,
 * everything else as a two-digit lowercase hex escape of the form \xNN.
 */
void print_char(unsigned char c){
    if (!isprint(c)){
        printf("\\x%02x", (int)c);
        return;
    }
    putchar(c);
}
/*
 * Convert one hexadecimal digit character to its numeric value.
 *
 * Accepts '0'-'9', 'A'-'F' and 'a'-'f'.
 * Returns the value 0..15, or -1 when c is not a hex digit.
 */
int ascii2hex(char c){
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'A' && c <= 'F')
        return 10 + (c - 'A');
    if (c >= 'a' && c <= 'f')
        return 10 + (c - 'a');
    return -1;
}
| /* TODO: this will not work with STDIN or pipes | |
| * we have to maintain a window of the bytes before which I am too lazy to do | |
| * right now. | |
| */ | |
| void dump_context(int fd, unsigned long long pos){ | |
| off_t save_pos = lseek(fd, 0, SEEK_CUR); | |
| if (save_pos == (off_t)-1){ | |
| perror("lseek"); | |
| return; /* this one is not fatal*/ | |
| } | |
| char buf[1024]; | |
| off_t start = pos - bytes_before; | |
| if (pos == 0) | |
| start = 0; | |
| int bytes_to_read = bytes_before + bytes_after; | |
| if (lseek(fd, start, SEEK_SET) == (off_t)-1){ | |
| perror("lseek"); | |
| return; | |
| } | |
| for (;bytes_to_read;){ | |
| int read_chunk = bytes_to_read > sizeof(buf) ? sizeof(buf) : bytes_to_read; | |
| int bytes_read = read(fd, buf, read_chunk); | |
| if (bytes_to_read < 0){ | |
| perror("read"); | |
| die("Error reading context"); | |
| } | |
| char *buf_end = buf + read_chunk; | |
| char *p = buf; | |
| printf("\t"); | |
| for (; p < buf_end; p++) | |
| print_char(*p); | |
| bytes_to_read -= read_chunk; | |
| } | |
| printf("\n\n"); | |
| if (lseek(fd, save_pos, SEEK_SET) == (off_t)-1){ | |
| perror("lseek"); | |
| die("Could not restore the original file offset while printing context"); | |
| } | |
| } | |
| unsigned int files_counter = 0; | |
| void searchfile(const char *fpath, int fd){ | |
| off_t offset = 0; | |
| unsigned char buf[1024]; | |
| if (use_greater_or_equal_offset && (offset = use_greater_or_equal_offset) && lseek(fd, use_greater_or_equal_offset, SEEK_SET) == (off_t)-1){ | |
| perror("use_greater_or_equal_offset && llseek"); | |
| return; | |
| } | |
| if (use_equal_offset && (offset = equal_offset) && lseek(fd, equal_offset, SEEK_SET) == (off_t)-1){ | |
| perror("use_greater_or_equal_offset && llseek"); | |
| return; | |
| } | |
| files_counter++; | |
| if (print_status_by_sig){ | |
| fprintf(stderr, "%sScanning for %s%s\"%s\"%s %s(files counter: %d)%s\n", CLR_GREEN, CLR_BREAK, CLR_RED, fpath, CLR_BREAK, CLR_YELLOW, files_counter, CLR_BREAK); | |
| fflush(stderr); | |
| print_status_by_sig = false; | |
| } | |
| int len = grep_len; | |
| len--; | |
| while (1){ | |
| int r; | |
| void *mv = memmove(buf, buf + sizeof(buf) - len, len); | |
| r = read(fd, buf + len, sizeof(buf) - len); | |
| if (r < 0){ | |
| perror("read"); | |
| return; | |
| } else | |
| if (!r) | |
| return; | |
| int o, i; | |
| for (o = offset ? 0 : len; o < r; ++o){ | |
| for (i = 0; i <= len; ++i) | |
| if ((buf[o + i] & grep_mask[i]) != grep_value[i]) | |
| break; | |
| if (i > len){ | |
| unsigned long long pos = (unsigned long long)(offset + o - len); | |
| if (use_equal_offset && equal_offset != pos) | |
| return; | |
| printf("%sFound in \"%s\" at %s%s0x%llx ( %d )%s\n", CLR_CYAN, fpath, CLR_BREAK, CLR_YELLOW, pos, pos, CLR_BREAK); | |
| if (bytes_before || bytes_after) | |
| dump_context(fd, pos); | |
| if (use_equal_offset) | |
| return; | |
| } | |
| } | |
| offset += r; | |
| if (use_less_or_equal_offset && offset > less_or_equal_offset) | |
| return; | |
| if (use_equal_offset && offset > equal_offset) | |
| return; | |
| } | |
| } | |
/*
 * Print a printf-style message followed by a newline to stderr and
 * terminate the process with EXIT_FAILURE. Never returns.
 */
void die(const char *msg, ...){
    va_list args;

    va_start(args, msg);
    vfprintf(stderr, msg, args);
    va_end(args);

    fputc('\n', stderr);
    exit(EXIT_FAILURE);
}
| void usage(char** argv){ | |
| fprintf(stderr, "bgrep version: %s\n", BGREP_VERSION); | |
| fprintf(stderr, "usage: %s [args] <hex> [<path> [...]]\n\n", *argv); | |
| fprintf(stderr, "args:\n"); | |
| fprintf(stderr, " -B n show N bytes before\n"); | |
| fprintf(stderr, " -A n show N bytes after\n"); | |
| fprintf(stderr, " -C n show N bytes before and after\n"); | |
| fprintf(stderr, " -P \"REGEXP\" filename regex pattern\n"); | |
| fprintf(stderr, " -c `chdir` to each directory before handling its contents\n"); | |
| fprintf(stderr, " -g n offset is greater than or equal\n"); | |
| fprintf(stderr, " -l n offset is less than or equal\n"); | |
| fprintf(stderr, " -e n offset is equal\n"); | |
| exit(1); | |
| } | |
| void walk(char *fpath, int flags){ | |
| struct stat s; | |
| if (lstat(fpath, &s)){ | |
| perror(fpath); | |
| return; | |
| } | |
| if (!S_ISDIR(s.st_mode)){ | |
| if (use_filename_regex && regexec(&r, basename(fpath), 0, 0, 0)){ | |
| return; | |
| } | |
| int fd = open(fpath, O_RDONLY | O_BINARY); | |
| if (fd < 0){ | |
| perror(fpath); | |
| } else { | |
| searchfile(fpath, fd); | |
| close(fd); | |
| } | |
| return; | |
| } | |
| int nfds = getdtablesize() - SPARE_FDS; | |
| if (nftw(fpath, process, nfds, flags) != 0){ // | FTW_CHDIR | |
| fprintf(stderr, "Invalid fpath: %s", fpath); | |
| return; | |
| } | |
| return; | |
| } | |
| int process(const char *fpath, const struct stat *sb, int flag, struct FTW *ftwbuf){ | |
| int retval = 0; | |
| const char *filename = fpath + ftwbuf->base; | |
| if (flag == FTW_F){ | |
| if (use_filename_regex && regexec(&r, filename, 0, 0, 0)){ | |
| return 0; | |
| } | |
| int fd = open(fpath, O_RDONLY | O_BINARY); | |
| if (fd < 0){ | |
| perror(fpath); | |
| } else { | |
| searchfile(fpath, fd); | |
| close(fd); | |
| } | |
| } | |
| return 0; | |
| } | |
| void sig_SIGUSR1_handler(int signo){ | |
| if (signo == SIGUSR1){ | |
| print_status_by_sig = true; | |
| if (signal(SIGUSR1, sig_SIGUSR1_handler) == SIG_ERR){ | |
| exit(EXIT_FAILURE); | |
| } | |
| } | |
| } | |
| int main(int argc, char **argv){ | |
| int opt; | |
| int ftw_flags = FTW_PHYS | FTW_DEPTH; | |
| if (argc < 2){ | |
| usage(argv); | |
| } | |
| while ((opt = getopt(argc, argv, "A:B:C:P:c:g:l:e:")) != -1){ | |
| switch (opt){ | |
| case 'A': | |
| bytes_after = atoi(optarg); | |
| break; | |
| case 'B': | |
| bytes_before = atoi(optarg); | |
| break; | |
| case 'C': | |
| bytes_before = bytes_after = atoi(optarg); | |
| break; | |
| case 'P': { | |
| if (regcomp(&r, optarg, REG_EXTENDED | REG_NOSUB)){ | |
| die("Invalid filename regex pattern"); | |
| } | |
| use_filename_regex = true; | |
| } break; | |
| case 'c': { | |
| ftw_flags |= FTW_CHDIR; | |
| } break; | |
| case 'g': | |
| greater_or_equal_offset = atoll(optarg); | |
| use_greater_or_equal_offset = true; | |
| break; | |
| case 'l': | |
| less_or_equal_offset = atoll(optarg); | |
| use_less_or_equal_offset = true; | |
| break; | |
| case 'e': | |
| equal_offset = atoll(optarg); | |
| use_equal_offset = true; | |
| break; | |
| default: | |
| usage(argv); | |
| } | |
| } | |
| if (bytes_before < 0) | |
| die("Invalid grep_value %d for bytes before", bytes_before); | |
| if (bytes_after < 0) | |
| die("Invalid grep_value %d for bytes after", bytes_after); | |
| argv += optind - 1; /* advance the pointer to the first non-opt arg */ | |
| argc -= optind - 1; | |
| char *h = argv[1]; | |
| enum { MODE_HEX, MODE_TXT, MODE_TXT_ESC } parse_mode = MODE_HEX; | |
| while (*h && (parse_mode != MODE_HEX || h[1]) && grep_len < 0x100){ | |
| int on_quote = (h[0] == '"'); | |
| int on_esc = (h[0] == '\\'); | |
| switch (parse_mode){ | |
| case MODE_HEX: | |
| if (on_quote){ | |
| parse_mode = MODE_TXT; | |
| h++; | |
| continue; /* works under switch - will continue the loop*/ | |
| } | |
| break; /* this one is for switch */ | |
| case MODE_TXT: | |
| if (on_quote){ | |
| parse_mode = MODE_HEX; | |
| h++; | |
| continue; | |
| } | |
| if (on_esc){ | |
| parse_mode = MODE_TXT_ESC; | |
| h++; | |
| continue; | |
| } | |
| grep_value[grep_len] = h[0]; | |
| grep_mask[grep_len++] = 0xff; | |
| h++; | |
| continue; | |
| case MODE_TXT_ESC: | |
| grep_value[grep_len] = h[0]; | |
| grep_mask[grep_len++] = 0xff; | |
| parse_mode = MODE_TXT; | |
| h++; | |
| continue; | |
| } | |
| if (h[0] == '?' && h[1] == '?'){ | |
| grep_value[grep_len] = grep_mask[grep_len] = 0; | |
| grep_len++; | |
| h += 2; | |
| } else if (h[0] == ' '){ | |
| h++; | |
| } else { | |
| int v0 = ascii2hex(*h++), | |
| v1 = ascii2hex(*h++); | |
| if ((v0 == -1) || (v1 == -1)){ | |
| fprintf(stderr, "invalid hex string!\n"); | |
| return EXIT_FAILURE; | |
| } | |
| grep_value[grep_len] = (v0 << 4) | v1; | |
| grep_mask[grep_len++] = 0xFF; | |
| } | |
| } | |
| if (!grep_len || *h){ | |
| fprintf(stderr, "invalid/empty search string\n"); | |
| return EXIT_FAILURE; | |
| } | |
| if (signal(SIGUSR1, sig_SIGUSR1_handler) == SIG_ERR){ | |
| fprintf(stderr, "can't catch SIGUSR1\n"); | |
| return EXIT_FAILURE; | |
| } | |
| if (argc < 3){ | |
| searchfile("stdin", 0); | |
| } else { | |
| int c = 2; | |
| while (c < argc){ | |
| walk(argv[c++], ftw_flags); | |
| } | |
| } | |
| return EXIT_SUCCESS; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment