Skip to content

Instantly share code, notes, and snippets.

@nezlooy
Last active October 31, 2019 18:14
Show Gist options
  • Select an option

  • Save nezlooy/5a44f468ea19018bf0e2491ec886cd94 to your computer and use it in GitHub Desktop.

Select an option

Save nezlooy/5a44f468ea19018bf0e2491ec886cd94 to your computer and use it in GitHub Desktop.
Binary Grep
#define _XOPEN_SOURCE 1 /* Required under GLIBC for nftw() */
#define _XOPEN_SOURCE_EXTENDED 1 /* Same */
/*
 * Build and install:
 *   sudo gcc -O2 -x c -o /usr/bin/bgrep bgrep.c
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <dirent.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
#include <getopt.h>
#include <ctype.h>
#include <stdarg.h>
#include <stdbool.h>
#include <limits.h>
#include <ftw.h>
#include <regex.h>
#include <errno.h>
#include <libgen.h>
#include <signal.h>
/* Version string printed by usage(). */
#define BGREP_VERSION "0.5.1"
/* Subtracted from getdtablesize() in walk() to get the descriptor limit passed to nftw(). */
#define SPARE_FDS 5
/* nftw() callback; defined after walk(), so it is declared here first. */
extern int process(const char *fpath, const struct stat *sb, int flag, struct FTW *ftwbuf);
/* O_BINARY is OR-ed into the open() flags; make it a no-op when the headers do not define it. */
#ifndef O_BINARY
#define O_BINARY 0
#endif
/*
 * ANSI SGR escape sequences used to colour terminal output.
 * The CLR_N* macros switch the matching attribute back off
 * (22 = normal intensity, 24 = underline off, 27 = inverse off);
 * CLR_BREAK resets every attribute.
 */
#define CLR_BOLD "\033[1m"
#define CLR_NBOLD "\033[22m"
#define CLR_UNDERLINE "\033[4m"
#define CLR_NUNDERLINE "\033[24m"
#define CLR_INVERSE "\033[7m"
#define CLR_NINVERSE "\033[27m"
#define CLR_GRAY "\033[90m"
#define CLR_RED "\033[91m"
#define CLR_GREEN "\033[92m"
#define CLR_YELLOW "\033[93m"
#define CLR_BLUE "\033[94m"
#define CLR_PURPLE "\033[95m"
#define CLR_CYAN "\033[96m"
#define CLR_BREAK "\033[0m"
/* Number of context bytes printed before / after each match (-B, -A, -C). */
int bytes_before = 0, bytes_after = 0;
/* Offset limits parsed from -g, -l and -e; each is only consulted when its use_* flag is true. */
unsigned long long greater_or_equal_offset = 0,
less_or_equal_offset = 0,
equal_offset = 0;
/* Set to true in main() when the corresponding option (-g, -l, -e, -P) was given. */
bool use_greater_or_equal_offset = false,
use_less_or_equal_offset = false,
use_equal_offset = false,
use_filename_regex = false;
/*
 * Set by the SIGUSR1 handler; searchfile() prints a progress line and clears it.
 * NOTE(review): written from a signal handler, so `volatile sig_atomic_t`
 * would be the portable type here - confirm before changing.
 */
bool print_status_by_sig = false;
/*
 * Search pattern built by main(): a file byte b matches pattern position i
 * when (b & grep_mask[i]) == grep_value[i]. "??" wildcards use mask 0.
 */
unsigned char grep_value[0x100], grep_mask[0x100];
/* Number of valid entries in grep_value / grep_mask. */
int grep_len = 0;
/* Compiled -P filename pattern; only initialised when use_filename_regex is true. */
regex_t r;
/* Print a printf-style message plus newline to stderr and exit with failure. */
void die(const char* msg, ...);
/* NOTE(review): declared here, but no definition is visible in this file. */
void print_status_by_sig_fn(const char *msg, ...);
/*
 * Write one byte to stdout: printable characters are emitted verbatim,
 * everything else as a \xNN hexadecimal escape.
 */
void print_char(unsigned char c){
    if (!isprint(c)){
        printf("\\x%02x", (int)c);
        return;
    }
    putchar(c);
}
/*
 * Convert one ASCII hexadecimal digit to its numeric value.
 * Accepts '0'-'9', 'A'-'F' and 'a'-'f'; returns -1 for any other character.
 */
int ascii2hex(char c){
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'A' && c <= 'F')
        return 10 + (c - 'A');
    if (c >= 'a' && c <= 'f')
        return 10 + (c - 'a');
    return -1;
}
/*
 * Print the bytes surrounding a match at file offset `pos`: up to
 * `bytes_before` bytes preceding `pos`, followed by up to `bytes_after`
 * bytes starting at `pos`. Output is a tab, the bytes rendered through
 * print_char(), and a blank line.
 *
 * The descriptor's current offset is saved first and restored afterwards so
 * the caller can keep reading where it left off. Failing to save or move the
 * offset is reported and the context is skipped; failing to read the context
 * or to restore the offset is fatal (die()).
 *
 * TODO: this will not work with STDIN or pipes
 * we have to maintain a window of the bytes before which I am too lazy to do
 * right now.
 */
void dump_context(int fd, unsigned long long pos){
    off_t save_pos = lseek(fd, 0, SEEK_CUR);
    if (save_pos == (off_t)-1){
        perror("lseek");
        return; /* this one is not fatal */
    }

    /* Never reach back past the start of the file: a match closer than
     * bytes_before to offset 0 simply gets a shorter leading context. */
    unsigned long long before = bytes_before > 0 ? (unsigned long long)bytes_before : 0;
    if (before > pos)
        before = pos;
    unsigned long long remaining = before + (bytes_after > 0 ? (unsigned long long)bytes_after : 0);

    if (lseek(fd, (off_t)(pos - before), SEEK_SET) == (off_t)-1){
        perror("lseek");
        return;
    }

    unsigned char buf[1024];
    printf("\t");
    while (remaining > 0){
        size_t want = remaining > sizeof(buf) ? sizeof(buf) : (size_t)remaining;
        ssize_t got = read(fd, buf, want);
        if (got < 0){
            if (errno == EINTR)
                continue; /* interrupted by a signal: just retry */
            perror("read");
            die("Error reading context");
        }
        if (got == 0)
            break; /* end of file: less context available than requested */
        /* Print only what read() actually delivered, never stale buffer bytes. */
        for (ssize_t i = 0; i < got; i++)
            print_char(buf[i]);
        remaining -= (unsigned long long)got;
    }
    printf("\n\n");

    if (lseek(fd, save_pos, SEEK_SET) == (off_t)-1){
        perror("lseek");
        die("Could not restore the original file offset while printing context");
    }
}
unsigned int files_counter = 0;
void searchfile(const char *fpath, int fd){
off_t offset = 0;
unsigned char buf[1024];
if (use_greater_or_equal_offset && (offset = use_greater_or_equal_offset) && lseek(fd, use_greater_or_equal_offset, SEEK_SET) == (off_t)-1){
perror("use_greater_or_equal_offset && llseek");
return;
}
if (use_equal_offset && (offset = equal_offset) && lseek(fd, equal_offset, SEEK_SET) == (off_t)-1){
perror("use_greater_or_equal_offset && llseek");
return;
}
files_counter++;
if (print_status_by_sig){
fprintf(stderr, "%sScanning for %s%s\"%s\"%s %s(files counter: %d)%s\n", CLR_GREEN, CLR_BREAK, CLR_RED, fpath, CLR_BREAK, CLR_YELLOW, files_counter, CLR_BREAK);
fflush(stderr);
print_status_by_sig = false;
}
int len = grep_len;
len--;
while (1){
int r;
void *mv = memmove(buf, buf + sizeof(buf) - len, len);
r = read(fd, buf + len, sizeof(buf) - len);
if (r < 0){
perror("read");
return;
} else
if (!r)
return;
int o, i;
for (o = offset ? 0 : len; o < r; ++o){
for (i = 0; i <= len; ++i)
if ((buf[o + i] & grep_mask[i]) != grep_value[i])
break;
if (i > len){
unsigned long long pos = (unsigned long long)(offset + o - len);
if (use_equal_offset && equal_offset != pos)
return;
printf("%sFound in \"%s\" at %s%s0x%llx ( %d )%s\n", CLR_CYAN, fpath, CLR_BREAK, CLR_YELLOW, pos, pos, CLR_BREAK);
if (bytes_before || bytes_after)
dump_context(fd, pos);
if (use_equal_offset)
return;
}
}
offset += r;
if (use_less_or_equal_offset && offset > less_or_equal_offset)
return;
if (use_equal_offset && offset > equal_offset)
return;
}
}
/*
 * Report a fatal error: format `msg` printf-style to stderr, terminate the
 * line, and exit the process with EXIT_FAILURE. Never returns.
 */
void die(const char *msg, ...){
    va_list args;

    va_start(args, msg);
    vfprintf(stderr, msg, args);
    va_end(args);

    fputc('\n', stderr);
    exit(EXIT_FAILURE);
}
/*
 * Print the version banner and option summary to stderr, then exit with
 * status 1. `argv[0]` is used as the program name in the usage line.
 */
void usage(char** argv){
    static const char *const help_lines[] = {
        "args:\n",
        " -B n show N bytes before\n",
        " -A n show N bytes after\n",
        " -C n show N bytes before and after\n",
        " -P \"REGEXP\" filename regex pattern\n",
        " -c `chdir` to each directory before handling its contents\n",
        " -g n offset is greater than or equal\n",
        " -l n offset is less than or equal\n",
        " -e n offset is equal\n",
    };
    const size_t line_count = sizeof(help_lines) / sizeof(help_lines[0]);

    fprintf(stderr, "bgrep version: %s\n", BGREP_VERSION);
    fprintf(stderr, "usage: %s [args] <hex> [<path> [...]]\n\n", argv[0]);
    for (size_t idx = 0; idx < line_count; ++idx)
        fputs(help_lines[idx], stderr);
    exit(1);
}
/*
 * Search one command-line path.
 *
 * A non-directory is searched directly (subject to the -P filename filter);
 * a directory is traversed with nftw(), which calls process() for every
 * entry. `flags` is passed through to nftw() unchanged. Errors are reported
 * on stderr and the path is skipped.
 */
void walk(char *fpath, int flags){
    struct stat s;
    if (lstat(fpath, &s)){
        perror(fpath);
        return;
    }

    if (!S_ISDIR(s.st_mode)){
        /* Take the component after the last '/' without modifying fpath
         * (POSIX basename() is allowed to write to its argument). */
        const char *slash = strrchr(fpath, '/');
        const char *name = slash ? slash + 1 : fpath;
        if (use_filename_regex && regexec(&r, name, 0, 0, 0)){
            return;
        }
        int fd = open(fpath, O_RDONLY | O_BINARY);
        if (fd < 0){
            perror(fpath);
        } else {
            searchfile(fpath, fd);
            close(fd);
        }
        return;
    }

    /* Leave a few descriptors free for our own open() calls, but never hand
     * nftw() a non-positive limit. */
    int nfds = getdtablesize() - SPARE_FDS;
    if (nfds < 1)
        nfds = 1;
    if (nftw(fpath, process, nfds, flags) != 0){
        fprintf(stderr, "Invalid fpath: %s\n", fpath);
    }
}
/*
 * nftw() callback: search every regular file (FTW_F) whose name passes the
 * optional -P filter. All other entry types are ignored. Always returns 0,
 * so the walk continues even when a file cannot be opened.
 */
int process(const char *fpath, const struct stat *sb, int flag, struct FTW *ftwbuf){
    (void)sb; /* not needed; part of the nftw() callback signature */

    if (flag != FTW_F)
        return 0;

    /* ftwbuf->base is the index of the file name within fpath. */
    const char *leaf = &fpath[ftwbuf->base];
    if (use_filename_regex && regexec(&r, leaf, 0, 0, 0) != 0)
        return 0;

    int fd = open(fpath, O_RDONLY | O_BINARY);
    if (fd >= 0){
        searchfile(fpath, fd);
        close(fd);
    } else {
        perror(fpath);
    }
    return 0;
}
/*
 * SIGUSR1 handler: request a one-off progress line by setting
 * print_status_by_sig, then re-install itself with signal().
 *
 * Only async-signal-safe calls are made here: if the handler cannot be
 * re-installed the process ends through _exit(), because exit() would run
 * atexit handlers and flush stdio from signal context.
 */
void sig_SIGUSR1_handler(int signo){
    if (signo != SIGUSR1)
        return;
    print_status_by_sig = true;
    if (signal(SIGUSR1, sig_SIGUSR1_handler) == SIG_ERR){
        _exit(EXIT_FAILURE);
    }
}
/*
 * Parse `text` as a non-negative decimal integer not larger than `max`.
 * Calls die() (which exits) when the text is empty, has trailing garbage,
 * is negative or out of range. `what` names the option in the message.
 */
static long long parse_nonneg(const char *text, long long max, const char *what){
    char *end = NULL;
    errno = 0;
    long long v = strtoll(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE || v < 0 || v > max)
        die("Invalid value \"%s\" for %s", text, what);
    return v;
}

/*
 * Entry point: parse options, translate the pattern argument into
 * grep_value / grep_mask, install the SIGUSR1 status handler and search
 * either stdin (no paths given) or each path argument in turn.
 *
 * Pattern syntax: pairs of hex digits, "??" for a wildcard byte, spaces
 * between bytes, and "quoted text" (with backslash escapes) for literal
 * characters. Returns EXIT_FAILURE for an invalid pattern.
 */
int main(int argc, char **argv){
    int opt;
    int ftw_flags = FTW_PHYS | FTW_DEPTH;
    if (argc < 2){
        usage(argv);
    }
    /* -c is a plain flag (see usage()), so it takes no ':' here; with a ':'
     * it would swallow the following argument. */
    while ((opt = getopt(argc, argv, "A:B:C:P:cg:l:e:")) != -1){
        switch (opt){
        case 'A':
            bytes_after = (int)parse_nonneg(optarg, INT_MAX, "bytes after");
            break;
        case 'B':
            bytes_before = (int)parse_nonneg(optarg, INT_MAX, "bytes before");
            break;
        case 'C':
            bytes_before = bytes_after = (int)parse_nonneg(optarg, INT_MAX, "bytes before and after");
            break;
        case 'P':
            if (regcomp(&r, optarg, REG_EXTENDED | REG_NOSUB)){
                die("Invalid filename regex pattern");
            }
            use_filename_regex = true;
            break;
        case 'c':
            ftw_flags |= FTW_CHDIR;
            break;
        case 'g':
            greater_or_equal_offset = (unsigned long long)parse_nonneg(optarg, LLONG_MAX, "-g offset");
            use_greater_or_equal_offset = true;
            break;
        case 'l':
            less_or_equal_offset = (unsigned long long)parse_nonneg(optarg, LLONG_MAX, "-l offset");
            use_less_or_equal_offset = true;
            break;
        case 'e':
            equal_offset = (unsigned long long)parse_nonneg(optarg, LLONG_MAX, "-e offset");
            use_equal_offset = true;
            break;
        default:
            usage(argv);
        }
    }

    /* The first non-option argument is the pattern; without it there is
     * nothing to search for. */
    if (optind >= argc){
        usage(argv);
    }
    char *h = argv[optind];
    const int first_path = optind + 1;

    enum { MODE_HEX, MODE_TXT, MODE_TXT_ESC } parse_mode = MODE_HEX;
    /* In hex mode every token needs two characters, hence the h[1] test; a
     * dangling single character leaves *h set and is rejected below. */
    while (*h && (parse_mode != MODE_HEX || h[1]) && grep_len < 0x100){
        int on_quote = (h[0] == '"');
        int on_esc = (h[0] == '\\');
        switch (parse_mode){
        case MODE_HEX:
            if (on_quote){
                parse_mode = MODE_TXT;
                h++;
                continue; /* works under switch - will continue the loop */
            }
            break; /* this one is for switch */
        case MODE_TXT:
            if (on_quote){
                parse_mode = MODE_HEX;
                h++;
                continue;
            }
            if (on_esc){
                parse_mode = MODE_TXT_ESC;
                h++;
                continue;
            }
            grep_value[grep_len] = (unsigned char)h[0];
            grep_mask[grep_len++] = 0xff;
            h++;
            continue;
        case MODE_TXT_ESC:
            /* The character after a backslash is taken literally. */
            grep_value[grep_len] = (unsigned char)h[0];
            grep_mask[grep_len++] = 0xff;
            parse_mode = MODE_TXT;
            h++;
            continue;
        }
        if (h[0] == '?' && h[1] == '?'){
            /* Wildcard byte: mask 0 makes every input byte compare equal. */
            grep_value[grep_len] = grep_mask[grep_len] = 0;
            grep_len++;
            h += 2;
        } else if (h[0] == ' '){
            h++;
        } else {
            int v0 = ascii2hex(h[0]);
            int v1 = ascii2hex(h[1]);
            h += 2;
            if ((v0 == -1) || (v1 == -1)){
                fprintf(stderr, "invalid hex string!\n");
                return EXIT_FAILURE;
            }
            grep_value[grep_len] = (unsigned char)((v0 << 4) | v1);
            grep_mask[grep_len++] = 0xFF;
        }
    }
    if (!grep_len || *h){
        fprintf(stderr, "invalid/empty search string\n");
        return EXIT_FAILURE;
    }

    if (signal(SIGUSR1, sig_SIGUSR1_handler) == SIG_ERR){
        fprintf(stderr, "can't catch SIGUSR1\n");
        return EXIT_FAILURE;
    }

    if (first_path >= argc){
        searchfile("stdin", 0);
    } else {
        for (int c = first_path; c < argc; ++c){
            walk(argv[c], ftw_flags);
        }
    }

    if (use_filename_regex){
        regfree(&r);
    }
    return EXIT_SUCCESS;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment