Skip to content

Instantly share code, notes, and snippets.

@Annihil
Created April 4, 2016 18:55
Show Gist options
  • Select an option

  • Save Annihil/25e316e6d890236c2861c662d251275b to your computer and use it in GitHub Desktop.

Select an option

Save Annihil/25e316e6d890236c2861c662d251275b to your computer and use it in GitHub Desktop.

Revisions

  1. Annihil renamed this gist Apr 4, 2016. 1 changed file with 0 additions and 0 deletions.
  2. Annihil renamed this gist Apr 4, 2016. 1 changed file with 0 additions and 0 deletions.
  3. Annihil created this gist Apr 4, 2016.
    118 changes: 118 additions & 0 deletions substringCount_benchmarks.cpp
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,118 @@
    #include <iostream>
    #include <string>
    #include <algorithm>
    #include <string.h>
    #include <stdio.h>

    using namespace std;

    inline int countSubstring(const string &str, const string &sub) {
    if (sub.length() == 0) return 0;
    int count = 0;
    for (size_t offset = str.find(sub); offset != std::string::npos;
    offset = str.find(sub, offset + sub.length())) {
    ++count;
    }
    return count;
    }

    char* strnchr(const char *s, int c, int len) {
    int cpt;
    for (cpt = 0; cpt < len && s[cpt]; cpt++)
    if (s[cpt] == c)
    return ((char *) s + cpt);
    return (NULL);
    }

    char* strfaststr(unsigned char *haystack, unsigned int hl, unsigned char *needle, unsigned int nl) {
    char *cpt, *found, *end;
    if (hl < nl || !haystack || !needle || !nl || !hl) return (NULL);
    cpt = (char *) haystack;
    end = (char *) haystack + hl;
    while (cpt < end) {
    found = strnchr((const char *) cpt, (int) needle[0], hl);
    if (!found) return (NULL);
    if (nl == 1) return (found);
    if (!strncmp((const char *) found + 1, (const char *) needle + 1, nl - 1))
    return ((char *) found);
    else {
    if (found + nl >= end)
    break;
    if (found + nl < end)
    cpt = found + 1;
    }
    }
    return (NULL);
    }

    void bench() {
    string s = /*string(131000, '_') +*/ "Q#VNMQiQsk=select&O7Y53J8DJH=stvLul1pHf&WuBAGVsdyt=iIQJi#wgtX&Z2BzgjBw7y=2ZUZSWk60r&select=uhI3W1WXnW&RQUIwOg#VN=GS3ErZJ1WG&JBndQVNkgC=Ct4ekpiIc7&rOG8D7#GES=select";
    char* szStr = &s[0];
    size_t len = s.size();

    string pattern = string("#");
    char* szPattern = &pattern[0];
    size_t patternLen = pattern.size();

    int sum = 0;
    int i = 1;
    for (i = 0; i < 100000000; i++) {
    /* MEMMEM */
    char* p;
    int c = 0;
    int tmp_idx = 0;
    while ((p = (char*) memmem(szStr + tmp_idx, len - tmp_idx, szPattern, patternLen)) != NULL) {
    c++;
    tmp_idx = (p - szStr) + patternLen;
    }
    sum += c;

    /* STRSTR */
    char* p = szStr;
    int c = 0;
    while ((p = strstr(p, szPattern)) != NULL) {
    c++;
    p += patternLen;
    }
    sum += c;

    /* STRFASTSTR */
    int nb_match = 0;
    int tmp_idx = 0;
    while (1) {
    unsigned char* ret = (unsigned char *) strfaststr((unsigned char *) szStr + tmp_idx, (unsigned int) len - tmp_idx, (unsigned char *) szPattern, (unsigned int) patternLen);
    if (ret) {
    nb_match++;
    }
    else
    break;
    if (nb_match && ret < ((unsigned char *) szStr + len)) {
    tmp_idx = (ret - (unsigned char *) szStr) + 1;
    if (tmp_idx > (int) (len - 1))
    break;
    }
    else
    break;
    }
    sum += nb_match;

    /* STRCHR */
    char* p = szStr;
    int c = 0;
    while ((p = strchr(p, szPattern[0])) != NULL) {
    c++;
    p++;
    }
    sum += c;

    /* STRING::FIND */
    sum += countSubstring(s, pattern);
    }

    cout << "occurence: " << sum / i;
    }

    int main() {
    bench();
    return 0;
    }