Skip to content

Commit

Permalink
Merge branch 'vmg/collisions'
Browse files Browse the repository at this point in the history
  • Loading branch information
kivikakk committed Jul 17, 2017
2 parents d219c0a + 62166fe commit 66a0836
Show file tree
Hide file tree
Showing 4 changed files with 126 additions and 78 deletions.
108 changes: 59 additions & 49 deletions src/references.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,6 @@
#include "inlines.h"
#include "chunk.h"

// Hashes a NUL-terminated label. Each byte is folded in as
//   byte + (hash << 6) + (hash << 16) - hash
// which is byte + hash * 65599 in wrapping unsigned arithmetic.
// An empty string hashes to 0.
static unsigned int refhash(const unsigned char *link_ref) {
  unsigned int acc = 0;
  const unsigned char *p;

  for (p = link_ref; *p != '\0'; ++p) {
    acc = *p + (acc << 6) + (acc << 16) - acc;
  }

  return acc;
}

static void reference_free(cmark_reference_map *map, cmark_reference *ref) {
cmark_mem *mem = map->mem;
if (ref != NULL) {
Expand Down Expand Up @@ -53,21 +44,6 @@ static unsigned char *normalize_reference(cmark_mem *mem, cmark_chunk *ref) {
return result;
}

// Inserts ref at the head of the hash bucket selected by
// ref->hash % REFMAP_SIZE. If the bucket already chains an entry with
// the same hash and the same label, the existing entry wins: ref is
// released with reference_free() and the table is left unchanged.
static void add_reference(cmark_reference_map *map, cmark_reference *ref) {
  cmark_reference **bucket = &map->table[ref->hash % REFMAP_SIZE];
  cmark_reference *cur;

  // Link ref in front of the current chain before scanning it.
  ref->next = *bucket;

  for (cur = *bucket; cur != NULL; cur = cur->next) {
    if (cur->hash != ref->hash)
      continue;
    if (strcmp((char *)cur->label, (char *)ref->label) == 0) {
      reference_free(map, ref);
      return;
    }
  }

  *bucket = ref;
}

void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label,
cmark_chunk *url, cmark_chunk *title) {
cmark_reference *ref;
Expand All @@ -77,64 +53,98 @@ void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label,
if (reflabel == NULL)
return;

assert(map->sorted == NULL);

ref = (cmark_reference *)map->mem->calloc(1, sizeof(*ref));
ref->label = reflabel;
ref->hash = refhash(ref->label);
ref->url = cmark_clean_url(map->mem, url);
ref->title = cmark_clean_title(map->mem, title);
ref->next = NULL;
ref->age = map->size;
ref->next = map->refs;

map->refs = ref;
map->size++;
}

// Compares two NUL-terminated labels with strcmp().
// Returns zero when equal, a negative value when a sorts before b,
// and a positive value when a sorts after b.
static int
labelcmp(const unsigned char *a, const unsigned char *b) {
  const char *lhs = (const char *)a;
  const char *rhs = (const char *)b;

  return strcmp(lhs, rhs);
}

static int
refcmp(const void *p1, const void *p2) {
cmark_reference *r1 = *(cmark_reference **)p1;
cmark_reference *r2 = *(cmark_reference **)p2;
int res = labelcmp(r1->label, r2->label);
return res ? res : ((int)r1->age - (int)r2->age);
}

// bsearch() comparator: the key is a NUL-terminated label and each
// array element is a `cmark_reference *`. Returns labelcmp() of the
// key against the element's label.
static int
refsearch(const void *label, const void *p2) {
  const unsigned char *key = (const unsigned char *)label;
  cmark_reference *entry = *(cmark_reference **)p2;

  return labelcmp(key, entry->label);
}

// Builds map->sorted: an array of pointers to the nodes of the
// map->refs list, sorted with refcmp (by label, ties broken by age),
// with runs of equal labels collapsed to their first element.
// Afterwards map->size holds the number of distinct labels kept.
// NOTE(review): if called with map->size == 0 this would still set
// map->size to 1; the lookup path visible here returns early when
// map->size is 0 before calling this.
static void sort_references(cmark_reference_map *map) {
unsigned int i = 0, last = 0, size = map->size;
cmark_reference *r = map->refs, **sorted = NULL;

// One slot per list node; assumes map->size equals the list length.
sorted = (cmark_reference **)map->mem->calloc(size, sizeof(cmark_reference *));
while (r) {
sorted[i++] = r;
r = r->next;
}

qsort(sorted, size, sizeof(cmark_reference *), refcmp);

// Compact in place: keep only the first entry of each run of equal
// labels. Because refcmp orders ties by age, that is the lowest age.
for (i = 1; i < size; i++) {
if (labelcmp(sorted[i]->label, sorted[last]->label) != 0)
sorted[++last] = sorted[i];
}

// NOTE(review): `ref` is not declared in this function; the next line
// looks like a removed line carried over from the rendered diff --
// confirm against the actual file before building.
add_reference(map, ref);
map->sorted = sorted;
map->size = last + 1;
}

// Returns reference if refmap contains a reference with matching
// label, otherwise NULL.
//
// Also returns NULL when the label length is below 1 or above
// MAX_LINK_LABEL_LENGTH, or when normalize_reference() yields NULL.
// The normalized label is freed before returning.
//
// NOTE(review): this text comes from a rendered diff in which lines of
// two versions appear interleaved (two declarations of `ref`, two NULL
// guards on `map`, a hash-table walk next to a bsearch, two returns).
// Confirm against the real file which lines are live.
cmark_reference *cmark_reference_lookup(cmark_reference_map *map,
cmark_chunk *label) {
cmark_reference *ref = NULL;
cmark_reference **ref = NULL;
unsigned char *norm;
unsigned int hash;

if (label->len < 1 || label->len > MAX_LINK_LABEL_LENGTH)
return NULL;

if (map == NULL)
if (map == NULL || !map->size)
return NULL;

norm = normalize_reference(map->mem, label);
if (norm == NULL)
return NULL;

// Hash-table variant: walk the bucket chain for a matching hash and label.
hash = refhash(norm);
ref = map->table[hash % REFMAP_SIZE];

while (ref) {
if (ref->hash == hash && !strcmp((char *)ref->label, (char *)norm))
break;
ref = ref->next;
}
// Sorted-array variant: build map->sorted on first lookup, then
// binary-search it by label.
if (!map->sorted)
sort_references(map);

ref = (cmark_reference **)bsearch(norm, map->sorted, map->size, sizeof(cmark_reference *), refsearch);
map->mem->free(norm);
return ref;
return ref ? ref[0] : NULL;
}

// Releases every reference owned by the map, then the map itself.
// A NULL map is accepted and ignored.
//
// NOTE(review): this text comes from a rendered diff in which lines of
// two versions appear interleaved (a per-bucket loop over map->table
// next to a walk of the map->refs list); the braces do not balance as
// shown. Confirm against the real file which lines are live.
void cmark_reference_map_free(cmark_reference_map *map) {
unsigned int i;
cmark_reference *ref;

if (map == NULL)
return;

// Hash-table variant: free each bucket's chain.
for (i = 0; i < REFMAP_SIZE; ++i) {
cmark_reference *ref = map->table[i];
cmark_reference *next;

while (ref) {
next = ref->next;
reference_free(map, ref);
ref = next;
}
// List variant: free every node reachable from map->refs, saving the
// next pointer before each node is released.
ref = map->refs;
while (ref) {
cmark_reference *next = ref->next;
reference_free(map, ref);
ref = next;
}

// The sorted array holds only pointers to nodes already freed above,
// so only the array itself is released here.
map->mem->free(map->sorted);
map->mem->free(map);
}

Expand Down
8 changes: 4 additions & 4 deletions src/references.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,21 @@
extern "C" {
#endif

#define REFMAP_SIZE 16

// One link reference definition, chained through `next`.
// NOTE(review): the rendered diff shows fields of two versions side by
// side (`hash` and `age`); confirm which are present in the real header.
struct cmark_reference {
struct cmark_reference *next; // next node in the chain/list
unsigned char *label;         // NUL-terminated key, compared with strcmp
cmark_chunk url;              // set from cmark_clean_url() on creation
cmark_chunk title;            // set from cmark_clean_title() on creation
unsigned int hash;            // refhash() of label (hash-table variant)
unsigned int age;             // value of map->size at creation; sort tie-break
};

typedef struct cmark_reference cmark_reference;

// Container for all reference definitions.
// NOTE(review): the rendered diff shows fields of two versions side by
// side (`table` versus `refs`/`sorted`/`size`); confirm which are
// present in the real header.
struct cmark_reference_map {
cmark_mem *mem;                      // allocator used for nodes, arrays and the map
cmark_reference *table[REFMAP_SIZE]; // hash buckets (hash-table variant)
cmark_reference *refs;               // singly linked list of all references, newest first
cmark_reference **sorted;            // built by sort_references() on first lookup; NULL until then
unsigned int size;                   // reference count; after sorting, the distinct-label count
};

typedef struct cmark_reference_map cmark_reference_map;
Expand Down
2 changes: 1 addition & 1 deletion test/cmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def __init__(self, prog=None, library_dir=None, extensions=None):
else:
libnames = [ ["lib", ".so"] ]
if not library_dir:
library_dir = os.path.join("build", "src")
library_dir = os.path.join("..", "build", "src")
for prefix, suffix in libnames:
candidate = os.path.join(library_dir, prefix + "cmark-gfm" + suffix)
if os.path.isfile(candidate):
Expand Down
86 changes: 62 additions & 24 deletions test/pathological_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,31 @@
import argparse
import sys
import platform
import itertools
import multiprocessing
from cmark import CMark

if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Run cmark tests.')
parser.add_argument('--program', dest='program', nargs='?', default=None,
help='program to test')
parser.add_argument('--library-dir', dest='library_dir', nargs='?',
default=None, help='directory containing dynamic library')
args = parser.parse_args(sys.argv[1:])
# Builds the "reference collisions" pathological case: returns a pair
# (document, regex) where the regex must match the rendered output.
# Keys "x0", "x1", ... are filtered to those for which badhash() is
# true; COUNT of them are taken.
def hash_collisions():
REFMAP_SIZE = 16
COUNT = 50000

# True when the 32-bit hash of ref (h = ord(c) + (h << 6) + (h << 16) - h
# per character, masked to 32 bits) is a multiple of REFMAP_SIZE.
def badhash(ref):
h = 0
for c in ref:
a = (h << 6) & 0xFFFFFFFF
b = (h << 16) & 0xFFFFFFFF
h = ord(c) + a + b - h
h = h & 0xFFFFFFFF

return (h % REFMAP_SIZE) == 0

keys = ("x%d" % i for i in itertools.count())
collisions = itertools.islice((k for k in keys if badhash(k)), COUNT)
# The first colliding key is consumed here and never defined as a
# reference, so only COUNT-1 keys remain for the document below.
bad_key = next(collisions)

# Each remaining key is defined as a reference, followed by a use of the
# undefined bad_key.
document = ''.join("[%s]: /url\n\n[%s]\n\n" % (key, bad_key) for key in collisions)

# NOTE(review): `args` is not defined in this function; the next line
# looks like a removed line carried over from the rendered diff.
cmark = CMark(prog=args.program, library_dir=args.library_dir)
return document, re.compile("(<p>\[%s\]</p>\n){%d}" % (bad_key, COUNT-1))

# list of pairs consisting of input and a regex that must match the output.
pathological = {
Expand Down Expand Up @@ -58,32 +72,56 @@
re.compile("abc\ufffd?de\ufffd?")),
"backticks":
("".join(map(lambda x: ("e" + "`" * x), range(1,10000))),
re.compile("^<p>[e`]*</p>\n$"))
re.compile("^<p>[e`]*</p>\n$")),
"reference collisions": hash_collisions()
}

# NOTE(review): as a Python regex, '/s+/' matches a literal "/", one or
# more "s", then "/" -- possibly intended as r'\s+'. Not used in the
# code visible here.
whitespace_re = re.compile('/s+/')
# Result counters reported by the summary print.
passed = 0
errored = 0
failed = 0
# Seconds the driver waits for each test process before terminating it.
TIMEOUT = 5

# Runs one pathological case: parses the command-line options, builds a
# CMark wrapper, converts `inp` to HTML and checks the output against
# `regex`. Calls exit(1) on a non-zero return code or a mismatch.
# NOTE(review): this text comes from a rendered diff in which lines of
# two versions appear interleaved (a loop over `pathological`, counter
# updates and duplicated print calls); confirm which lines are live.
def run_test(inp, regex):
parser = argparse.ArgumentParser(description='Run cmark tests.')
parser.add_argument('--program', dest='program', nargs='?', default=None,
help='program to test')
parser.add_argument('--library-dir', dest='library_dir', nargs='?',
default=None, help='directory containing dynamic library')
args = parser.parse_args(sys.argv[1:])
cmark = CMark(prog=args.program, library_dir=args.library_dir)

print("Testing pathological cases:")
for description in pathological:
(inp, regex) = pathological[description]
[rc, actual, err] = cmark.to_html(inp)
if rc != 0:
errored += 1
print(description, '[ERRORED (return code %d)]' %rc)
print('[ERRORED (return code %d)]' % rc)
print(err)
exit(1)
elif regex.search(actual):
print(description, '[PASSED]')
passed += 1
print('[PASSED]')
else:
print(description, '[FAILED]')
print('[FAILED (mismatch)]')
print(repr(actual))
failed += 1
exit(1)

# Driver: runs each pathological case in its own process so that a case
# still running after TIMEOUT seconds can be terminated and counted as
# errored. A non-zero child exit code also counts as errored.
# NOTE(review): this text comes from a rendered diff in which lines of
# two versions appear interleaved (two summary prints and two exit
# paths); confirm which lines are live.
if __name__ == '__main__':
print("Testing pathological cases:")
for description in pathological:
(inp, regex) = pathological[description]
print(description, "... ", end='')
# Flush so the case name is visible before the child prints its result.
sys.stdout.flush()

p = multiprocessing.Process(target=run_test, args=(inp, regex))
p.start()
p.join(TIMEOUT)

if p.is_alive():
p.terminate()
p.join()
print('[TIMED OUT]')
errored += 1
elif p.exitcode != 0:
errored += 1
else:
passed += 1

print("%d passed, %d failed, %d errored" % (passed, failed, errored))
if (failed == 0 and errored == 0):
exit(0)
else:
exit(1)
print("%d passed, %d errored" % (passed, errored))
exit(errored)

0 comments on commit 66a0836

Please sign in to comment.