"No one cares about the old scene people anymore, I’m sure,
bunch of old people grepping for the last of the memcpy." -- Bas Alberts
A collection of my weggli patterns to facilitate vulnerability research.
Blog post:
https://security.humanativaspa.it/a-collection-of-weggli-patterns-for-c-cpp-vulnerability-research
See also:
https://github.com/weggli-rs/weggli
https://dustri.org/b/playing-with-weggli.html
https://github.com/plowsec/weggli-patterns
https://github.com/synacktiv/Weggli_rules_SSTIC2023
https://twitter.com/richinseattle/status/1729654184633327720
weggli -R 'func=^gets$' '{$func();}' .
weggli -R 'func=st(r|p)(cpy|cat)$' '{$func();}' .
weggli -R 'func=wc(s|p)(cpy|cat)$' '{$func();}' .
weggli -R 'func=sprintf$' '{$func();}' .
weggli -R 'func=scanf$' '{$func();}' .
weggli '{strncat(_,_,sizeof(_));}' .
weggli '{strncat(_,_,strlen(_));}' .
weggli '{strncat($dst,$src,sizeof($dst)-strlen($dst));}' .
weggli '{_ $buf[$len]; strncat($buf,_,$len);}' .
The last pattern won't work with integer literals due to known limitations.
weggli -R 'func=cpy$' '{$func(_,$src,_($src));}' .
weggli -R 'func=cpy$' '{$len=_($src); $func(_,$src,$len);}' .
weggli -R 'func=cpy$' '{_ $src[$len]; $func($dst,$src,$len);}' .
The last pattern won't work with integer literals due to known limitations.
weggli '{_* $ptr; sizeof($ptr);}' .
weggli '{_* $ptr=_; sizeof($ptr);}' .
weggli '_ $func(_* $ptr) {sizeof($ptr);}' .
Apparently, global variables are not supported so this won't work:
weggli '_* $ptr=_; _ $func(_) {sizeof($ptr);}' .
weggli "sizeof('_')" .
In C (but not in C++) character constants have type int.
weggli -R 'func=ncpy$' '{$func($buf,_); not:$buf[_]=_;}' .
Some possible variants: memcpy, read, readlink, fread, etc.
weggli '{$buf[sizeof($buf)];}' .
weggli '{_ $buf[$len]; $buf[$len]=_;}' .
weggli '{strlen($src)>sizeof($dst);}' .
weggli '{strlen($src)<=sizeof($dst);}' .
weggli '{sizeof($dst)<strlen($src);}' .
weggli '{sizeof($dst)>=strlen($src);}' .
weggli '{$buf[strlen($buf)-1];}' .
weggli -R 'func=allocf?$' '{$func(strlen($buf));}' .
weggli -R 'func=allocf?$' '{$len=strlen(_); $ptr=$func($len);}' .
weggli -R 'func=allocf?$' '{$len=snprintf(_); $ptr=$func($len);}' .
The second pattern won't work with integer literals due to known limitations.
`<` should also cover `>`, and `<=` should also cover `>=`; however, let's keep all variants just to be sure.
weggli '{_* $ptr1; $ptr1-$ptr2;}' .
weggli '{_* $ptr2; $ptr1-$ptr2;}' .
weggli '{_* $ptr1=_; $ptr1-$ptr2;}' .
weggli '{_* $ptr2=_; $ptr1-$ptr2;}' .
weggli '_ $func(_* $ptr1) {$ptr1-$ptr2;}' .
weggli '_ $func(_* $ptr2) {$ptr1-$ptr2;}' .
weggli -R 'func=(nprintf|lcpy|lcat)$' '{$ret=$func();}' .
weggli -R 'func=(cpy|cat|memmove|memset|sn?printf)$' '{_ $buf[_]; $func($buf,_);}' .
weggli '{_ $buf[_]; $buf[_]=_;}' .
Some possible variants: bcopy, gets, fgets, getwd, getcwd, fread, read, pread, recv, recvfrom, etc.
weggli -R '$type=(unsigned|size_t)' '{$type $var; $var<0;}' .
weggli -R '$type=(unsigned|size_t)' '{$type $var; $var<=0;}' .
weggli -R '$type=(unsigned|size_t)' '{$type $var; $var>=0;}' .
weggli -R '$type=(unsigned|size_t)' '{$type $var=_; $var<0;}' .
weggli -R '$type=(unsigned|size_t)' '{$type $var=_; $var<=0;}' .
weggli -R '$type=(unsigned|size_t)' '{$type $var=_; $var>=0;}' .
`<` should also cover `>`, and `<=` should also cover `>=`; however, let's keep all variants just to be sure.
weggli -R '$copy=(cpy|ncat)$' '{int $len; $copy(_,_,$len);}' .
weggli -R '$copy=(cpy|ncat)$' '{int $len=_; $copy(_,_,$len);}' .
weggli -R '$copy=(cpy|ncat)$' '_ $func(int $len) {$copy(_,_,$len);}' .
weggli -R '$copy=nprintf$' '{int $len; $copy(_,$len);}' .
weggli -R '$copy=nprintf$' '{int $len=_; $copy(_,$len);}' .
weggli -R '$copy=nprintf$' '_ $func(int $len) {$copy(_,$len);}' .
weggli -R '$type=(unsigned|size_t)' '{$type $var1; int $var2; $var2=_($var1);}' .
weggli -R '$type=(unsigned|size_t)' '{$type $var1; int $var2; $var1=_($var2);}' .
weggli -R '$type=(unsigned|size_t)' '{$type $var1; int $var2=_($var1);}' .
weggli -R '$type=(unsigned|size_t)' '{int $var1; $type $var2; $var2=_($var1);}' .
weggli -R '$type=(unsigned|size_t)' '{int $var1; $type $var2; $var1=_($var2);}' .
weggli -R '$type=(unsigned|size_t)' '{int $var1=_; $type $var2=_($var1);}' .
weggli -R '$type=(unsigned|size_t)' '_ $func(int $var2) {$type $var1; $var1=_($var2);}' .
weggli -R '$type=(unsigned|size_t)' '_ $func(int $var2) {$type $var1=_($var2);}' .
weggli -R '$type=(unsigned|size_t)' '$type $func(_) {int $var; return $var;}' .
weggli -R '$type=(unsigned|size_t)' 'int $func(_) {$type $var; return $var;}' .
There are many possible variants of these patterns...
weggli -R 'type=(short|int|long)' '{$type $large; char $narrow; $narrow = $large; }' .
weggli -R 'type=(short|int|long)' '{$type $large; char $narrow = $large; }' .
weggli -R 'type=(int|long)' '{$type $large; short $narrow; $narrow = $large; }' .
weggli -R 'type=(int|long)' '{$type $large; short $narrow = $large; }' .
weggli '{long $large; int $narrow; $narrow = $large; }' .
weggli '{long $large; int $narrow = $large; }' .
weggli -R 'type=(short|int|long)' '_ $func($type $large) {char $narrow; $narrow = $large; }' .
weggli -R 'type=(short|int|long)' '_ $func($type $large) {char $narrow = $large; }' .
weggli -R 'type=(int|long)' '_ $func($type $large) {short $narrow; $narrow = $large; }' .
weggli -R 'type=(int|long)' '_ $func($type $large) {short $narrow = $large; }' .
weggli '_ $func(long $large) {int $narrow; $narrow = $large; }' .
weggli '_ $func(long $large) {int $narrow = $large; }' .
There are many possible variants of these patterns...
weggli 'short _' .
weggli 'int _' .
Some possible variants: short int, unsigned short, unsigned short int, int.
weggli -R 'func=(str|wcs)len$' '{short $len; $len=$func();}' .
Some possible variants: short int, unsigned short, unsigned short int, even signed int.
weggli -R 'func=allocf?$' '{$func(_*_);}' .
weggli -R 'func=allocf?$' '{$func(_+_);}' .
weggli -R 'func=allocf?$' '{$n=_*_; $func($n);}' .
weggli -R 'func=allocf?$' '{$n=_+_; $func($n);}' .
weggli -R 'alloc=allocf?$' -R 'copy=cpy$' '{$alloc($x*_); $copy(_,_,$x);}' .
weggli -R 'alloc=allocf?$' -R 'copy=cpy$' '{$alloc($x+_); $copy(_,_,$x);}' .
weggli -u -R 'alloc=allocf?$' -R 'copy=cpy$' '{$n=_*_; $alloc($n); $copy(_,_,$x);}' .
weggli -u -R 'alloc=allocf?$' -R 'copy=cpy$' '{$n=_+_; $alloc($n); $copy(_,_,$x);}' .
weggli '{$x>_||($x+$y)>_;}' .
weggli '{$x>=_||($x+$y)>_;}' .
weggli '{$x>_||($x+$y)>=_;}' .
weggli '{$x>=_||($x+$y)>=_;}' .
weggli '{$x<_&&($x+$y)<_;}' .
weggli '{$x<=_&&($x+$y)<_;}' .
weggli '{$x<_&&($x+$y)<=_;}' .
weggli '{$x<=_&&($x+$y)<=_;}' .
weggli '{$x>_||($x*$y)>_;}' .
weggli '{$x>=_||($x*$y)>_;}' .
weggli '{$x>_||($x*$y)>=_;}' .
weggli '{$x>=_||($x*$y)>=_;}' .
weggli '{$x<_&&($x*$y)<_;}' .
weggli '{$x<=_&&($x*$y)<_;}' .
weggli '{$x<_&&($x*$y)<=_;}' .
weggli '{$x<=_&&($x*$y)<=_;}' .
`<` should also cover `>`, and `<=` should also cover `>=`; however, let's keep all variants just to be sure.
weggli -R 'func=(printf|scanf|syslog)$' '{$func();}' .
Some possible variants: printk, warn, vwarn, warnx, vwarnx, err, verr, errx, verrx, warnc, vwarnc, errc, verrc, etc.
weggli -R 'func=alloca$' '{$func();}' .
weggli '{free($ptr); not:$ptr=_; not:free($ptr); _($ptr);}' .
weggli '{free($ptr); not:$ptr=_; free($ptr);}' .
weggli '{_ $ptr[]; free($ptr);}' .
weggli '{_ $ptr[]=_; free($ptr);}' .
weggli '{_ $ptr[]; $ptr2=$ptr; free($ptr2);}' .
weggli '{_ $ptr[]=_; $ptr2=$ptr; free($ptr2);}' .
weggli '{_ $var; free(&$var);}' .
weggli '{_ $var=_; free(&$var);}' .
weggli '{_ $var[]; free(&$var);}' .
weggli '{_ $var[]=_; free(&$var);}' .
weggli '{_ *$var; free(&$var);}' .
weggli '{_ *$var=_; free(&$var);}' .
weggli '{$ptr=alloca(_); free($ptr);}' .
weggli '{_ $ptr[]; return $ptr;}' .
weggli '{_ $ptr[]=_; return $ptr;}' .
weggli '{_ $ptr[]; $ptr2=$ptr; return $ptr2;}' .
weggli '{_ $ptr[]=_; $ptr2=$ptr; return $ptr2;}' .
weggli '{_ $var; return &$var;}' .
weggli '{_ $var=_; return &$var;}' .
weggli '{_ $var[]; return &$var;}' .
weggli '{_ $var[]=_; return &$var;}' .
weggli '{_ *$var; return &$var;}' .
weggli '{_ *$var=_; return &$var;}' .
weggli -R 'func=allocf?$' '{$ret=$func(); not:if(_($ret)){};}' .
weggli '{_ $ptr[]; putenv($ptr);}' .
weggli '{_ $ptr[]=_; putenv($ptr);}' .
weggli '{_ $ptr[]; $ptr2=$ptr; putenv($ptr2);}' .
weggli '{_ $ptr[]=_; $ptr2=$ptr; putenv($ptr2);}' .
weggli -R 'func=printf$' -R 'fmt=(.*%\w*x.*|.*%\w*X.*|.*%\w*p.*)' '{$func("$fmt");}' .
weggli -R 'func=allocf?$|strn?dup$' '{not:$ptr=$func(); free($ptr);}' .
weggli --cpp -R 'func=allocf?$|strn?dup$' '{not:$ptr=$func(); free($ptr);}' .
weggli --cpp '{not:$ptr=new $obj; delete $ptr;}' .
Apparently, delete[] is not supported so this won't work properly:
weggli --cpp '{not:$ptr=new $obj[$len]; delete[] $ptr;}' .
weggli '{_* $ptr; not:$ptr=_; not:_(&$ptr); $func($ptr);}' .
weggli '{_* $ptr; not:$ptr=_; not:_(&$ptr); _($ptr);}' .
These patterns might generate many false positives that should be manually investigated.
weggli -R 'func=(system|popen)$' '{$func();}' .
weggli -R 'func=(system|popen)$' '{$func($arg);}' .
The second pattern is meant to filter out string literals, but it might cause some false negatives.
weggli -R 'func=(access|l?stat)$' '{$func();}' .
weggli -R 'func=(mktemp|te?mpnam)$' '{$func();}' .
weggli -R 'func=signal$' '{$func();}' .
weggli '{not:setuid(0); setuid(); setgid();}' .
weggli '{not:seteuid(0); seteuid(); not:seteuid(0); setegid();}' .
weggli '{not:seteuid(0); seteuid(); not:seteuid(0); setuid();}' .
weggli '{not:seteuid(0); seteuid(); not:seteuid(0); seteuid();}' .
weggli -R 'func=sete?uid$' '{strict:$func();}' .
weggli -R 'func=memset(_explicit)?$' '{$func(_,_,0);}' .
weggli -R 'func=memset(_explicit)?$' '{$func(_,sizeof(_),_);}' .
weggli -R 'func=s?rand$' '{$func();}' .
weggli -R 'func=^sn?printf$' '{$func($dst,_,$dst);}' .
weggli -R 'func=^sn?printf$' '{$func($dst,_,_,$dst);}' .
weggli -R 'func=^sn?printf$' '{$func($dst,_,_,_,$dst);}' .
And so on...
weggli -R 'assert=(?i)^\w*assert\w*\s*$' '{$assert(_<_);}' .
weggli -R 'assert=(?i)^\w*assert\w*\s*$' '{$assert(_<=_);}' .
weggli -R 'assert=(?i)^\w*assert\w*\s*$' '{$assert(_>_);}' .
weggli -R 'assert=(?i)^\w*assert\w*\s*$' '{$assert(_>=_);}' .
`<` should also cover `>`, and `<=` should also cover `>=`; however, let's keep all variants just to be sure.
weggli -R 'func=scanf$' '{strict:$func();}' .
weggli -R 'func=ato(i|ll?|f)$' '{$func();}' .
weggli -R 'var=argv|envp' '{$var[_];}' .
weggli -l 'switch(_) {_; not:default:_; _;}' .
`-l` might be overkill and lead to missing additional matches in the same function.
weggli -l 'switch(_) {case _: not:break; not:exit; not:return; not:goto _; case _:_;}' .
`-l` might be overkill and lead to missing additional matches in the same function.
weggli -R 'type!=void' '$type $func(_) {_; not:return;}' .
weggli '{for (_==_;_;_) {}}' .
weggli 'if (_=_) {}' .
weggli 'if (_&_) {}' .
weggli 'if (_|_) {}' .
weggli '{_=+_;}' .
weggli '{_=-_;}' .
weggli -R 'func=strn?cpy$' 'if ($func()==_) {}' .
There are many possible additional patterns in this category...
weggli -R 'pattern=(?i)(unsafe|insecure|dangerous|warning|overflow)' '$pattern' .
weggli -R 'func=(?i)(encode|decode|convert|interpret|compress|fragment|reassemble)' '_ $func(_) {}' .
weggli -R 'func=(?i)(mutex|lock|toctou|parallelism|semaphore|retain|release|garbage|mutual)' '_ $func(_) {}' .
There are many possible additional patterns in this category...