From 93ce5923e92f6d2db831d8715ec62734821c70ce Mon Sep 17 00:00:00 2001 From: Chirayu Krishnappa Date: Tue, 24 Sep 2013 18:47:45 -0700 Subject: [PATCH] feat($sce): simpler patterns for $sceDelegateProviders white/blacklists Closes #4006 --- docs/content/error/sce/imatcher.ngdoc | 9 ++ docs/content/error/sce/iwcard.ngdoc | 9 ++ src/ng/sce.js | 162 ++++++++++++++++++++---- test/ng/sceSpecs.js | 169 ++++++++++++++++++++++---- 4 files changed, 300 insertions(+), 49 deletions(-) create mode 100644 docs/content/error/sce/imatcher.ngdoc create mode 100644 docs/content/error/sce/iwcard.ngdoc diff --git a/docs/content/error/sce/imatcher.ngdoc b/docs/content/error/sce/imatcher.ngdoc new file mode 100644 index 000000000000..8c4f0a4c0eb5 --- /dev/null +++ b/docs/content/error/sce/imatcher.ngdoc @@ -0,0 +1,9 @@ +@ngdoc error +@name $sce:imatcher +@fullName Invalid matcher (only string patterns and RegExp instances are supported) +@description + +Please see {@link api/ng.$sceDelegateProvider#resourceUrlWhitelist +$sceDelegateProvider.resourceUrlWhitelist} and {@link +api/ng.$sceDelegateProvider#resourceUrlBlacklist $sceDelegateProvider.resourceUrlBlacklist} for the +list of acceptable items. diff --git a/docs/content/error/sce/iwcard.ngdoc b/docs/content/error/sce/iwcard.ngdoc new file mode 100644 index 000000000000..459b78d575a3 --- /dev/null +++ b/docs/content/error/sce/iwcard.ngdoc @@ -0,0 +1,9 @@ +@ngdoc error +@name $sce:iwcard +@fullName The sequence *** is not a valid pattern wildcard +@description + +The strings in {@link api/ng.$sceDelegateProvider#resourceUrlWhitelist +$sceDelegateProvider.resourceUrlWhitelist} and {@link +api/ng.$sceDelegateProvider#resourceUrlBlacklist $sceDelegateProvider.resourceUrlBlacklist} may not +contain the undefined sequence `***`. Only `*` and `**` wildcard patterns are defined. diff --git a/src/ng/sce.js b/src/ng/sce.js index ca54b58fcc41..577c80358c4f 100644 --- a/src/ng/sce.js +++ b/src/ng/sce.js @@ -12,6 +12,55 @@ var SCE_CONTEXTS = { JS: 'js' }; +// Helper functions follow. + +// Copied from: +// http://docs.closure-library.googlecode.com/git/closure_goog_string_string.js.source.html#line962 +// Prereq: s is a string. +function escapeForRegexp(s) { + return s.replace(/([-()\[\]{}+?*.$\^|,:# -1) { + throw $sceMinErr('iwcard', + 'Illegal sequence *** in string matcher. String: {0}', matcher); + } + matcher = escapeForRegexp(matcher). + replace('\\*\\*', '.*'). + replace('\\*', '[^:/.?&;]*'); + return new RegExp('^' + matcher + '$'); + } else if (isRegExp(matcher)) { + // The only other type of matcher allowed is a Regexp. + // Match entire URL / disallow partial matches. + // Flags are reset (i.e. no global, ignoreCase or multiline) + return new RegExp('^' + matcher.source + '$'); + } else { + throw $sceMinErr('imatcher', + 'Matchers may only be "self", string patterns or RegExp objects'); + } +} + + +function adjustMatchers(matchers) { + var adjustedMatchers = []; + if (isDefined(matchers)) { + forEach(matchers, function(matcher) { + adjustedMatchers.push(adjustMatcher(matcher)); + }); + } + return adjustedMatchers; +} + /** * @ngdoc service @@ -45,13 +94,37 @@ var SCE_CONTEXTS = { * @name ng.$sceDelegateProvider * @description * - * The $sceDelegateProvider provider allows developers to configure the {@link ng.$sceDelegate + * The `$sceDelegateProvider` provider allows developers to configure the {@link ng.$sceDelegate * $sceDelegate} service. This allows one to get/set the whitelists and blacklists used to ensure - * that URLs used for sourcing Angular templates are safe. Refer {@link + * that the URLs used for sourcing Angular templates are safe. Refer {@link * ng.$sceDelegateProvider#resourceUrlWhitelist $sceDelegateProvider.resourceUrlWhitelist} and * {@link ng.$sceDelegateProvider#resourceUrlBlacklist $sceDelegateProvider.resourceUrlBlacklist} * - * Read more about {@link ng.$sce Strict Contextual Escaping (SCE)}. + * For the general details about this service in Angular, read the main page for {@link ng.$sce + * Strict Contextual Escaping (SCE)}. + * + * **Example**: Consider the following case. + * + * - your app is hosted at url `http://myapp.example.com/` + * - but some of your templates are hosted on other domains you control such as + * `http://srv01.assets.example.com/`,  `http://srv02.assets.example.com/`, etc. + * - and you have an open redirect at `http://myapp.example.com/clickThru?...`. + * + * Here is what a secure configuration for this scenario might look like: + * + *
+ *    angular.module('myApp', []).config(function($sceDelegateProvider) {
+ *      $sceDelegateProvider.resourceUrlWhitelist([
+ *        // Allow same origin resource loads.
+ *        'self',
+ *        // Allow loading from our assets domain.  Notice the difference between * and **.
+ *        'http://srv*.assets.example.com/**']);
+ *
+ *      // The blacklist overrides the whitelist so the open redirect here is blocked.
+ *      $sceDelegateProvider.resourceUrlBlacklist([
+ *        'http://myapp.example.com/clickThru**']);
+ *      }); 
+ * 
*/ function $SceDelegateProvider() { @@ -68,28 +141,25 @@ function $SceDelegateProvider() { * @function * * @param {Array=} whitelist When provided, replaces the resourceUrlWhitelist with the value - * provided. This must be an array. - * - * Each element of this array must either be a regex or the special string `'self'`. - * - * When a regex is used, it is matched against the normalized / absolute URL of the resource - * being tested. + * provided. This must be an array or null. A snapshot of this array is used so further + * changes to the array are ignored. * - * The **special string** `'self'` can be used to match against all URLs of the same domain as the - * application document with the same protocol (allows sourcing https resources from http documents.) + * Follow {@link ng.$sce#resourceUrlPatternItem this link} for a description of the items allowed in + * this array. * - * Please note that **an empty whitelist array will block all URLs**! + * Note: **an empty whitelist array will block all URLs**! * * @return {Array} the currently set whitelist array. * - * The **default value** when no whitelist has been explicitly set is `['self']`. + * The **default value** when no whitelist has been explicitly set is `['self']` allowing only + * same origin resource requests. * * @description * Sets/Gets the whitelist of trusted resource URLs. */ this.resourceUrlWhitelist = function (value) { if (arguments.length) { - resourceUrlWhitelist = value; + resourceUrlWhitelist = adjustMatchers(value); } return resourceUrlWhitelist; }; @@ -101,13 +171,11 @@ function $SceDelegateProvider() { * @function * * @param {Array=} blacklist When provided, replaces the resourceUrlBlacklist with the value - * provided. This must be an array. + * provided. This must be an array or null. A snapshot of this array is used so further + * changes to the array are ignored. * - * Each element of this array must either be a regex or the special string `'self'` (see - * `resourceUrlWhitelist` for meaning - it's only really useful there.) - * - * When a regex is used, it is matched against the normalized / absolute URL of the resource - * being tested. + * Follow {@link ng.$sce#resourceUrlPatternItem this link} for a description of the items allowed in + * this array. * * The typical usage for the blacklist is to **block [open redirects](http://cwe.mitre.org/data/definitions/601.html)** * served by your domain as these would otherwise be trusted but actually return content from the redirected @@ -126,7 +194,7 @@ function $SceDelegateProvider() { this.resourceUrlBlacklist = function (value) { if (arguments.length) { - resourceUrlBlacklist = value; + resourceUrlBlacklist = adjustMatchers(value); } return resourceUrlBlacklist; }; @@ -147,7 +215,8 @@ function $SceDelegateProvider() { if (matcher === 'self') { return $$urlUtils.isSameOrigin(parsedUrl); } else { - return !!parsedUrl.href.match(matcher); + // definitely a regex. See adjustMatchers() + return !!matcher.exec(parsedUrl.href); } } @@ -457,9 +526,54 @@ function $SceDelegateProvider() { * | `$sce.RESOURCE_URL` | For URLs that are not only safe to follow as links, but whose contens are also safe to include in your application. Examples include `ng-include`, `src` / `ngSrc` bindings for tags other than `IMG` (e.g. `IFRAME`, `OBJECT`, etc.)

Note that `$sce.RESOURCE_URL` makes a stronger statement about the URL than `$sce.URL` does and therefore contexts requiring values trusted for `$sce.RESOURCE_URL` can be used anywhere that values trusted for `$sce.URL` are required. | * | `$sce.JS` | For JavaScript that is safe to execute in your application's context. Currently unused. Feel free to use it in your own directives. | * - * ## Show me an example. + * ## Format of items in {@link ng.$sceDelegateProvider#resourceUrlWhitelist resourceUrlWhitelist}/{@link ng.$sceDelegateProvider#resourceUrlBlacklist Blacklist} + * + * Each element in these arrays must be one of the following: + * + * - **'self'** + * - The special **string**, `'self'`, can be used to match against all URLs of the **same + * domain** as the application document using the **same protocol**. + * - **String** (except the special value `'self'`) + * - The string is matched against the full *normalized / absolute URL* of the resource + * being tested (substring matches are not good enough.) + * - There are exactly **two wildcard sequences** - `*` and `**`. All other characters + * match themselves. + * - `*`: matches zero or more occurances of any character other than one of the following 6 + * characters: '`:`', '`/`', '`.`', '`?`', '`&`' and ';'. It's a useful wildcard for use + * in a whitelist. + * - `**`: matches zero or more occurances of *any* character. As such, it's not + * not appropriate to use in for a scheme, domain, etc. as it would match too much. (e.g. + * http://**.example.com/ would match http://evil.com/?ignore=.example.com/ and that might + * not have been the intention.) It's usage at the very end of the path is ok. (e.g. + * http://foo.example.com/templates/**). + * - **RegExp** (*see caveat below*) + * - *Caveat*: While regular expressions are powerful and offer great flexibility, their syntax + * (and all the inevitable escaping) makes them *harder to maintain*. It's easy to + * accidentally introduce a bug when one updates a complex expression (imho, all regexes should + * have good test coverage.). For instance, the use of `.` in the regex is correct only in a + * small number of cases. A `.` character in the regex used when matching the scheme or a + * subdomain could be matched against a `:` or literal `.` that was likely not intended. It + * is highly recommended to use the string patterns and only fall back to regular expressions + * if they as a last resort. + * - The regular expression must be an instance of RegExp (i.e. not a string.) It is + * matched against the **entire** *normalized / absolute URL* of the resource being tested + * (even when the RegExp did not have the `^` and `$` codes.) In addition, any flags + * present on the RegExp (such as multiline, global, ignoreCase) are ignored. + * - If you are generating your Javascript from some other templating engine (not + * recommended, e.g. in issue [#4006](https://github.com/angular/angular.js/issues/4006)), + * remember to escape your regular expression (and be aware that you might need more than + * one level of escaping depending on your templating engine and the way you interpolated + * the value.) Do make use of your platform's escaping mechanism as it might be good + * enough before coding your own. e.g. Ruby has + * [Regexp.escape(str)](http://www.ruby-doc.org/core-2.0.0/Regexp.html#method-c-escape) + * and Python has [re.escape](http://docs.python.org/library/re.html#re.escape). + * Javascript lacks a similar built in function for escaping. Take a look at Google + * Closure library's [goog.string.regExpEscape(s)]( + * http://docs.closure-library.googlecode.com/git/closure_goog_string_string.js.source.html#line962). * + * Refer {@link ng.$sceDelegateProvider#example $sceDelegateProvider} for an example. * + * ## Show me an example using SCE. * * @example @@ -925,7 +1039,7 @@ function $SceProvider() { getTrusted = sce.getTrusted, trustAs = sce.trustAs; - angular.forEach(SCE_CONTEXTS, function (enumValue, name) { + forEach(SCE_CONTEXTS, function (enumValue, name) { var lName = lowercase(name); sce[camelCase("parse_as_" + lName)] = function (expr) { return parse(enumValue, expr); diff --git a/test/ng/sceSpecs.js b/test/ng/sceSpecs.js index 1eb382f6a887..7a309f9513d2 100644 --- a/test/ng/sceSpecs.js +++ b/test/ng/sceSpecs.js @@ -288,32 +288,151 @@ describe('SCE', function() { '$sce', 'insecurl', 'Blocked loading resource from url not allowed by $sceDelegate policy. URL: foo'); })); - it('should support custom regex', runTest( - { - whiteList: [/^http:\/\/example\.com.*/], - blackList: [] - }, function($sce) { - expect($sce.getTrustedResourceUrl('http://example.com/foo')).toEqual('http://example.com/foo'); - expect(function() { $sce.getTrustedResourceUrl('https://example.com/foo'); }).toThrowMinErr( - '$sce', 'insecurl', 'Blocked loading resource from url not allowed by $sceDelegate policy. URL: https://example.com/foo'); - })); + if (!msie || msie > 8) { + it('should not accept unknown matcher type', function() { + expect(function() { + runTest({whiteList: [{}]}, null)(); + }).toThrowMinErr('$injector', 'modulerr', new RegExp( + /Failed to instantiate module function ?\(\$sceDelegateProvider\) due to:\n/.source + + /[^[]*\[\$sce:imatcher\] Matchers may only be "self", string patterns or RegExp objects/.source)); + }); + } - it('should support the special string "self" in whitelist', runTest( - { - whiteList: ['self'], - blackList: [] - }, function($sce) { - expect($sce.getTrustedResourceUrl('foo')).toEqual('foo'); - })); + describe('adjustMatcher', function() { + it('should rewrite regex into regex and add ^ & $ on either end', function() { + expect(adjustMatcher(/a.*b/).exec('a.b')).not.toBeNull(); + expect(adjustMatcher(/a.*b/).exec('-a.b-')).toBeNull(); + // Adding ^ & $ onto a regex that already had them should also work. + expect(adjustMatcher(/^a.*b$/).exec('a.b')).not.toBeNull(); + expect(adjustMatcher(/^a.*b$/).exec('-a.b-')).toBeNull(); + }); + }); - it('should support the special string "self" in blacklist', runTest( - { - whiteList: [/.*/], - blackList: ['self'] - }, function($sce) { - expect(function() { $sce.getTrustedResourceUrl('foo'); }).toThrowMinErr( - '$sce', 'insecurl', 'Blocked loading resource from url not allowed by $sceDelegate policy. URL: foo'); - })); + describe('regex matcher', function() { + it('should support custom regex', runTest( + { + whiteList: [/^http:\/\/example\.com\/.*/], + blackList: [] + }, function($sce) { + expect($sce.getTrustedResourceUrl('http://example.com/foo')).toEqual('http://example.com/foo'); + // must match entire regex + expect(function() { $sce.getTrustedResourceUrl('https://example.com/foo'); }).toThrowMinErr( + '$sce', 'insecurl', 'Blocked loading resource from url not allowed by $sceDelegate policy. URL: https://example.com/foo'); + // https doesn't match (mismatched protocol.) + expect(function() { $sce.getTrustedResourceUrl('https://example.com/foo'); }).toThrowMinErr( + '$sce', 'insecurl', 'Blocked loading resource from url not allowed by $sceDelegate policy. URL: https://example.com/foo'); + })); + + it('should match entire regex', runTest( + { + whiteList: [/https?:\/\/example\.com\/foo/], + blackList: [] + }, function($sce) { + expect($sce.getTrustedResourceUrl('http://example.com/foo')).toEqual('http://example.com/foo'); + expect($sce.getTrustedResourceUrl('https://example.com/foo')).toEqual('https://example.com/foo'); + expect(function() { $sce.getTrustedResourceUrl('http://example.com/fo'); }).toThrowMinErr( + '$sce', 'insecurl', 'Blocked loading resource from url not allowed by $sceDelegate policy. URL: http://example.com/fo'); + // Suffix not allowed even though original regex does not contain an ending $. + expect(function() { $sce.getTrustedResourceUrl('http://example.com/foo2'); }).toThrowMinErr( + '$sce', 'insecurl', 'Blocked loading resource from url not allowed by $sceDelegate policy. URL: http://example.com/foo2'); + // Prefix not allowed even though original regex does not contain a leading ^. + expect(function() { $sce.getTrustedResourceUrl('xhttp://example.com/foo'); }).toThrowMinErr( + '$sce', 'insecurl', 'Blocked loading resource from url not allowed by $sceDelegate policy. URL: xhttp://example.com/foo'); + })); + }); + + describe('string matchers', function() { + it('should support strings as matchers', runTest( + { + whiteList: ['http://example.com/foo'], + blackList: [] + }, function($sce) { + expect($sce.getTrustedResourceUrl('http://example.com/foo')).toEqual('http://example.com/foo'); + // "." is not a special character like in a regex. + expect(function() { $sce.getTrustedResourceUrl('http://example-com/foo'); }).toThrowMinErr( + '$sce', 'insecurl', 'Blocked loading resource from url not allowed by $sceDelegate policy. URL: http://example-com/foo'); + // You can match a prefix. + expect(function() { $sce.getTrustedResourceUrl('http://example.com/foo2'); }).toThrowMinErr( + '$sce', 'insecurl', 'Blocked loading resource from url not allowed by $sceDelegate policy. URL: http://example.com/foo2'); + // You can match a suffix. + expect(function() { $sce.getTrustedResourceUrl('xhttp://example.com/foo'); }).toThrowMinErr( + '$sce', 'insecurl', 'Blocked loading resource from url not allowed by $sceDelegate policy. URL: xhttp://example.com/foo'); + })); + + it('should support the * wildcard', runTest( + { + whiteList: ['http://example.com/foo*'], + blackList: [] + }, function($sce) { + expect($sce.getTrustedResourceUrl('http://example.com/foo')).toEqual('http://example.com/foo'); + // The * wildcard should match extra characters. + expect($sce.getTrustedResourceUrl('http://example.com/foo-bar')).toEqual('http://example.com/foo-bar'); + // The * wildcard does not match ':' + expect(function() { $sce.getTrustedResourceUrl('http://example-com/foo:bar'); }).toThrowMinErr( + '$sce', 'insecurl', 'Blocked loading resource from url not allowed by $sceDelegate policy. URL: http://example-com/foo:bar'); + // The * wildcard does not match '/' + expect(function() { $sce.getTrustedResourceUrl('http://example-com/foo/bar'); }).toThrowMinErr( + '$sce', 'insecurl', 'Blocked loading resource from url not allowed by $sceDelegate policy. URL: http://example-com/foo/bar'); + // The * wildcard does not match '.' + expect(function() { $sce.getTrustedResourceUrl('http://example-com/foo.bar'); }).toThrowMinErr( + '$sce', 'insecurl', 'Blocked loading resource from url not allowed by $sceDelegate policy. URL: http://example-com/foo.bar'); + // The * wildcard does not match '?' + expect(function() { $sce.getTrustedResourceUrl('http://example-com/foo?bar'); }).toThrowMinErr( + '$sce', 'insecurl', 'Blocked loading resource from url not allowed by $sceDelegate policy. URL: http://example-com/foo?bar'); + // The * wildcard does not match '&' + expect(function() { $sce.getTrustedResourceUrl('http://example-com/foo&bar'); }).toThrowMinErr( + '$sce', 'insecurl', 'Blocked loading resource from url not allowed by $sceDelegate policy. URL: http://example-com/foo&bar'); + // The * wildcard does not match ';' + expect(function() { $sce.getTrustedResourceUrl('http://example-com/foo;bar'); }).toThrowMinErr( + '$sce', 'insecurl', 'Blocked loading resource from url not allowed by $sceDelegate policy. URL: http://example-com/foo;bar'); + })); + + it('should support the ** wildcard', runTest( + { + whiteList: ['http://example.com/foo**'], + blackList: [] + }, function($sce) { + expect($sce.getTrustedResourceUrl('http://example.com/foo')).toEqual('http://example.com/foo'); + // The ** wildcard should match extra characters. + expect($sce.getTrustedResourceUrl('http://example.com/foo-bar')).toEqual('http://example.com/foo-bar'); + // The ** wildcard accepts the ':/.?&' characters. + expect($sce.getTrustedResourceUrl('http://example.com/foo:1/2.3?4&5-6')).toEqual('http://example.com/foo:1/2.3?4&5-6'); + })); + + // TODO(chirayu): This throws a very helpful TypeError exception - "Object doesn't support + // this property or method". Tracing using the debugger on IE8 developer tools shows me one + // catch(e) block where the exception is correct, but jumping up to the parent catch block has + // the TypeError exception. I've been unable to repro this outside this snippet or figure out + // why this is happening. Until then, this test doesn't run on IE8. + if (!msie || msie > 8) { + it('should not accept *** in the string', function() { + expect(function() { + runTest({whiteList: ['http://***']}, null)(); + }).toThrowMinErr('$injector', 'modulerr', new RegExp( + /Failed to instantiate module function ?\(\$sceDelegateProvider\) due to:\n/.source + + /[^[]*\[\$sce:iwcard\] Illegal sequence \*\*\* in string matcher\. String: http:\/\/\*\*\*/.source)); + }); + } + }); + + describe('"self" matcher', function() { + it('should support the special string "self" in whitelist', runTest( + { + whiteList: ['self'], + blackList: [] + }, function($sce) { + expect($sce.getTrustedResourceUrl('foo')).toEqual('foo'); + })); + + it('should support the special string "self" in blacklist', runTest( + { + whiteList: [/.*/], + blackList: ['self'] + }, function($sce) { + expect(function() { $sce.getTrustedResourceUrl('foo'); }).toThrowMinErr( + '$sce', 'insecurl', 'Blocked loading resource from url not allowed by $sceDelegate policy. URL: foo'); + })); + }); it('should have blacklist override the whitelist', runTest( { @@ -327,7 +446,7 @@ describe('SCE', function() { it('should support multiple items in both lists', runTest( { whiteList: [/^http:\/\/example.com\/1$/, /^http:\/\/example.com\/2$/, /^http:\/\/example.com\/3$/, 'self'], - blackList: [/^http:\/\/example.com\/3$/, /open_redirect/], + blackList: [/^http:\/\/example.com\/3$/, /.*\/open_redirect/], }, function($sce) { expect($sce.getTrustedResourceUrl('same_domain')).toEqual('same_domain'); expect($sce.getTrustedResourceUrl('http://example.com/1')).toEqual('http://example.com/1');