Skip to content

Commit

Permalink
Change compiled list format to use saner block id management
Browse files Browse the repository at this point in the history
Just use self-describing, readable section identifiers instead
of difficult-to-manage arbitrary integers.
  • Loading branch information
gorhill committed Dec 7, 2021
1 parent 8309cc5 commit 72bb894
Show file tree
Hide file tree
Showing 9 changed files with 59 additions and 66 deletions.
11 changes: 2 additions & 9 deletions src/js/background.js
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,8 @@ const µBlock = { // jshint ignore:line

// Read-only
systemSettings: {
compiledMagic: 41, // Increase when compiled format changes
selfieMagic: 41, // Increase when selfie format changes
compiledMagic: 42, // Increase when compiled format changes
selfieMagic: 42, // Increase when selfie format changes
},

// https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501
Expand All @@ -189,13 +189,6 @@ const µBlock = { // jshint ignore:line
compiledFormatChanged: false,
selfieIsInvalid: false,

compiledCosmeticSection: 200,
compiledScriptletSection: 300,
compiledHTMLSection: 400,
compiledHTTPHeaderSection: 500,
compiledSentinelSection: 1000,
compiledBadSubsection: 1,

restoreBackupSettings: {
lastRestoreFile: '',
lastRestoreTime: 0,
Expand Down
21 changes: 9 additions & 12 deletions src/js/cosmetic-filtering.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,6 @@ const cosmeticSurveyingMissCountMax =
parseInt(vAPI.localStorage.getItem('cosmeticSurveyingMissCountMax'), 10) ||
15;

const COMPILED_SPECIFIC_SECTION = 0;
const COMPILED_GENERIC_SECTION = 1;

/******************************************************************************/
/******************************************************************************/

Expand Down Expand Up @@ -398,7 +395,7 @@ FilterContainer.prototype.compileGenericHideSelector = function(
return;
}

writer.select(µb.compiledCosmeticSection + COMPILED_GENERIC_SECTION);
writer.select('COSMETIC_FILTERS:GENERIC');

const type = compiled.charCodeAt(0);
let key;
Expand Down Expand Up @@ -501,7 +498,7 @@ FilterContainer.prototype.compileGenericUnhideSelector = function(
return;
}

writer.select(µb.compiledCosmeticSection + COMPILED_SPECIFIC_SECTION);
writer.select('COSMETIC_FILTERS:SPECIFIC');

// https://github.com/chrisaljoudi/uBlock/issues/497
// All generic exception filters are stored as hostname-based filter
Expand Down Expand Up @@ -531,7 +528,7 @@ FilterContainer.prototype.compileSpecificSelector = function(
return;
}

writer.select(µb.compiledCosmeticSection + COMPILED_SPECIFIC_SECTION);
writer.select('COSMETIC_FILTERS:SPECIFIC');

// https://github.com/chrisaljoudi/uBlock/issues/145
let unhide = exception ? 1 : 0;
Expand Down Expand Up @@ -564,13 +561,13 @@ FilterContainer.prototype.compileTemporary = function(parser) {

FilterContainer.prototype.fromCompiledContent = function(reader, options) {
if ( options.skipCosmetic ) {
this.skipCompiledContent(reader, COMPILED_SPECIFIC_SECTION);
this.skipCompiledContent(reader, COMPILED_GENERIC_SECTION);
this.skipCompiledContent(reader, 'SPECIFIC');
this.skipCompiledContent(reader, 'GENERIC');
return;
}

// Specific cosmetic filter section
reader.select(µb.compiledCosmeticSection + COMPILED_SPECIFIC_SECTION);
reader.select('COSMETIC_FILTERS:SPECIFIC');
while ( reader.next() ) {
this.acceptedCount += 1;
const fingerprint = reader.fingerprint();
Expand Down Expand Up @@ -606,12 +603,12 @@ FilterContainer.prototype.fromCompiledContent = function(reader, options) {
}

if ( options.skipGenericCosmetic ) {
this.skipCompiledContent(reader, COMPILED_GENERIC_SECTION);
this.skipCompiledContent(reader, 'GENERIC');
return;
}

// Generic cosmetic filter section
reader.select(µb.compiledCosmeticSection + COMPILED_GENERIC_SECTION);
reader.select('COSMETIC_FILTERS:GENERIC');
while ( reader.next() ) {
this.acceptedCount += 1;
const fingerprint = reader.fingerprint();
Expand Down Expand Up @@ -675,7 +672,7 @@ FilterContainer.prototype.fromCompiledContent = function(reader, options) {
/******************************************************************************/

FilterContainer.prototype.skipCompiledContent = function(reader, sectionId) {
reader.select(µb.compiledCosmeticSection + sectionId);
reader.select(`COSMETIC_FILTERS:${sectionId}`);
while ( reader.next() ) {
this.acceptedCount += 1;
this.discardedCount += 1;
Expand Down
4 changes: 2 additions & 2 deletions src/js/html-filtering.js
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ htmlFilteringEngine.compile = function(parser, writer) {
return;
}

writer.select(µb.compiledHTMLSection);
writer.select('HTML_FILTERS');

// TODO: Mind negated hostnames, they are currently discarded.

Expand Down Expand Up @@ -345,7 +345,7 @@ htmlFilteringEngine.fromCompiledContent = function(reader) {
// Don't bother loading filters if stream filtering is not supported.
if ( µb.canFilterResponseData === false ) { return; }

reader.select(µb.compiledHTMLSection);
reader.select('HTML_FILTERS');

while ( reader.next() ) {
acceptedCount += 1;
Expand Down
4 changes: 2 additions & 2 deletions src/js/httpheader-filtering.js
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ httpheaderFilteringEngine.freeze = function() {
};

httpheaderFilteringEngine.compile = function(parser, writer) {
writer.select(µb.compiledHTTPHeaderSection);
writer.select('HTTPHEADER_FILTERS');

const { compiled, exception } = parser.result;
const headerName = compiled.slice(15, -1);
Expand Down Expand Up @@ -136,7 +136,7 @@ httpheaderFilteringEngine.compileTemporary = function(parser) {
// 15 -1

httpheaderFilteringEngine.fromCompiledContent = function(reader) {
reader.select(µb.compiledHTTPHeaderSection);
reader.select('HTTPHEADER_FILTERS');

while ( reader.next() ) {
acceptedCount += 1;
Expand Down
29 changes: 18 additions & 11 deletions src/js/reverselookup-worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,18 @@

/******************************************************************************/

const reBlockStart = /^#block-start-(\d+)\n/gm;
const reBlockStart = /^#block-start-([\w:]+)\n/gm;
let listEntries = Object.create(null);

const extractBlocks = function(content, begId, endId) {
const extractBlocks = function(content, ...ids) {
reBlockStart.lastIndex = 0;
const out = [];
let match = reBlockStart.exec(content);
while ( match !== null ) {
const beg = match.index + match[0].length;
const blockId = parseInt(match[1], 10);
if ( blockId >= begId && blockId < endId ) {
const end = content.indexOf('#block-end-' + match[1], beg);
const id = match[1];
if ( ids.includes(id) ) {
const end = content.indexOf(`#block-end-${id}`, beg);
out.push(content.slice(beg, end));
reBlockStart.lastIndex = end;
}
Expand All @@ -58,7 +58,7 @@ const fromNetFilter = function(details) {
for ( const assetKey in listEntries ) {
const entry = listEntries[assetKey];
if ( entry === undefined ) { continue; }
const content = extractBlocks(entry.content, 100, 101);
const content = extractBlocks(entry.content, 'NETWORK_FILTERS:GOOD');
let pos = 0;
for (;;) {
pos = content.indexOf(compiledFilter, pos);
Expand Down Expand Up @@ -159,9 +159,15 @@ const fromCosmeticFilter = function(details) {
for ( const assetKey in listEntries ) {
const entry = listEntries[assetKey];
if ( entry === undefined ) { continue; }
let content = extractBlocks(entry.content, 200, 1000),
isProcedural,
found;
const content = extractBlocks(
entry.content,
'COSMETIC_FILTERS:GENERIC',
'COSMETIC_FILTERS:SPECIFIC',
'SCRIPTLET_FILTERS',
'HTML_FILTERS',
'HTTPHEADER_FILTERS'
);
let found;
let pos = 0;
while ( (pos = content.indexOf(needle, pos)) !== -1 ) {
let beg = content.lastIndexOf('\n', pos);
Expand Down Expand Up @@ -216,9 +222,9 @@ const fromCosmeticFilter = function(details) {
case 8:
// HTML filtering
// Response header filtering
case 64:
case 64: {
if ( exception !== ((fargs[2] & 0b001) !== 0) ) { break; }
isProcedural = (fargs[2] & 0b010) !== 0;
const isProcedural = (fargs[2] & 0b010) !== 0;
if (
isProcedural === false && fargs[3] !== selector ||
isProcedural && JSON.parse(fargs[3]).raw !== selector
Expand All @@ -237,6 +243,7 @@ const fromCosmeticFilter = function(details) {
}
found = fargs[1] + prefix + selector;
break;
}
// Scriptlet injection
case 32:
if ( exception !== ((fargs[2] & 0b001) !== 0) ) { break; }
Expand Down
4 changes: 2 additions & 2 deletions src/js/scriptlet-filtering.js
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ scriptletFilteringEngine.freeze = function() {
};

scriptletFilteringEngine.compile = function(parser, writer) {
writer.select(µb.compiledScriptletSection);
writer.select('SCRIPTLET_FILTERS');

// Only exception filters are allowed to be global.
const { raw, exception } = parser.result;
Expand Down Expand Up @@ -295,7 +295,7 @@ scriptletFilteringEngine.compileTemporary = function(parser) {
// 4 -1

scriptletFilteringEngine.fromCompiledContent = function(reader) {
reader.select(µb.compiledScriptletSection);
reader.select('SCRIPTLET_FILTERS');

while ( reader.next() ) {
acceptedCount += 1;
Expand Down
18 changes: 5 additions & 13 deletions src/js/static-filtering-io.js
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,13 @@ class CompiledListReader {
this.line = '';
this.blocks = new Map();
this.properties = new Map();
const reBlockStart = new RegExp(`^${blockStartPrefix}(\\d+)\\n`, 'gm');
const reBlockStart = new RegExp(`^${blockStartPrefix}([\\w:]+)\\n`, 'gm');
let match = reBlockStart.exec(raw);
while ( match !== null ) {
let beg = match.index + match[0].length;
let end = raw.indexOf(blockEndPrefix + match[1], beg);
this.blocks.set(parseInt(match[1], 10), raw.slice(beg, end));
const sectionId = match[1];
const beg = match.index + match[0].length;
const end = raw.indexOf(blockEndPrefix + sectionId, beg);
this.blocks.set(sectionId, raw.slice(beg, end));
reBlockStart.lastIndex = end;
match = reBlockStart.exec(raw);
}
Expand Down Expand Up @@ -130,15 +131,6 @@ class CompiledListReader {
}
}

CompiledListWriter.prototype.NETWORK_SECTION =
CompiledListReader.prototype.NETWORK_SECTION = 100;

CompiledListWriter.blockStartPrefix =
CompiledListReader.blockStartPrefix = blockStartPrefix;

CompiledListWriter.blockEndPrefix =
CompiledListReader.blockEndPrefix = blockEndPrefix;

/******************************************************************************/

export {
Expand Down
30 changes: 15 additions & 15 deletions src/js/static-net-filtering.js
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,8 @@ const typeNameToTypeValue = {
'inline-font': 17 << TypeBitsOffset,
'inline-script': 18 << TypeBitsOffset,
'cname': 19 << TypeBitsOffset,
// 'unused': 20 << TypeBitsOffset,
// 'unused': 21 << TypeBitsOffset,
'webrtc': 22 << TypeBitsOffset,
'unsupported': 23 << TypeBitsOffset,
'webrtc': 20 << TypeBitsOffset,
'unsupported': 21 << TypeBitsOffset,
};

const otherTypeBitValue = typeNameToTypeValue.other;
Expand Down Expand Up @@ -169,8 +167,6 @@ const typeValueToTypeName = [

const MAX_TOKEN_LENGTH = 7;

const COMPILED_BAD_SECTION = 1;

// Four upper bits of token hash are reserved for built-in predefined
// token hashes, which should never end up being used when tokenizing
// any arbitrary string.
Expand Down Expand Up @@ -1779,12 +1775,10 @@ registerFilterClass(FilterCompositeAll);
const FilterHostnameDict = class {
static getCount(idata) {
const itrie = filterData[idata+1];
if ( itrie === 0 ) {
return filterRefs[filterData[idata+3]].length;
if ( itrie !== 0 ) {
return Array.from(destHNTrieContainer.trieIterator(itrie)).length;
}
return Array.from(
destHNTrieContainer.trieIterator(filterData[idata+1])
).length;
return filterRefs[filterData[idata+3]].length;
}

static match(idata) {
Expand Down Expand Up @@ -2640,6 +2634,12 @@ class FilterCompiler {
return this;
}

// Intentionally empty hook: takes no named parameter (the `writer` argument
// is only hinted at in the commented-out parameter list) and does nothing.
// NOTE(review): presumably this is what `compiler.start(writer)` in
// storage.js invokes before iterating over the lines of a list -- confirm
// that `compiler` there is an instance of this class.
start(/* writer */) {
}

// Intentionally empty hook: takes no named parameter (the `writer` argument
// is only hinted at in the commented-out parameter list) and does nothing.
// NOTE(review): presumably this is what `compiler.finish(writer)` in
// storage.js invokes once all lines of a list have been processed -- confirm
// that `compiler` there is an instance of this class.
finish(/* writer */) {
}

clone() {
return new FilterCompiler(this.parser, this);
}
Expand Down Expand Up @@ -3105,8 +3105,8 @@ class FilterCompiler {

writer.select(
this.badFilter
? writer.NETWORK_SECTION + COMPILED_BAD_SECTION
: writer.NETWORK_SECTION
? 'NETWORK_FILTERS:BAD'
: 'NETWORK_FILTERS:GOOD'
);

// Reminder:
Expand Down Expand Up @@ -3715,7 +3715,7 @@ FilterContainer.prototype.createCompiler = function(parser) {
/******************************************************************************/

FilterContainer.prototype.fromCompiled = function(reader) {
reader.select(reader.NETWORK_SECTION);
reader.select('NETWORK_FILTERS:GOOD');
while ( reader.next() ) {
this.acceptedCount += 1;
if ( this.goodFilters.has(reader.line) ) {
Expand All @@ -3725,7 +3725,7 @@ FilterContainer.prototype.fromCompiled = function(reader) {
}
}

reader.select(reader.NETWORK_SECTION + COMPILED_BAD_SECTION);
reader.select('NETWORK_FILTERS:BAD');
while ( reader.next() ) {
this.badFilters.add(reader.line);
}
Expand Down
4 changes: 4 additions & 0 deletions src/js/storage.js
Original file line number Diff line number Diff line change
Expand Up @@ -979,6 +979,8 @@ self.addEventListener('hiddenSettingsChanged', ( ) => {

parser.setMaxTokenLength(staticNetFilteringEngine.MAX_TOKEN_LENGTH);

compiler.start(writer);

while ( lineIter.eot() === false ) {
let line = lineIter.next();

Expand Down Expand Up @@ -1013,6 +1015,8 @@ self.addEventListener('hiddenSettingsChanged', ( ) => {
}
}

compiler.finish(writer);

// https://github.com/uBlockOrigin/uBlock-issues/issues/1365
// Embed version into compiled list itself: it is encoded as the
// first digits followed by a whitespace.
Expand Down

0 comments on commit 72bb894

Please sign in to comment.