User Tools

Site Tools


adblock

adblocker

A pretty ok adblocker.

Of course, you could always just use this: http://someonewhocares.org/hosts/

adblock.js is better and you should use that instead.

Ignore any 'ReferenceError: Can't find variable: uzblAdBlock' errors; they occur because LOAD_PROGRESS events can fire before the LOAD_COMMIT event that starts the script.

Script

#! /usr/bin/env python
 
import os
from sys import argv
from urlparse import urlparse
 
def xdghome(key, default):
    '''Return the XDG_<key>_HOME directory, falling back to $HOME/<default>.

    The environment value is used only when it is set and non-empty;
    otherwise default is joined onto the HOME environment variable.'''

    override = os.environ.get("XDG_%s_HOME" % key)
    if not override:
        # Unset or empty: build the path relative to the home directory.
        return os.path.join(os.environ['HOME'], default)

    return override
 
# uzbl data directory, located beneath the XDG data home.
DATA_DIR = os.path.join(xdghome('DATA', '.local/share/'), 'uzbl/')

# Rule file that adblock() reads.
BLOCKFILE = os.path.join(DATA_DIR, 'adblock')

# Single-line JavaScript template: each source line is stripped, empty lines
# are dropped and the rest are joined with spaces.  The %s placeholder is
# filled with the list of xpath rules.
JAVASCRIPT = ' '.join(
    stripped
    for stripped in (raw.strip() for raw in '''
var uzblAdBlock = function() {
    var toblock = %s;
    for(var n = 0; n < toblock.length; n++) {
        var items;
        while (1) {
            try {
                items = document.evaluate(toblock[n], document, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
                if (items == null) { break; }
                var i = items.iterateNext();
                if (i == null) { break; }
                i.parentNode.removeChild(i);
            } catch (e) {
                break;
            }
        }
    }
};
'''.splitlines())
    if stripped
)
 
 
def get_domain(url):
    '''Return the network location of url with a leading "www." removed.

    url may be a full http(s) URL or a bare host, optionally followed by a
    path.  A bare host is given an "http://" prefix so that urlparse places
    it in netloc rather than in path.
    '''

    # Test for the complete scheme prefix, not just 'http': a bare host such
    # as "httpbin.org" also starts with 'http' and would otherwise be parsed
    # without a scheme, producing an empty netloc.
    if not url.startswith(('http://', 'https://')):
        url = "http://%s" % url

    loc = urlparse(url).netloc
    if loc.startswith('www.'):
        loc = loc[4:]

    return loc
 
 
def adblock(url, fifo):
    '''Write the uzblAdBlock() definition for url to fifo.

    Reads BLOCKFILE, collects the rules of every section whose header
    matches url (after get_domain) or is "global", substitutes them into
    the JAVASCRIPT template and writes the resulting "js ..." line to fifo.

    url  -- domain of the current page, compared with each section header.
    fifo -- path of the file the command line is written to.

    Errors from opening or reading either file propagate to the caller.
    '''
    # "with" closes the handle even if reading raises.
    with open(BLOCKFILE, 'r') as fh:
        lines = [line.strip() for line in fh]

    rules, capture = [], False
    for line in lines:
        if not line:  # a blank line ends the current section
            capture = False

        elif line[0] == '#':
            continue

        elif capture:
            rules.append(line)

        elif line[-1] == ':':
            section = line[:-1]
            if get_domain(section) == url or section == "global":
                capture = True

    # Prefix every "@" with a backslash (presumably so uzbl does not expand
    # it as a variable reference -- confirm).  The backslash is written as
    # "\\@" because "\@" is an invalid escape sequence.
    rulestr = repr(rules).replace("@", "\\@")
    js = "js %s\n" % (JAVASCRIPT % rulestr)
    with open(fifo, "w") as fh:
        fh.write(js)
 
if __name__ == '__main__':
    # The page URI and the fifo path are both read from the environment.
    page_domain = get_domain(os.environ['UZBL_URI'])
    fifo_path = os.environ['UZBL_FIFO']
    adblock(page_domain, fifo_path)

Binding

@on_event   LOAD_COMMIT    spawn @scripts_dir/adblock.py
@on_event   LOAD_PROGRESS  js uzblAdBlock();

Config File

$XDG_DATA_HOME/uzbl/adblock

xpath syntax

url:
  xpath to remove
  xpath to remove

url:
  xpath to remove
  xpath to remove

etc…

global:
  //a[contains(@href,'ad.doubleclick')]
  //iframe[contains(@src,'fastclick.net')]

sales.starcitygames.com:
  //object[contains(@codebase,'swf')]

last.fm:
  //iframe[contains(@class,'ad')]
  //div[contains(@id, 'ads')]
  //div[@class='LastAd']

amazon.com:
  //img[contains(@src,'transp')]
  //iframe

Rules in the global section apply to all sites.

Note: do not use double quotes in the adblock file.

config generator

A simple script that creates a config file from the adblock easylist. Nowhere near complete.

#!/bin/bash

# Sort in the C locale.  The variable has to be exported: a plain assignment
# is not passed on to sort(1) unless LC_ALL was already in the environment.
export LC_ALL=C

# Write to the location the python script reads from: $XDG_DATA_HOME when it
# is set and non-empty, otherwise $HOME/.local/share.
BLOCKFILE="${XDG_DATA_HOME:-$HOME/.local/share}/uzbl/adblock"

# Download the easylist, sort it, convert the element-hiding rules to xpath
# sections, then replace double quotes with single quotes.
wget -q -O - http://adblockplus.mozdev.org/easylist/easylist.txt | sort |
sed -n '
        1iglobal:
        1i//iframe[@name="google_ads_frame"]

        # delete lines
        /^\[/d # first line
        /^!/d # comments
        /+>/d # weird lines
        /[]]/d # cannot deal with selectors

        h

        # these characters are not nice
        /[^:]/{
                /##/{
                        # global
                        /^##/{
                                g
                                s!^###\(.*\)!\t//*[@id="\1"]!p
                                g
                                s!^##\.\(.*\)!\t//*[@class="\1"]!p
                                d
                        }

                        # not global
                        /^[^#]*##/{
                                g
                                s!^\([^#]*\)###\(.*\)!\n\1:\n\t//*[@id="\2"]!p
                                g
                                s!^\([^#]*\)##\.\(.*\)!\n\1:\n\t//*[@class="\2"]!p
                                d
                        }
                }
        }
' | sed "s/\"/'/g" > "$BLOCKFILE"
adblock.txt · Last modified: 2016/08/31 14:10 (external edit)