A pretty ok adblocker.
Of course, you could always just use this: http://someonewhocares.org/hosts/
adblock.js is better and you should use that instead.
Ignore any 'ReferenceError: Can't find variable: uzblAdBlock' errors: they occur because LOAD_PROGRESS events can fire before the LOAD_COMMIT event that starts the script, i.e. before uzblAdBlock has been defined.
#! /usr/bin/env python
'''Define the uzblAdBlock() javascript function for the current page.

Reads XPath rules from the block file, keeps the ones listed under the
"global" section or under the section of the current domain, and writes a
"js ..." command that defines uzblAdBlock() to the uzbl fifo.
'''

import os
from sys import argv

try:
    from urllib.parse import urlparse  # Python 3
except ImportError:
    from urlparse import urlparse  # Python 2


def xdghome(key, default):
    '''Return the XDG_<key>_HOME directory, or a path under the home dir.

    key     -- middle part of the variable name, e.g. 'DATA'.
    default -- path relative to the home directory, used when the
               variable is unset or empty.
    '''
    path = os.environ.get("XDG_%s_HOME" % key)
    if path:
        return path

    # expanduser() honours $HOME when it is set but does not raise when it
    # is missing, so importing this module cannot fail on a bare environment.
    return os.path.join(os.path.expanduser('~'), default)


# Setup xdg paths.
DATA_DIR = os.path.join(xdghome('DATA', '.local/share/'), 'uzbl/')

# Blockfile location.
BLOCKFILE = os.path.join(DATA_DIR, 'adblock')

# Javascript template, flattened to a single line; %s receives the list of
# XPath expressions. Every node matched by an expression is removed from the
# document, one node per evaluation, until nothing matches or an error occurs.
JAVASCRIPT = ' '.join(filter(None, map(str.strip, '''
var uzblAdBlock = function() {
    var toblock = %s;
    for(var n = 0; n < toblock.length; n++) {
        var items;
        while (1) {
            try {
                items = document.evaluate(toblock[n], document, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
                if (items == null) { break; }
                var i = items.iterateNext();
                if (i == null) { break; }
                i.parentNode.removeChild(i);
            } catch (e) {
                break;
            }
        }
    }
};
'''.split('\n'))))


def get_domain(url):
    '''Return domain segment of url.

    A missing scheme is assumed to be http and a leading "www." is dropped.
    The result is the netloc, so an explicit port is kept.
    '''
    if not url.startswith('http'):
        url = "http://%s" % url

    loc = urlparse(url).netloc
    if loc.startswith('www.'):
        loc = loc[4:]

    return loc


def _select_rules(lines, domain):
    '''Return the rules from lines that apply to domain.

    A section starts with a "name:" line and ends at the next blank line.
    Lines starting with '#' are comments. Rules are collected from the
    "global" section and from sections whose name normalises to domain.
    '''
    rules, capture = [], False
    for raw in lines:
        line = raw.strip()
        if not line:
            # Blank line splits sections.
            capture = False
        elif line.startswith('#'):
            continue
        elif capture:
            rules.append(line)
        elif line.endswith(':'):
            section = line[:-1]
            capture = section == "global" or get_domain(section) == domain

    return rules


def adblock(url, fifo, blockfile=None):
    '''Write the uzblAdBlock() definition for domain url to fifo.

    url       -- domain of the current page, as returned by get_domain().
    fifo      -- path of the uzbl fifo the "js" command is written to.
    blockfile -- rule file to read; defaults to BLOCKFILE.

    Raises IOError/OSError if the rule file or the fifo cannot be opened.
    '''
    if blockfile is None:
        blockfile = BLOCKFILE

    with open(blockfile, 'r') as fh:
        rules = _select_rules(fh, url)

    # repr() of the list doubles as a javascript array literal. '@' is
    # backslash-escaped; presumably so uzbl does not treat @name inside the
    # command as one of its own variables -- TODO confirm.
    rulestr = repr(rules).replace("@", "\\@")

    with open(fifo, "w") as fh:
        fh.write("js %s\n" % (JAVASCRIPT % rulestr))


if __name__ == '__main__':
    adblock(get_domain(os.environ['UZBL_URI']), os.environ['UZBL_FIFO'])
@on_event LOAD_COMMIT spawn @scripts_dir/adblock.py
@on_event LOAD_PROGRESS js uzblAdBlock();
$XDG_DATA_HOME/uzbl/adblock
url:
    xpath to remove
    xpath to remove

url:
    xpath to remove
    xpath to remove
etc…
global:
    //a[contains(@href,'ad.doubleclick')]
    //iframe[contains(@src,'fastclick.net')]

sales.starcitygames.com:
    //object[contains(@codebase,'swf')]

last.fm:
    //iframe[contains(@class,'ad')]
    //div[contains(@id, 'ads')]
    //div[@class='LastAd']

amazon.com:
    //img[contains(@src,'transp')]
    //iframe
global covers all sites.
Note: do not use double quotes in the adblock file.
A simple script that creates a config file from the adblock easylist. Nowhere near complete.
#!/bin/bash
# Build the uzbl adblock rule file from the element-hiding rules of EasyList.
# Only plain "###id" and "##.class" selectors are converted (global ones go
# under "global:", domain-specific ones under "<domain>:"); every other
# filter is dropped.

# Sort properly. The variable must be exported: a bare assignment is not
# passed on to sort unless LC_ALL already happens to be in the environment.
export LC_ALL=C

# Write where adblock.py looks for the file: $XDG_DATA_HOME/uzbl/adblock,
# falling back to ~/.local/share when XDG_DATA_HOME is unset or empty.
outdir="${XDG_DATA_HOME:-$HOME/.local/share}/uzbl"
mkdir -p "$outdir"

# The first sed prints the "global:" header plus the converted rules (tabs
# and newlines in the replacements are GNU sed escapes); the second sed turns
# double quotes into single quotes, which the adblock file requires.
wget -q -O - http://adblockplus.mozdev.org/easylist/easylist.txt | sort |
sed -n '
1iglobal:
1i//iframe[@name="google_ads_frame"]
# delete lines
/^\[/d # first line
/^!/d # comments
/+>/d # weird lines
/[]]/d # cannot deal with selectors
h
# these characters are not nice
/[^:]/{
/##/{
# global
/^##/{
g
s!^###\(.*\)!\t//*[@id="\1"]!p
g
s!^##\.\(.*\)!\t//*[@class="\1"]!p
d
}
# not global
/^[^#]*##/{
g
s!^\([^#]*\)###\(.*\)!\n\1:\n\t//*[@id="\2"]!p
g
s!^\([^#]*\)##\.\(.*\)!\n\1:\n\t//*[@class="\2"]!p
d
}
}
}
' | sed "s/\"/'/g" > "$outdir/adblock"