====adblocker====
A pretty ok adblocker.
Of course, you could always just use this: http://someonewhocares.org/hosts/
[[adblock.js]] is better and you should use that instead.
Ignore any 'ReferenceError: Can't find variable: uzblAdBlock' errors; they occur when a LOAD_PROGRESS event fires before the LOAD_COMMIT event that starts the script, so uzblAdBlock has not been defined yet.
===Script===
#! /usr/bin/env python
import os
from sys import argv
try:
    from urllib.parse import urlparse  # Python 3
except ImportError:
    from urlparse import urlparse  # Python 2
def xdghome(key, default):
    '''Return the XDG base directory for key, or a fallback below home.

    key     -- middle part of the variable name, e.g. 'DATA' selects
               XDG_DATA_HOME.
    default -- path relative to the home directory, returned joined to it
               when the XDG variable is unset or empty.

    The home directory is read from $HOME.  When $HOME is not set at all,
    os.path.expanduser('~') is used instead of raising KeyError.
    '''
    xdg_path = os.environ.get("XDG_%s_HOME" % key)
    if xdg_path:
        return xdg_path

    home = os.environ.get('HOME')
    if home is None:
        # A stripped-down environment may not define HOME.
        home = os.path.expanduser('~')
    return os.path.join(home, default)
# uzbl data directory, located below the XDG data home.
DATA_DIR = os.path.join(
    xdghome(key='DATA', default='.local/share/'),
    'uzbl/',
)

# File holding the per-site XPath rules read by adblock().
BLOCKFILE = os.path.join(DATA_DIR, 'adblock')
# JavaScript template, flattened onto a single line: every line of the
# literal below is stripped, empty lines are dropped and the rest is joined
# with single spaces.  The one %s placeholder receives the list of XPath
# expressions; the resulting uzblAdBlock() removes the first node matched by
# each expression, re-evaluating until nothing matches or an error is thrown.
JAVASCRIPT = ' '.join(
    piece for piece in (raw.strip() for raw in '''
var uzblAdBlock = function() {
var toblock = %s;
for(var n = 0; n < toblock.length; n++) {
var items;
while (1) {
try {
items = document.evaluate(toblock[n], document, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
if (items == null) { break; }
var i = items.iterateNext();
if (i == null) { break; }
i.parentNode.removeChild(i);
} catch (e) {
break;
}
}
}
};
'''.split('\n')) if piece
)
def get_domain(url):
    '''Return the network location of url without a leading "www.".

    url may be a full address ("https://www.example.com/page") or a bare
    host as written in the blockfile ("example.com").  The address is parsed
    as given first; only when that yields no network location is it parsed
    again with "http://" prepended.  This keeps bare hosts that happen to
    start with "http" (e.g. "httpbin.org") and addresses with other schemes
    (e.g. "ftp://...") from being misread.

    Whatever urlparse reports as netloc is returned, so a port or user
    information, if present, is kept.
    '''
    loc = urlparse(url).netloc
    if not loc:
        # No "scheme://host" part was recognised: treat url as a bare host.
        loc = urlparse("http://%s" % url).netloc
    if loc.startswith('www.'):
        loc = loc[4:]
    return loc
def _matching_rules(lines, url):
    '''Collect the rules from blockfile lines whose section applies to url.

    A section starts at a line ending in ':' whose name is either "global"
    or resolves (via get_domain) to url, and ends at the first blank line.
    Lines starting with '#' are skipped.  While a section is being
    captured every other non-blank line is taken as a rule.
    '''
    rules, capture = [], False
    for line in lines:
        line = line.strip()
        if not line:  # a blank line ends the current section
            capture = False
        elif line[0] == '#':
            continue
        elif capture:
            rules.append(line)
        elif line[-1] == ':':
            section = line[:-1]
            if get_domain(section) == url or section == "global":
                capture = True
    return rules


def adblock(url, fifo):
    '''Write the uzblAdBlock() definition for url to the file at fifo.

    url  -- domain of the current page, as returned by get_domain().
    fifo -- path opened for writing; receives one "js ..." command line.

    The rules are read from BLOCKFILE.  Both files are opened with "with"
    so they are closed even when reading or writing raises.
    '''
    with open(BLOCKFILE, 'r') as blockfile:
        rules = _matching_rules(blockfile, url)

    # Every "@" is emitted as "\@".
    # NOTE(review): presumably this stops uzbl from expanding @name
    # variables inside the command -- confirm against the uzbl docs.
    rulestr = repr(rules).replace("@", "\\@")
    js = "js %s\n" % (JAVASCRIPT % rulestr)

    with open(fifo, "w") as pipe:
        pipe.write(js)
if __name__ == '__main__':
    # The page address and the FIFO path are read from the environment
    # (UZBL_URI and UZBL_FIFO); both are expected to be present when the
    # script is spawned by the LOAD_COMMIT binding.
    page_domain = get_domain(os.environ['UZBL_URI'])
    adblock(page_domain, os.environ['UZBL_FIFO'])
===Binding===
@on_event LOAD_COMMIT spawn @scripts_dir/adblock.py
@on_event LOAD_PROGRESS js uzblAdBlock();
===Config File===
$XDG_DATA_HOME/uzbl/adblock
[[http://www.w3schools.com/XPath/xpath_syntax.asp|xpath syntax]]
url:
xpath to remove
xpath to remove
url:
xpath to remove
xpath to remove
etc...
global:
//a[contains(@href,'ad.doubleclick')]
//iframe[contains(@src,'fastclick.net')]
sales.starcitygames.com:
//object[contains(@codebase,'swf')]
last.fm:
//iframe[contains(@class,'ad')]
//div[contains(@id, 'ads')]
//div[@class='LastAd']
amazon.com:
//img[contains(@src,'transp')]
//iframe
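Rules under the global: heading apply to every site. A section ends at the first blank line, so separate sections with a blank line; lines starting with # are ignored.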
Note: do not use double quotes in the adblock file.
=== config generator ===
A simple script that creates a config file from the adblock easylist. Nowhere near complete.
#!/bin/bash
# Build the uzbl adblock blockfile from the EasyList filter list.
#
# Only element-hiding filters of the form "##.class" and "###id" (global or
# prefixed with a domain) are translated into XPath rules; every other line
# of the list is dropped.

# Sort in byte order: lines starting with "#" then come before lines that
# start with a letter or digit, so the global "##..." filters are emitted
# first, directly below the "global:" heading inserted on line 1.
# The variable has to be exported, otherwise sort does not see it.
export LC_ALL=C

# Output file: $XDG_DATA_HOME/uzbl/adblock, or below ~/.local/share when
# XDG_DATA_HOME is unset or empty.
outfile="${XDG_DATA_HOME:-$HOME/.local/share}/uzbl/adblock"

wget -q -O - http://adblockplus.mozdev.org/easylist/easylist.txt | sort |
sed -n '
1iglobal:
1i//iframe[@name="google_ads_frame"]
# delete lines
/^\[/d # first line
/^!/d # comments
/+>/d # weird lines
/[]]/d # cannot deal with selectors
# keep a copy of the line in the hold space; each "g" below restores it
h
# these characters are not nice
# NOTE(review): this address matches every line that contains at least one
# character other than ":" -- confirm that this is what was intended
/[^:]/{
/##/{
# global
/^##/{
g
s!^###\(.*\)!\t//*[@id="\1"]!p
g
s!^##\.\(.*\)!\t//*[@class="\1"]!p
d
}
# not global
/^[^#]*##/{
g
s!^\([^#]*\)###\(.*\)!\n\1:\n\t//*[@id="\2"]!p
g
s!^\([^#]*\)##\.\(.*\)!\n\1:\n\t//*[@class="\2"]!p
d
}
}
}
' | sed "s/\"/'/g" > "$outfile"