import re


class LinuxFR(BasicNewsRecipe):
    """News recipe for the LinuxFR.org article and journal RSS feeds.

    The class attributes configure the download (feeds, encoding, tags to
    strip, extra CSS); ``preprocess_html`` cleans up each fetched page.
    """

    title = 'LinuxFR'
    __author__ = 'Christophe Chailloleau'
    description = u'Articles et journaux du site d\'actualit\xe9 autour du logiciel libre LinuxFR.org'
    language = 'fr'
    encoding = 'utf8'
    cover_url = 'http://linuxfr.org/images/logos/linuxfr2_gnu_approved.png'
    oldest_article = 1
    timefmt = ' [%a %d %b %Y]'
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    # (feed title, feed URL) pairs.
    feeds = [
        ('Articles', 'http://linuxfr.org/backend/news/rss20.rss'),
        ('Journaux', 'http://linuxfr.org/backend/journaux/rss20.rss'),
    ]

    category = u'Linux, GNU, logiciel, libre, actualit\xe9, news, free, software, DLFP'

    # Tag matchers for page elements that should be dropped from the output.
    remove_tags = [
        dict(name='div', attrs={'class': ['menubartop', 'menubar', 'leftcol',
                                          'rightbox', 'footer',
                                          'commentsreply', 'signature']}),
        dict(name='div', attrs={'style': 'clear:both'}),
        dict(name='p', attrs={'class': 'hautpage'}),
        dict(name='a', attrs={'href': re.compile('#reply')}),
        # Paragraphs containing nothing but a lone square bracket.
        dict(name='p', text=re.compile(r'^\s*[\[\]]\s*$')),
        dict(name='a', text=['[^]', '[+]', '[-]']),
        dict(name='a', attrs={'rel': 'tag'}),
        dict(name='i'),
        dict(name='h1', text=[' : ', 'Journal : ']),
        dict(name='span', attrs={'class': 'content-score'}),
    ]

    extra_css = "h1 { font-size: inherit; font-weight: bold; }"

    def preprocess_html(self, soup):
        """Clean up relative links and label the comments section.

        For every ``<a>`` whose ``href`` starts with two dots:

        * if it carries a ``title`` attribute, the ``href`` is replaced by
          that title, and when the parent is an ``<li>`` with exactly four
          children the fourth child is removed;
        * otherwise the link is replaced by its first child, except when
          that child is one of the "read more" labels, in which case the
          whole parent element is removed.

        Finally, a "Commentaires" text node is inserted just before the
        first ``<div class="comments">``, if there is one.

        Args:
            soup: parsed page; it is modified in place.

        Returns:
            The same ``soup`` object.
        """
        teaser_labels = (
            'Lire le journal',
            'Lire la suite',
            'Lire les commentaires',
        )

        for tag in soup.findAll(name='a', attrs={'href': re.compile(r'^\.\.')}):
            # ``get`` exists on both old and new BeautifulSoup tags, unlike
            # ``has_key`` which newer releases dropped.
            if tag.get('title') is not None:
                tag['href'] = tag['title']
                parent = tag.parent
                if parent.name == 'li' and len(parent.contents) == 4:
                    parent.contents[3].extract()
                continue

            parent = tag.parent
            # Remember the position before detaching the link so that its
            # text can be put back in the same place.
            index = parent.contents.index(tag)
            if not tag.contents:
                continue
            text = tag.contents[0]
            tag.extract()
            if text is None:
                continue
            if any(text == label for label in teaser_labels):
                parent.extract()
            else:
                parent.insert(index, text)

        first = soup.find(name='div', attrs={'class': 'comments'})
        if first is not None:
            parent = first.parent
            index = parent.contents.index(first)
            # NOTE(review): this literal was split over several physical
            # lines, which is a syntax error for a plain quoted string; the
            # same characters are kept here via newline escapes. Heading
            # markup around the word may have been lost - confirm upstream.
            parent.insert(index, "\n\nCommentaires\n\n")
        return soup