#!/usr/bin/env python3
"""Poll a feed and print newly seen entries as ``author title link`` lines.

Usage: pass the feed URL as the only command-line argument.  Each new entry
is printed on its own line and flushed immediately, so the output can be
piped into another process.
"""
import collections
import re
import sys
import time

# Byte budget for the title before the author-length adjustment.  The original
# author's note on how the number was derived:
#   :Twitter!<10>@<63> PRIVMSG #piraatit :() https://twitter.com/<author>/status/<19>
# NOTE(review): presumably what is left of an IRC line after that overhead --
# confirm against the consumer of this script's output.
MAX_TITLE_BYTES = 356

POLL_INTERVAL = 900    # seconds between two fetches of the feed
CHECK_LATEST = 5       # only the first N entries of the feed are examined
SEEN_LIMIT = 100       # number of already-printed links remembered
ELLIPSIS = '…'
REQUIRED_KEYS = {'link', 'author', 'title'}

# Raw strings: '\s', '\S' and '\.' are not valid escapes in normal literals.
_NEWLINES_RE = re.compile(r'\n+\s*')
_PIC_LINK_RE = re.compile(r'(https?://)?pic\.twitter\.com/\S+')


def utf8_lead_byte(b):
    """Return True unless *b* is a UTF-8 continuation byte (bits 10xxxxxx)."""
    return (b & 0xC0) != 0x80


def utf8_byte_truncate(text, max_bytes):
    """Encode *text* as UTF-8 and cut it to at most *max_bytes* bytes.

    The cut never lands inside a multi-byte character: if the byte at
    ``max_bytes`` is a continuation byte, the cut position backs up to the
    lead byte of that character and the whole character is dropped.

    Returns ``bytes`` (always valid UTF-8).  A non-positive *max_bytes*
    yields ``b''``.
    """
    utf8 = text.encode('utf8')
    if len(utf8) <= max_bytes:
        return utf8
    if max_bytes <= 0:
        # A negative index would otherwise be read as "from the end".
        return b''
    i = max_bytes
    while i > 0 and not utf8_lead_byte(utf8[i]):
        i -= 1
    return utf8[:i]


def clean_title(raw):
    """Collapse line breaks into single spaces and strip pic.twitter.com links."""
    return _PIC_LINK_RE.sub('', _NEWLINES_RE.sub(' ', raw))


def shorten_title(title, max_bytes):
    """Return *title* shortened to at most *max_bytes* UTF-8 bytes.

    A title that already fits is returned unchanged.  Otherwise it is cut on
    a character boundary, the last (possibly partial) word is dropped, and an
    ellipsis is appended.  The ellipsis' own encoded size (3 bytes) is taken
    out of the budget so the result really fits.  If the cut text contains no
    space at all it is kept as is rather than reduced to a bare ellipsis.
    """
    if len(title.encode('utf-8')) <= max_bytes:
        return title
    budget = max_bytes - len(ELLIPSIS.encode('utf-8'))
    head = utf8_byte_truncate(title, budget).decode('utf-8')
    words = head.split(' ')
    if len(words) > 1:
        head = ' '.join(words[:-1])
    return head + ELLIPSIS


def main():
    """Poll the feed given in ``sys.argv[1]`` forever and print new entries."""
    # Imported here so the pure helpers above stay importable without the
    # third-party dependency installed.
    import feedparser

    if len(sys.argv) < 2:
        sys.exit('usage: {} FEED_URL'.format(sys.argv[0]))
    url = sys.argv[1]

    # Wait until the feed delivers something, then treat everything currently
    # in it as already seen so that only later entries get printed.
    entries = feedparser.parse(url)['entries']
    while not entries:
        time.sleep(POLL_INTERVAL)
        entries = feedparser.parse(url)['entries']

    # Walk the feed back to front so the first entries of the feed are
    # appended last and therefore survive the maxlen cut.
    seen = collections.deque(
        (item['link'] for item in reversed(entries) if 'link' in item),
        maxlen=SEEN_LIMIT,
    )

    while True:
        time.sleep(POLL_INTERVAL)
        entries = feedparser.parse(url)['entries']
        # Examine the first few entries, in reverse feed order.
        for item in reversed(entries[:CHECK_LATEST]):
            if not REQUIRED_KEYS.issubset(item) or item['link'] in seen:
                continue
            author = item['author'].replace('@', '')
            # NOTE(review): the factor 2 presumably reflects the author name
            # occurring twice in the relayed message -- confirm.
            max_bytes = MAX_TITLE_BYTES - 2 * (len(author) - 2)
            title = shorten_title(clean_title(item['title']), max_bytes)
            # Flush per line: a downstream reader should get each entry at once.
            print(author, title, item['link'], flush=True)
            seen.append(item['link'])


if __name__ == '__main__':
    main()