#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Quick hack to convert a kanji list (from STDIN) to Heisig
"Remembering the Kanji" numbers.

Reads one kanji per line from standard input, downloads the matching
jisho.org details page with ``wget`` and writes ``<number> <kanji>`` (or an
error line) per kanji to ``kanji_rtk_fetch.py-output``.
"""

# Modules
try:
    import urllib2  # noqa: F401 -- kept for the disabled urlopen code path
except ImportError:  # Python 3 has no urllib2; nothing below depends on it
    urllib2 = None
import sys
import re
import time
import subprocess

OUTPUT_PATH = "kanji_rtk_fetch.py-output"
BASE_URL = "http://jisho.org/kanji/details/"
FETCH_DELAY_SECONDS = 3  # pause before every request to go easy on the server
RTK_LABEL = "Remembering The Kanji"

# A run of digits followed by the label, with nothing but whitespace and
# markup tags in between.
# NOTE(review): the exact markup jisho.org emits between the number and the
# label is not visible from this script -- confirm against a live page.
RTK_PATTERN = re.compile(r"(\d+)(?:\s|<[^>]*>)*" + re.escape(RTK_LABEL))


def find_rtk_number(data):
    """Return the "Remembering The Kanji" index found in *data*.

    Very dumb parsing: the first number that is directly followed by the
    ``Remembering The Kanji`` label (ignoring whitespace and tags) wins.

    Args:
        data: Text of a kanji details page.

    Returns:
        The index as a string of digits, or ``None`` when the page holds no
        such number.
    """
    match = RTK_PATTERN.search(data)
    if match is None:
        return None
    return match.group(1)


def fetch_page(url):
    """Download *url* with wget and return the page body as text.

    urllib2 did not retrieve every kanji page, wget does the job better.
    A failed download simply yields whatever wget printed (usually nothing).
    """
    child = subprocess.Popen(["wget", "-O", "-", url], stdout=subprocess.PIPE)
    # communicate() drains the pipe in one go and waits for wget to exit.
    raw = child.communicate()[0]
    if not isinstance(raw, str):
        # Python 3 hands back bytes; Python 2 already gives a str.
        raw = raw.decode("utf-8", "replace")
    return raw


def main():
    """Look up every kanji from STDIN and record the results."""
    # The with-block guarantees the results are flushed and the file closed,
    # even when the run is interrupted half way through the list.
    with open(OUTPUT_PATH, "w") as out:
        for line in sys.stdin:
            kanji = line.strip()  # No newlines
            if not kanji:
                # A blank line would only trigger a pointless request.
                continue

            print("Fetching " + kanji + " ...")
            url = BASE_URL + kanji + "/"
            print(" " + url)
            time.sleep(FETCH_DELAY_SECONDS)

            kanjinum = find_rtk_number(fetch_page(url))

            # Write the result
            if kanjinum is None:
                print(" Error fetching " + kanji + "\n")
                out.write("Error fetching " + kanji + "\n")
            else:
                print("Match found")
                print(" " + kanjinum + " " + kanji + "\n")
                out.write(kanjinum + " " + kanji + "\n")

    print("Done")
    print("Output in " + OUTPUT_PATH)


if __name__ == "__main__":
    main()