#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Quick hack to convert a Kanji list (from STDIN) to Heisig
# "Remembering the Kanji" numbers.
#
# Reads one kanji per line from standard input, downloads the matching
# jisho.org details page with wget and keeps the page text in `data` for
# the parsing step that follows.
#

# Modules
import urllib2
import sys
import re
import time
import subprocess

# Output file for the results. It is not written to in this part of the
# script; presumably the parsing code further down uses it -- TODO confirm.
out = open("kanji_rtk_fetch.py-output", "w")

for kanji in sys.stdin.readlines():
    kanji = kanji.strip()  # No newlines

    # A single parenthesised argument prints the same text under Python 2's
    # print statement and Python 3's print function.
    print("Fetching " + kanji + " ...")
    url = "http://jisho.org/kanji/details/" + kanji + "/"
    print(" " + url)

    # Pause before every request (presumably to avoid hammering the server).
    time.sleep(3)

    # Does not get all Kanji ... wget does the job better
    # f = urllib2.urlopen(url)
    # data = f.read()
    child = subprocess.Popen(["wget", "-O", "-", url], stdout=subprocess.PIPE)

    # communicate() reads the child's whole stdout in one go and waits for it
    # to exit, replacing the line-by-line `data = data + line` concatenation
    # followed by a separate wait().
    data = child.communicate()[0]

    # Veeery dumb parsing, maybe it helps :-)
    # Look for
    #