/*
 * Cachedot watches Slashdot and mirrors the sites that new stories link to into
 * Freenet.
 *
 * TODO:
 * Make fcpputsite work
 * Convert external links to use __CHECKED_HTTP__
 * Make the index page nicer (like have a copy of the story with the links replaced)
 * Be polite and check for robots.txt directives
 * (Longer term): Try to remove the reliance on external Unix programs like wget and
 * fcpputsite
 */
import java.io.*;
import java.net.*;
import java.util.*;
import java.util.regex.*;

/**
 * Mirror the hyperlinks on a Slashdot story to Freenet
 *
 *@author     Sanity
 *@created    July 20, 2002
 */
public class Cachedot {

    /** Pause between two polls of the Slashdot XML feed, in milliseconds. */
    private static final long POLL_INTERVAL_MS = 10000;

    /**
     * Extracts title (group 1), body text (group 2) and story id (group 3) of the
     * first story on the front page.
     *
     * NOTE(review): the source this was restored from had the markup between the
     * title group and ".*?dept" stripped, which left the title group always
     * empty. "&lt;/FONT&gt;" is a reconstruction - confirm against the real page.
     */
    private static final Pattern STORY_PATTERN = Pattern.compile(
            "FACE=\"arial,helvetica\" SIZE=\"4\" COLOR=\"#FFFFFF\">(.*?)</FONT>"
            + ".*?dept(.*?)Read More.*?articles/(.*?)\\.shtml",
            Pattern.DOTALL);

    /** URL of the Slashdot XML summary that is polled for changes; set by main. */
    static URL slashXml;
    /** URL of the Slashdot front page that stories are scraped from; set by main. */
    static URL slashFP;
    /** Local directory that holds the mirror before it is inserted. */
    static File mirrorDir = new File("/tmp/cachedot");


    /**
     * Polls the Slashdot XML feed forever. Whenever its content changes, the
     * newest front-page story is scraped, an index page is written to the mirror
     * directory and that directory is inserted into Freenet.
     *
     * An I/O failure during one poll is logged and the loop carries on with the
     * next poll, so a transient network error does not stop the watcher.
     *
     *@param  args                       The command line arguments (unused)
     *@exception  MalformedURLException  If one of the built-in URLs is invalid
     *@exception  IOException            Declared for compatibility
     *@exception  InterruptedException   If the sleep between polls is interrupted
     */
    public static void main(String[] args) throws MalformedURLException, IOException, InterruptedException {
        slashXml = new URL("http://slashdot.org/slashdot.xml");
        slashFP = new URL("http://slashdot.org/");
        String lastXML = "";
        while (true) {
            log("Checking " + slashXml + " for changes");
            try {
                String currentXML = fetch(slashXml);
                // Compare the content itself; comparing hash codes could miss a change.
                if (!lastXML.equals(currentXML)) {
                    lastXML = currentXML;
                    Story s = getLastStory();
                    grabStory(s);
                    if (Util.fcpInsert(mirrorDir, s.getId())) {
                        log("Insert complete - mirror available at freenet:SSK@Jfwpce58XD6gk~uOz4zy2rzV65gPAgM/" + s.getId() + "//");
                    } else {
                        log("Insert of story " + s.getId() + " failed");
                    }
                }
            } catch (IOException e) {
                log("Poll failed, will retry: " + e);
            }
            Thread.sleep(POLL_INTERVAL_MS);
        }
    }


    /**
     * Downloads the Slashdot front page and parses the most recent story out of it.
     *
     *@return                  The story found on the front page
     *@exception  IOException  If the page cannot be read or contains no
     *      recognisable story
     */
    public static Story getLastStory() throws IOException {
        log("Grabbing last story from " + slashFP);
        Matcher m = STORY_PATTERN.matcher(fetch(slashFP));
        if (!m.find()) {
            // Without this check group() would throw IllegalStateException.
            throw new IOException("No story found on " + slashFP);
        }
        return new Story(m.group(1), m.group(2), m.group(3));
    }


    /**
     * Recreates the mirror directory from scratch and writes an index.html for
     * the given story into it.
     *
     *@param  s  The story to build the mirror index for
     */
    public static void grabStory(Story s) {
        if (mirrorDir.exists()) {
            Util.recursDel(mirrorDir);
        }
        if (!mirrorDir.mkdir()) {
            log("Could not create mirror directory " + mirrorDir);
        }

        // NOTE(review): the markup in these literals was lost in the source this
        // was restored from (only the text and line breaks survived). The tags
        // below are a minimal reconstruction - confirm against the original.
        String heading = "Cachedot Mirror of \"" + s.getTitle() + "\"";
        StringBuffer index = new StringBuffer();
        index.append("<html><head><title>" + heading + "</title></head>\n");
        index.append("<body>\n<h1>" + heading + "</h1>\n");
        index.append("</body></html>");

        PrintWriter pw = null;
        try {
            pw = new PrintWriter(new FileOutputStream(new File(mirrorDir, "index.html")));
            pw.print(index.toString());
            // PrintWriter never throws on write; checkError() flushes and reports failures.
            if (pw.checkError()) {
                log("Error writing index file: write failed");
            }
        } catch (IOException e) {
            log("Error writing index file: " + e);
        } finally {
            if (pw != null) {
                pw.close();
            }
        }
    }


    /**
     * Writes a message to standard error.
     *
     *@param  message  The text to log
     */
    public static void log(String message) {
        System.err.println(message);
    }


    /**
     * Reads everything behind a URL into a String, mapping each byte to the char
     * with the same value, and always closes the stream.
     *
     *@param  url              The URL to read
     *@return                  The content of the URL
     *@exception  IOException  If the URL cannot be opened or read
     */
    private static String fetch(URL url) throws IOException {
        InputStream in = new BufferedInputStream(url.openStream());
        try {
            StringBuffer sb = new StringBuffer(1000);
            int r;
            while ((r = in.read()) != -1) {
                sb.append((char) r);
            }
            return sb.toString();
        } finally {
            in.close();
        }
    }
}

/**
 * A Slashdot story
 *
 *@author     Sanity
 *@created    July 20, 2002
 */
class Story {

    /**
     * Matches a hyperlink; group 1 is the target URL, group 2 the link text.
     *
     * NOTE(review): the source this was restored from had the anchor markup
     * stripped from this literal, leaving a pattern with a single group while
     * the code reads group 2. This form is a reconstruction - confirm.
     */
    private static final Pattern LINK_PATTERN =
            Pattern.compile("<a href=\"(.*?)\">(.*?)<", Pattern.CASE_INSENSITIVE);

    String title, text, id;
    /** Maps each URL found in the story text to its link text. */
    Hashtable urls = new Hashtable();


    /**
     * Constructor for the Story object. Strips markup from the title, removes
     * the slashes from the id and collects every hyperlink in the text.
     *
     *@param  title  The title of the story, possibly containing HTML tags
     *@param  text   The text of the story
     *@param  id     The Slashdot story ID
     */
    public Story(String title, String text, String id) {
        // Remove HTML from title: drop everything between '<' and '>' inclusive.
        StringBuffer titleSb = new StringBuffer(title.length());
        boolean outsideTag = true;
        for (int x = 0; x < title.length(); x++) {
            char c = title.charAt(x);
            if (c == '<') {
                outsideTag = false;
            } else if (c == '>') {
                outsideTag = true;
            } else if (outsideTag) {
                titleSb.append(c);
            }
        }
        // Use the stripped buffer; the raw title would keep the tags.
        this.title = titleSb.toString();
        this.text = text;

        // Remove '/'s from ID
        StringBuffer idSb = new StringBuffer(id.length());
        for (int x = 0; x < id.length(); x++) {
            char c = id.charAt(x);
            if (c != '/') {
                idSb.append(c);
            }
        }
        this.id = idSb.toString();

        Cachedot.log("Parsing story: '" + this.title + "' with id " + this.id);
        Matcher m = LINK_PATTERN.matcher(text);
        while (m.find()) {
            Cachedot.log("Found URL: " + m.group(1));
            urls.put(m.group(1), m.group(2));
        }
        Cachedot.log("Done parsing story");
    }


    /**
     * Gets the id attribute of the Story object
     *
     *@return    The story id with all '/' characters removed
     */
    public String getId() {
        return this.id;
    }


    /**
     * Gets the title attribute of the Story object
     *
     *@return    The title with HTML tags removed
     */
    public String getTitle() {
        return this.title;
    }


    /**
     * Gets an Enumeration of the URLs in this story
     *
     *@return    The URLs (as Strings) that were found in the story text
     */
    public Enumeration getUrls() {
        return urls.keys();
    }


    /**
     * Gets the text associated with a URL
     *
     *@param  url  A URL found in this story
     *@return      The link text for that URL, or null if the URL is unknown
     */
    public String getText(String url) {
        return (String) urls.get(url);
    }
}

/**
 * Helpers that run the external programs Cachedot relies on.
 *
 *@author     Sanity
 *@created    July 21, 2002
 */
class Util {
    static Runtime rt = Runtime.getRuntime();


    /**
     * Mirrors a URL recursively with the external wget program.
     *
     *@param  url    The URL to mirror
     *@param  dest   The working directory wget runs in
     *@param  depth  The recursion depth passed to wget as --level
     *@return        true if wget exited with status 0, false otherwise
     */
    public static boolean wget(String url, File dest, int depth) {
        Cachedot.log("Mirroring " + url + " to " + dest);
        String[] cmd = {
            "wget", "--timeout=10", "--tries=2", "--recursive",
            "--level=" + depth, "--convert-links", url
        };
        try {
            // The empty envp array gives wget an empty environment, as before.
            return runAndWait(cmd, new String[0], dest) == 0;
        } catch (Exception e) {
            restoreInterrupt(e);
            Cachedot.log("Wget of " + url + " failed due to " + e);
            return false;
        }
    }


    /**
     * Inserts a directory into Freenet with the external fcpputsite program.
     *
     *@param  dir   The directory whose files are inserted
     *@param  name  The site name to insert under
     *@return       true if fcpputsite exited with status 0, false otherwise
     */
    public static boolean fcpInsert(File dir, String name) {
        // NOTE(review): the two trailing arguments look like the SSK key pair.
        // Presumably the second is the private key; if so it should not live in
        // source - confirm and move it to configuration.
        String[] cmd = {
            "fcpputsite", "-d", "-l", "3", name, dir.toString(),
            "Jfwpce58XD6gk~uOz4zy2rzV65g", "PZeKc90WU-8vdQ~Oc451Fw2tpEM"
        };
        try {
            Cachedot.log("Starting fcpInsert of files in " + dir);
            // Passing the name as its own argument replaces the former '...'
            // quoting, which exec(String) handed to the program literally.
            return runAndWait(cmd, null, null) == 0;
        } catch (Exception e) {
            restoreInterrupt(e);
            Cachedot.log("Insert of " + name + " failed due to " + e);
            return false;
        }
    }


    /**
     * Deletes a file or directory tree with the external rm program.
     *
     *@param  del  The file or directory to delete
     *@return      true if rm exited with status 0, false otherwise
     */
    public static boolean recursDel(File del) {
        // Argument array: a path containing spaces must never be split into
        // several rm targets.
        String[] cmd = {"rm", "-rf", del.toString()};
        try {
            return runAndWait(cmd, null, null) == 0;
        } catch (Exception e) {
            restoreInterrupt(e);
            Cachedot.log("Delete of " + del + " failed due to " + e);
            return false;
        }
    }


    /**
     * Starts a command, echoes its stdout and stderr in background threads so
     * the child cannot block on a full pipe, and waits for it to exit.
     *
     *@param  cmd                       The program and its arguments
     *@param  envp                      The environment, or null to inherit
     *@param  dir                       The working directory, or null to inherit
     *@return                           The exit status of the command
     *@exception  IOException           If the command cannot be started
     *@exception  InterruptedException  If the wait is interrupted
     */
    private static int runAndWait(String[] cmd, String[] envp, File dir) throws IOException, InterruptedException {
        Process p = rt.exec(cmd, envp, dir);
        new InputStreamPrinter(p.getInputStream()).start();
        new InputStreamPrinter(p.getErrorStream()).start();
        return p.waitFor();
    }


    /**
     * Re-sets the thread's interrupt flag when the caught exception was an
     * interruption, so callers further up can still observe it.
     *
     *@param  e  The exception that was caught
     */
    private static void restoreInterrupt(Exception e) {
        if (e instanceof InterruptedException) {
            Thread.currentThread().interrupt();
        }
    }
}

/**
 * Thread that copies everything readable from an InputStream to System.out.
 *
 *@author     ian
 *@created    July 25, 2002
 */
class InputStreamPrinter extends Thread {
    InputStream os;


    /**
     * Constructor for the InputStreamPrinter object
     *
     *@param  os  The stream to copy to System.out
     */
    public InputStreamPrinter(InputStream os) {
        this.os = os;
    }


    /**
     * Main processing method for the InputStreamPrinter object: copies the
     * stream to System.out until end of stream or a read error, then closes it.
     */
    public void run() {
        System.out.println("OutputStreamPrinter started");
        try {
            byte[] buf = new byte[4096];
            int n;
            while ((n = os.read(buf)) != -1) {
                System.out.write(buf, 0, n);
                // Flush per chunk so the output shows up while the process runs.
                System.out.flush();
            }
        } catch (IOException e) {
            Cachedot.log("Error reading process output: " + e);
        } finally {
            try {
                os.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing a finished stream fails.
            }
        }
        System.out.println("OutputStreamPrinter stopped");
    }
}