2005-11-05 dust

* Fix sucker to delete its temporary files.
* Improve sucker's SML output somewhat.
* Fix Exception in SMLParser for weird SML.
This commit is contained in:
dust
2005-11-05 11:01:57 +00:00
committed by zzz
parent 0ad18cd0ba
commit 9050d7c218
4 changed files with 118 additions and 35 deletions

View File

@@ -15,6 +15,8 @@ import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Properties; import java.util.Properties;
import org.apache.tools.ant.filters.TokenFilter.IgnoreBlank;
import com.sun.syndication.feed.synd.SyndCategory; import com.sun.syndication.feed.synd.SyndCategory;
import com.sun.syndication.feed.synd.SyndContent; import com.sun.syndication.feed.synd.SyndContent;
import com.sun.syndication.feed.synd.SyndEntry; import com.sun.syndication.feed.synd.SyndEntry;
@@ -60,6 +62,8 @@ public class Sucker {
private List fileNames; private List fileNames;
private List fileStreams; private List fileStreams;
private List fileTypes; private List fileTypes;
private List tempFiles; // deleted after finished push
private boolean stripNewlines;
public Sucker() { public Sucker() {
} }
@@ -131,6 +135,9 @@ public class Sucker {
*/ */
public void suck() { public void suck() {
SyndFeed feed; SyndFeed feed;
File fetched=null;
tempFiles = new ArrayList();
// Find base url // Find base url
int idx=urlToLoad.lastIndexOf('/'); int idx=urlToLoad.lastIndexOf('/');
@@ -139,7 +146,7 @@ public class Sucker {
else else
baseUrl=urlToLoad; baseUrl=urlToLoad;
debugLog("Processing: "+urlToLoad); infoLog("Processing: "+urlToLoad);
debugLog("Base url: "+baseUrl); debugLog("Base url: "+baseUrl);
// //
@@ -187,8 +194,7 @@ public class Sucker {
// fetch // fetch
int numRetries = 2; int numRetries = 2;
File fetched = File.createTempFile("sucker", ".fetch"); fetched = File.createTempFile("sucker", ".fetch");
fetched.deleteOnExit();
EepGet get = new EepGet(I2PAppContext.getGlobalContext(), shouldProxy, proxyHost, proxyPortNum, EepGet get = new EepGet(I2PAppContext.getGlobalContext(), shouldProxy, proxyHost, proxyPortNum,
numRetries, fetched.getAbsolutePath(), urlToLoad); numRetries, fetched.getAbsolutePath(), urlToLoad);
SuckerFetchListener lsnr = new SuckerFetchListener(); SuckerFetchListener lsnr = new SuckerFetchListener();
@@ -197,10 +203,12 @@ public class Sucker {
boolean ok = lsnr.waitForSuccess(); boolean ok = lsnr.waitForSuccess();
if (!ok) { if (!ok) {
System.err.println("Unable to retrieve the url after " + numRetries + " tries."); System.err.println("Unable to retrieve the url after " + numRetries + " tries.");
fetched.delete();
return; return;
} }
if(get.getNotModified()) { if(get.getNotModified()) {
debugLog("not modified, saving network bytes from useless fetch"); debugLog("not modified, saving network bytes from useless fetch");
fetched.delete();
return; return;
} }
@@ -239,6 +247,8 @@ public class Sucker {
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
} }
if(fetched!=null)
fetched.delete();
debugLog("Done."); debugLog("Done.");
} }
@@ -277,7 +287,7 @@ public class Sucker {
try { try {
while ((ls_str = ls_in.readLine()) != null) { while ((ls_str = ls_in.readLine()) != null) {
debugLog(pushScript + ": " + ls_str); infoLog(pushScript + ": " + ls_str);
} }
} catch (IOException e) { } catch (IOException e) {
return false; return false;
@@ -301,6 +311,8 @@ public class Sucker {
*/ */
private String convertToSml(SyndEntry e) { private String convertToSml(SyndEntry e) {
String subject; String subject;
stripNewlines=false;
// Calculate messageId, and check if we have got the message already // Calculate messageId, and check if we have got the message already
String feedHash = sha1(urlToLoad); String feedHash = sha1(urlToLoad);
@@ -317,7 +329,7 @@ public class Sucker {
if (existsInHistory(messageId)) if (existsInHistory(messageId))
return null; return null;
debugLog("new: " + messageId); infoLog("new: " + messageId);
try { try {
@@ -375,11 +387,11 @@ public class Sucker {
fileTypes); fileTypes);
if(uri==null) { if(uri==null) {
debugLog("pushToSyndie failure."); errorLog("pushToSyndie failure.");
return null; return null;
} }
else else
debugLog("pushToSyndie success, uri: "+uri.toString()); infoLog("pushToSyndie success, uri: "+uri.toString());
} }
else else
{ {
@@ -389,22 +401,32 @@ public class Sucker {
fos.write(sml.getBytes()); fos.write(sml.getBytes());
if (pushScript != null) { if (pushScript != null) {
if (!execPushScript(""+messageNumber, time)) { if (!execPushScript(""+messageNumber, time)) {
debugLog("################## push failed"); errorLog("push script failed");
} else { } else {
debugLog("push success"); infoLog("push script success: nr "+messageNumber);
} }
} }
} }
messageNumber++; messageNumber++;
deleteTempFiles();
return messageId; return messageId;
} catch (FileNotFoundException e1) { } catch (FileNotFoundException e1) {
e1.printStackTrace(); e1.printStackTrace();
} catch (IOException e2) { } catch (IOException e2) {
e2.printStackTrace(); e2.printStackTrace();
} }
deleteTempFiles();
return null; return null;
} }
/**
 * Deletes every file currently registered in {@code tempFiles} and
 * unregisters it, so a later call does not try to delete the same
 * files again and the list does not keep growing.
 *
 * Deletion is best effort: the result of {@link File#delete()} is
 * ignored, exactly as before.
 */
private void deleteTempFiles() {
    if (tempFiles == null) {
        // The list is only created in suck(); nothing to clean up before that.
        return;
    }
    Iterator iter = tempFiles.iterator();
    while (iter.hasNext()) {
        File tempFile = (File) iter.next();
        tempFile.delete();
        // Drop the entry once handled so it is not re-deleted next time.
        iter.remove();
    }
}
private String htmlToSml(String html) { private String htmlToSml(String html) {
String sml=""; String sml="";
@@ -414,15 +436,17 @@ public class Sucker {
for(i=0;i<html.length();) for(i=0;i<html.length();)
{ {
if(html.charAt(i)=='<') char c=html.charAt(i);
if(c=='<')
{ {
//log("html: "+html.substring(i)); //log("html: "+html.substring(i));
int tagLen = findTagLen(html.substring(i)); int tagLen = findTagLen(html.substring(i));
if(tagLen<=0) if(tagLen<=0) {
{ // did not find anything that looks like tag, treat it like text
debugLog("Bad html? ("+html+")"); sml+="&lt;";
break; i++;
continue;
} }
// //
String htmlTag = html.substring(i,i+tagLen); String htmlTag = html.substring(i,i+tagLen);
@@ -430,18 +454,25 @@ public class Sucker {
//log("htmlTag: "+htmlTag); //log("htmlTag: "+htmlTag);
String smlTag = htmlTagToSmlTag(htmlTag); String smlTag = htmlTagToSmlTag(htmlTag);
if(smlTag!=null) if(smlTag!=null) {
sml+=smlTag; sml+=smlTag;
i+=tagLen; i+=tagLen;
//log("tagLen: "+tagLen); sml+=" ";
sml+=" "; } else {
// Unrecognized tag, treat it as text
sml+="&lt;";
i++;
continue;
}
} }
else else
{ {
char c=html.charAt(i++); if( !stripNewlines || (c!='\r' && c!='\n')) {
sml+=c;
if(c=='[' || c==']')
sml+=c; sml+=c;
if(c=='[' || c==']')
sml+=c;
}
i++;
} }
} }
@@ -449,10 +480,18 @@ public class Sucker {
} }
private String htmlTagToSmlTag(String htmlTag) { private String htmlTagToSmlTag(String htmlTag) {
final String ignoreTags[] = {
"span",
"tr",
"td",
"th",
"div",
"input"
};
String ret=""; String ret="";
String htmlTagLowerCase=htmlTag.toLowerCase(); String htmlTagLowerCase=htmlTag.toLowerCase();
if(importEnclosures && htmlTagLowerCase.startsWith("<img")) if(htmlTagLowerCase.startsWith("<img"))
{ {
debugLog("Found image tag: "+htmlTag); debugLog("Found image tag: "+htmlTag);
int a,b; int a,b;
@@ -462,7 +501,7 @@ public class Sucker {
b++; b++;
String imageLink=htmlTag.substring(a,b); String imageLink=htmlTag.substring(a,b);
if(pendingEndLink) { if(pendingEndLink) { // <a href="..."><img src="..."></a> -> [link][/link][img][/img]
ret="[/link]"; ret="[/link]";
pendingEndLink=false; pendingEndLink=false;
} }
@@ -493,7 +532,7 @@ public class Sucker {
return ret; return ret;
} }
if(importRefs && htmlTagLowerCase.startsWith("<a ")) if(htmlTagLowerCase.startsWith("<a "))
{ {
debugLog("Found link tag: "+htmlTag); debugLog("Found link tag: "+htmlTag);
int a,b; int a,b;
@@ -520,8 +559,10 @@ public class Sucker {
} }
if ("</a>".equals(htmlTagLowerCase)) { if ("</a>".equals(htmlTagLowerCase)) {
if (pendingEndLink) if (pendingEndLink) {
pendingEndLink=false;
return "[/link]"; return "[/link]";
}
} }
if("<b>".equals(htmlTagLowerCase)) if("<b>".equals(htmlTagLowerCase))
@@ -536,6 +577,23 @@ public class Sucker {
return "[i]"; return "[i]";
if("</em>".equals(htmlTagLowerCase)) if("</em>".equals(htmlTagLowerCase))
return "[/i]"; return "[/i]";
if(htmlTagLowerCase.startsWith("<br")) {
stripNewlines=true;
return "\n";
}
if("</br>".equals(htmlTagLowerCase))
return "";
if(htmlTagLowerCase.startsWith("<table") || "</table>".equals(htmlTagLowerCase)) // emulate table with hr
return "[hr][/hr]";
for(int i=0;i<ignoreTags.length;i++) {
String openTag = "<"+ignoreTags[i];
String closeTag = "</"+ignoreTags[i];
if(htmlTagLowerCase.startsWith(openTag))
return "";
if(htmlTagLowerCase.startsWith(closeTag))
return "";
}
return null; return null;
} }
@@ -544,7 +602,7 @@ public class Sucker {
link=link.replaceAll("&amp;","&"); link=link.replaceAll("&amp;","&");
debugLog("Fetch attachment from: "+link); infoLog("Fetch attachment from: "+link);
File fetched; File fetched;
if(pushToSyndie) { if(pushToSyndie) {
@@ -556,7 +614,7 @@ public class Sucker {
e.printStackTrace(); e.printStackTrace();
return; return;
} }
fetched.deleteOnExit(); tempFiles.add(fetched);
} else { } else {
String attachmentPath = messagePath+"."+attachmentCounter; String attachmentPath = messagePath+"."+attachmentCounter;
fetched = new File(attachmentPath); fetched = new File(attachmentPath);
@@ -574,6 +632,7 @@ public class Sucker {
fetched.delete(); fetched.delete();
return; return;
} }
tempFiles.add(fetched);
String filename=EepGet.suggestName(link); String filename=EepGet.suggestName(link);
String contentType = get.getContentType(); String contentType = get.getContentType();
if(contentType==null) if(contentType==null)
@@ -592,6 +651,20 @@ public class Sucker {
attachmentCounter++; attachmentCounter++;
} }
/**
 * Reports an error message: sends it to {@code _log} at ERROR level
 * when that level is enabled, and additionally prints it to standard
 * output unless {@code pushToSyndie} is set.
 *
 * @param string the message to report
 */
private void errorLog(String string) {
    if (_log.shouldLog(Log.ERROR)) {
        _log.error(string);
    }
    if (pushToSyndie) {
        return;
    }
    System.out.println(string);
}
/**
 * Reports an informational message: sends it to {@code _log} at INFO
 * level when that level is enabled, and additionally prints it to
 * standard output unless {@code pushToSyndie} is set.
 *
 * @param string the message to report
 */
private void infoLog(String string) {
    if (_log.shouldLog(Log.INFO)) {
        _log.info(string);
    }
    if (pushToSyndie) {
        return;
    }
    System.out.println(string);
}
private void debugLog(String string) { private void debugLog(String string) {
if (_log.shouldLog(Log.DEBUG)) if (_log.shouldLog(Log.DEBUG))
_log.debug(string); _log.debug(string);
@@ -612,7 +685,6 @@ public class Sucker {
i++; i++;
} }
} }
System.out.println("WTF in Sucker.findTagLen("+s+")");
return -1; return -1;
} }

View File

@@ -371,10 +371,15 @@ public class SMLParser {
valStart = off; valStart = off;
} else { } else {
valEnd = off; valEnd = off;
String name = source.substring(nameStart, nameEnd); if ( ( nameStart >= 0 ) &&
String val = source.substring(valStart+1, valEnd); ( nameEnd >= 0 ) &&
rv.put(name.trim(), val.trim()); ( valStart >= 0 ) &&
( valEnd >= 0 )) {
String name = source.substring(nameStart, nameEnd);
String val = source.substring(valStart+1, valEnd);
rv.put(name.trim(), val.trim());
}
nameStart = -1; nameStart = -1;
nameEnd = -1; nameEnd = -1;
valStart = -1; valStart = -1;
@@ -450,6 +455,7 @@ public class SMLParser {
test("A: B\n\n[b]This[/b] is [i]special[/i][cut]why?[/cut][u]because I say so[/u].\neven if you dont care"); test("A: B\n\n[b]This[/b] is [i]special[/i][cut]why?[/cut][u]because I say so[/u].\neven if you dont care");
test("A: B\n\nHi\n[pre]>foo&bar<>blah!blah\nblah\nblah[/pre]foo![pre]bar[/pre]"); test("A: B\n\nHi\n[pre]>foo&bar<>blah!blah\nblah\nblah[/pre]foo![pre]bar[/pre]");
//(openTagEnd seems wrong) test("A: B\n\n[link schema=\"web\" location=\"http://w.i2p?i2paddr...\"] Try it [[i2p]] [/link]");
} }
private static void test(String rawSML) { private static void test(String rawSML) {
I2PAppContext ctx = I2PAppContext.getGlobalContext(); I2PAppContext ctx = I2PAppContext.getGlobalContext();

View File

@@ -1,4 +1,9 @@
$Id: history.txt,v 1.312 2005/11/01 19:35:21 jrandom Exp $ $Id: history.txt,v 1.313 2005/11/03 20:20:18 jrandom Exp $
2005-11-05 dust
* Fix sucker to delete its temporary files.
* Improve sucker's sml output some.
* Fix Exception in SMLParser for weird sml.
2005-11-03 zzz 2005-11-03 zzz
* Added a new error page to the eepproxy to differentiate the full 60 * Added a new error page to the eepproxy to differentiate the full 60

View File

@@ -15,9 +15,9 @@ import net.i2p.CoreVersion;
* *
*/ */
public class RouterVersion { public class RouterVersion {
public final static String ID = "$Revision: 1.281 $ $Date: 2005/10/30 00:47:57 $"; public final static String ID = "$Revision: 1.282 $ $Date: 2005/11/03 20:20:17 $";
public final static String VERSION = "0.6.1.4"; public final static String VERSION = "0.6.1.4";
public final static long BUILD = 2; public final static long BUILD = 3;
public static void main(String args[]) { public static void main(String args[]) {
System.out.println("I2P Router version: " + VERSION + "-" + BUILD); System.out.println("I2P Router version: " + VERSION + "-" + BUILD);
System.out.println("Router ID: " + RouterVersion.ID); System.out.println("Router ID: " + RouterVersion.ID);