package org.jresearch.gossip.contrib.util;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;

import javax.mail.BodyPart;
import javax.mail.MessagingException;
import javax.mail.Session;
import javax.mail.internet.MimeBodyPart;
import javax.mail.internet.MimeMessage;
import javax.mail.internet.MimeMultipart;

/**
 * Command-line tool that reads directories of mail message files and prints
 * SQL <code>INSERT</code> statements for the <code>jrf_thread</code> and
 * <code>jrf_message</code> tables on standard output.
 *
 * <p>
 * Every regular file in an <code>--archive</code> directory is parsed as one
 * MIME message. Messages are grouped into threads by subject: leading
 * <code>"RE: "</code> prefixes are stripped and only the first 50 characters
 * are compared. All diagnostics go to standard error so that standard output
 * contains nothing but SQL.
 * </p>
 *
 * <p>
 * Archives are loaded while the command line is being parsed; the SQL is
 * generated afterwards, so options that follow an <code>--archive</code>
 * still affect the generated statements.
 * </p>
 */
public class MList2JGossip {

    /** Version reported by the usage text. */
    private static final String VERSION_STR = "1.1";

    /** Length of the <code>"RE: "</code> reply prefix stripped from subjects. */
    private static final int REPLY_PREFIX_LEN = 4;

    /** Number of leading subject characters used as the thread key. */
    private static final int THREAD_KEY_LEN = 50;

    /** Subjects longer than this are truncated before being written. */
    private static final int MAX_SUBJECT_LEN = 252;

    /** When set, per-message details are printed while loading. */
    private boolean debug = false;

    /** When set, per-archive summaries are printed while loading. */
    private boolean verbose = false;

    /** Mail session used to parse the message files. */
    private Session session = null;

    /** Forum id written into every thread row (<code>--forum</code>). */
    private int fForumId = 25;

    /** Last thread id handed out; incremented before each new thread (<code>--threadid</code>). */
    private int fThreadId = 100;

    /** Next message id to hand out (<code>--msgid</code>). */
    private int fMsgId = 500;

    /** Sender name written into every message row (<code>--sender</code>). */
    private String fSender = "anonymous";

    /** IP address written into every message row (<code>--ipaddr</code>). */
    private String fIpAddress = "127.0.0.1";

    /** Number of messages rejected because their id was already loaded. */
    private int errDupes = 0;

    /** Message id (String) to parsed MimeMessage. */
    private HashMap allMsgs = null;

    /** Message id (String) to raw subject (String, may be null). */
    private HashMap subjects = null;

    /** Thread key (String) to ArrayList of message ids (String). */
    private HashMap bySubject = null;

    /** Message id (String) to the thread ArrayList that contains it. */
    private HashMap byMsgId = null;

    /** Formats sent dates as <code>yyyyMMddHHmmss</code> for the SQL output. */
    private SimpleDateFormat dateFmt = null;

    /**
     * Program entry point.
     *
     * @param argv command-line arguments, see {@link #printUsageAndExit()}
     */
    public static void main(String[] argv) {
        MList2JGossip app = new MList2JGossip();
        app.instanceMain(argv);
    }

    /**
     * Initialises the indexes, parses the command line (which loads the
     * archives) and then writes the SQL for everything that was loaded.
     *
     * @param argv command-line arguments
     */
    public void instanceMain(String[] argv) {
        this.allMsgs = new HashMap(1024);
        this.subjects = new HashMap(1024);
        this.bySubject = new HashMap(1024);
        this.byMsgId = new HashMap(1024);

        this.dateFmt = new SimpleDateFormat("yyyyMMddHHmmss");

        this.session = Session.getDefaultInstance(System.getProperties(), null);

        this.processArguments(argv);

        try {
            this.processArchives();
        } catch (Exception ex) {
            ex.printStackTrace(System.err);
        }
    }

    /**
     * Escapes a value for use inside a single-quoted SQL string literal:
     * each single quote is doubled and each backslash is doubled.
     *
     * @param str the raw text, must not be null
     * @return the escaped text
     */
    private String escapeString(String str) {
        int strLen = str.length();
        StringBuffer buf = new StringBuffer(strLen + 16);

        for (int chIdx = 0; chIdx < strLen; ++chIdx) {
            char ch = str.charAt(chIdx);

            if (ch == '\'') {
                buf.append("''");
            } else if (ch == '\\') {
                buf.append("\\\\");
            } else {
                buf.append(ch);
            }
        }

        return buf.toString();
    }

    /**
     * Writes the SQL for every loaded thread to standard output.
     *
     * <p>
     * A thread row is written when the first usable message of the thread is
     * reached; a message row is written for every message from which a text
     * body could be extracted. A message that cannot be converted is reported
     * on standard error and skipped without affecting the others.
     * </p>
     *
     * @throws IOException declared for callers; per-message failures are
     *         handled internally
     * @throws MessagingException declared for callers; per-message failures
     *         are handled internally
     */
    private void processArchives() throws IOException, MessagingException {
        Iterator threads = this.bySubject.values().iterator();
        while (threads.hasNext()) {
            ArrayList thread = (ArrayList) threads.next();
            boolean insertedThread = false;

            for (int ti = 0; ti < thread.size(); ++ti) {
                String msgId = (String) thread.get(ti);

                try {
                    MimeMessage msg = (MimeMessage) this.allMsgs.get(msgId);
                    // Check for null before touching the message.
                    if (msg == null) {
                        System.err.println("MSG [" + msgId
                                + "] is not loaded, skipped");
                        continue;
                    }

                    String subject = msg.getSubject();
                    Date sentDate = msg.getSentDate();
                    // Both tables need a timestamp, so a message without a
                    // sent date cannot be written.
                    if (sentDate == null) {
                        System.err.println("MSG [" + msgId
                                + "] has no sent date, skipped");
                        continue;
                    }
                    String dateStr = this.dateFmt.format(sentDate);

                    if (!insertedThread) {
                        ++this.fThreadId;
                        insertedThread = true;

                        System.out.println("INSERT INTO jrf_thread "
                                + "( threadid, forumid, lintime, locked, sortby )");

                        System.out.println(" VALUES " + "( "
                                + this.fThreadId + ", " + this.fForumId
                                + ", " + "'" + dateStr + "', " + "0, "
                                + "9" + " ); ");
                    }

                    if (subject == null) {
                        subject = "";
                    }
                    if (subject.length() > MAX_SUBJECT_LEN) {
                        subject = subject.substring(0, MAX_SUBJECT_LEN);
                    }
                    subject = this.escapeString(subject);

                    String bodyStr = this.extractBody(msg, msgId);
                    if (bodyStr != null) {
                        System.out.println("INSERT INTO jrf_message "
                                + "( id, sender, centents, intime, heading, threadid, ip )");

                        // Sender and IP come from the command line and are
                        // escaped like every other quoted value.
                        System.out.println(" VALUES " + "( "
                                + this.fMsgId++ + ", " + "'"
                                + this.escapeString(this.fSender)
                                + "', " + "'" + bodyStr + "', " + "'"
                                + dateStr + "', " + "'" + subject + "', "
                                + this.fThreadId + ", " + "'"
                                + this.escapeString(this.fIpAddress)
                                + "'" + " ); ");
                    }
                } catch (Exception ex) {
                    System.err.println(ex.getClass().getName() + ": "
                            + ex.getMessage());
                    System.err.println(" " + msgId);
                    ex.printStackTrace(System.err);
                }
            }
        }
    }

    /**
     * Returns the SQL-escaped text body of a message.
     *
     * <p>
     * A plain String content is used directly. For a multipart message the
     * first part whose content type starts with <code>text/</code> (compared
     * case-insensitively) and whose content is a String is used; other parts
     * are reported on standard error. Nested multiparts are not descended
     * into.
     * </p>
     *
     * @param msg the message to read
     * @param msgId the message id, used only in diagnostics
     * @return the escaped body, or null when no text body was found
     * @throws IOException if the content cannot be read
     * @throws MessagingException if the message cannot be parsed
     */
    private String extractBody(MimeMessage msg, String msgId)
            throws IOException, MessagingException {
        Object bodyObj = msg.getContent();

        if (bodyObj instanceof String) {
            return this.escapeString((String) bodyObj);
        }

        if (!(bodyObj instanceof MimeMultipart)) {
            System.err.println("MSG [" + msgId + "] isa "
                    + bodyObj.getClass().getName());
            return null;
        }

        System.err.println("MULTIPART [" + msgId + "] ");

        MimeMultipart multi = (MimeMultipart) bodyObj;
        int cnt = multi.getCount();
        for (int bpi = 0; bpi < cnt; ++bpi) {
            BodyPart bp = multi.getBodyPart(bpi);
            if (!(bp instanceof MimeBodyPart)) {
                System.err.println("MSG [" + msgId + "] BP [" + bpi
                        + "] isa " + bp.getClass().getName());
                continue;
            }

            MimeBodyPart mbp = (MimeBodyPart) bp;
            String cType = mbp.getContentType();
            // MIME type names are case-insensitive.
            if (cType == null || !cType.regionMatches(true, 0, "text/", 0, 5)) {
                System.err.println(" SKIP " + cType);
                continue;
            }

            Object partContent = mbp.getContent();
            if (partContent instanceof String) {
                return this.escapeString((String) partContent);
            }

            // A text part that was not decoded to a String is reported and
            // the remaining parts are still tried.
            System.err.println("MSG [" + msgId + "] BP [" + bpi + "] isa "
                    + (partContent == null ? "null" : partContent
                            .getClass().getName()));
        }

        return null;
    }

    /**
     * Adds a message to the thread identified by its subject, creating the
     * thread on first use. Only the first 50 characters of the subject are
     * used as the thread key.
     *
     * @param subject the subject with reply prefixes already removed
     * @param msgId the id of the message to add
     */
    private void addBySubject(String subject, String msgId) {
        String sub = subject;
        if (sub.length() > THREAD_KEY_LEN) {
            sub = sub.substring(0, THREAD_KEY_LEN);
        }

        ArrayList ma = (ArrayList) this.bySubject.get(sub);
        if (ma == null) {
            ma = new ArrayList();
            this.bySubject.put(sub, ma);
        }

        if (!ma.contains(msgId)) {
            ma.add(msgId);
        }

        this.byMsgId.put(msgId, ma);
    }

    /**
     * Loads every regular file of a directory as a mail message and indexes
     * it for later output. Files that cannot be loaded are reported on
     * standard error and skipped.
     *
     * @param archivePath the directory that holds one message per file
     * @throws IOException declared for callers; per-file failures are
     *         handled internally
     * @throws NumberFormatException declared for callers
     * @throws ParseException declared for callers
     */
    private void loadArchive(String archivePath) throws IOException,
            NumberFormatException, ParseException {
        int numMsgs = 0;
        long begMillis = System.currentTimeMillis();

        File archDirF = new File(archivePath);

        if (this.verbose) {
            System.err.println("LOAD MESSAGES FROM: " + archDirF.getPath());
        }

        if (!archDirF.exists()) {
            System.err.println("Directory '" + archivePath
                    + "' does not exist.");
            return;
        }

        if (!archDirF.isDirectory()) {
            System.err.println("Path '" + archivePath
                    + "' is not a directory.");
            return;
        }

        String[] dirList = archDirF.list();
        // File.list() returns null when the directory cannot be read.
        if (dirList == null) {
            System.err.println("Directory '" + archivePath
                    + "' could not be read.");
            return;
        }

        for (int idx = 0; idx < dirList.length; ++idx) {
            File f = new File(archDirF, dirList[idx]);

            if (f.isFile() && this.loadMessage(f)) {
                ++numMsgs;
            }
        }

        long endMillis = System.currentTimeMillis();

        if (this.verbose) {
            System.err.println("Processed " + numMsgs + " messages " + " in "
                    + ((endMillis - begMillis) / 1000) + " seconds.");

            System.err.println("Processed " + numMsgs + " messages "
                    + " producing " + this.bySubject.size() + " threads.");

            System.err.println("ERRORS: Duplicate MsgIDs = " + this.errDupes);
        }
    }

    /**
     * Parses one file as a MIME message and adds it to the indexes.
     *
     * <p>
     * A message without a Message-ID is indexed under a synthetic id derived
     * from the file path, so that such messages are not all treated as
     * duplicates of each other. A message whose id is already known is
     * counted as a duplicate and not indexed again.
     * </p>
     *
     * @param f the message file
     * @return true when the message was indexed, false when it was a
     *         duplicate or could not be loaded
     */
    private boolean loadMessage(File f) {
        FileInputStream fis = null;

        try {
            fis = new FileInputStream(f);
            MimeMessage msg = new MimeMessage(this.session, fis);

            String msgId = msg.getMessageID();
            if (msgId == null) {
                msgId = "<no-message-id:" + f.getPath() + ">";
            }
            if (this.debug) {
                System.err.println("MSGID: " + msgId);
            }

            if (this.allMsgs.containsKey(msgId)) {
                ++this.errDupes;
                System.err.println("ERROR: duplicate msgID '" + msgId + "'");
                // The first copy stays authoritative. Threading the duplicate
                // would file that first copy under this copy's subject.
                return false;
            }
            this.allMsgs.put(msgId, msg);

            if (this.debug) {
                System.err.println(" DATE: " + msg.getSentDate());
            }

            String subject = msg.getSubject();
            this.subjects.put(msgId, subject);
            if (this.debug) {
                System.err.println(" SUBJ: " + subject);
            }

            String sub = (subject == null ? "" : subject);
            // Strip every leading "RE: ", in any letter case.
            while (sub.regionMatches(true, 0, "RE: ", 0, REPLY_PREFIX_LEN)) {
                sub = sub.substring(REPLY_PREFIX_LEN);
            }

            this.addBySubject(sub, msgId);
            return true;
        } catch (Exception ex) {
            System.err.println("Error loading '" + f.getPath() + "', "
                    + ex.getMessage());
            ex.printStackTrace(System.err);
            return false;
        } finally {
            if (fis != null) {
                try {
                    fis.close();
                } catch (IOException ex) {
                    // A failed close must not stop the remaining files from
                    // being loaded.
                    System.err.println("Error closing '" + f.getPath()
                            + "', " + ex.getMessage());
                }
            }
        }
    }

    /**
     * Parses the command line. Parsing stops at the first argument that does
     * not start with <code>-</code> or that equals <code>--</code>. Each
     * <code>--archive</code> is loaded as soon as it is encountered. Unknown
     * options, missing option values and malformed numbers print the usage
     * text and terminate the program.
     *
     * @param argv command-line arguments
     */
    private void processArguments(String[] argv) {
        for (int i = 0; i < argv.length; ++i) {
            String arg = argv[i];

            if (!arg.startsWith("-") || arg.equals("--")) {
                break;
            } else if (arg.equals("-?") || arg.equals("--help")) {
                this.printUsageAndExit();
            } else if (arg.equals("--debug")) {
                this.debug = true;
            } else if (arg.equals("--verbose")) {
                this.verbose = true;
            } else if (arg.equals("--sender")) {
                this.fSender = this.optionValue(argv, i);
                ++i;
            } else if (arg.equals("--ipaddr")) {
                this.fIpAddress = this.optionValue(argv, i);
                ++i;
            } else if (arg.equals("--forum")) {
                this.fForumId = this.intOptionValue(argv, i, this.fForumId);
                ++i;
            } else if (arg.equals("--threadid")) {
                this.fThreadId = this.intOptionValue(argv, i, this.fThreadId);
                ++i;
            } else if (arg.equals("--msgid")) {
                this.fMsgId = this.intOptionValue(argv, i, this.fMsgId);
                ++i;
            } else if (arg.equals("--archive")) {
                String path = this.optionValue(argv, i);
                ++i;
                try {
                    this.loadArchive(path);
                } catch (Exception ex) {
                    ex.printStackTrace(System.err);
                }
            } else {
                System.err.println("UNKNOWN OPTION: " + arg);
                this.printUsageAndExit();
            }
        }
    }

    /**
     * Returns the value that follows an option, or prints the usage text and
     * terminates when the option is the last argument.
     *
     * @param argv command-line arguments
     * @param optIdx index of the option whose value is wanted
     * @return the argument at <code>optIdx + 1</code>
     */
    private String optionValue(String[] argv, int optIdx) {
        if (optIdx + 1 >= argv.length) {
            System.err.println("MISSING VALUE FOR OPTION: " + argv[optIdx]);
            this.printUsageAndExit();
        }
        return argv[optIdx + 1];
    }

    /**
     * Returns the integer value that follows an option, or prints the usage
     * text and terminates when the value is missing or not a number.
     *
     * @param argv command-line arguments
     * @param optIdx index of the option whose value is wanted
     * @param fallback value returned if the program is not terminated
     * @return the parsed value
     */
    private int intOptionValue(String[] argv, int optIdx, int fallback) {
        String raw = this.optionValue(argv, optIdx);
        try {
            return Integer.parseInt(raw);
        } catch (NumberFormatException ex) {
            System.err.println("INVALID NUMBER FOR OPTION " + argv[optIdx]
                    + ": " + raw);
            this.printUsageAndExit();
            return fallback;
        }
    }

    /**
     * Prints the usage text to standard error and terminates the JVM with
     * exit status 1.
     */
    public void printUsageAndExit() {
        System.err.println("usage: " + this.getClass().getName()
                + " [options] --archive path [--archive path]...");
        System.err.println("version: " + VERSION_STR);
        System.err.println("options:");
        System.err.println(" --archive path Mailing list archive path.");
        System.err.println(" --debug Turn on debugging output.");
        System.err.println(" --verbose Turn on operational verbosity.");
        System.err.println(" --forum id Forum ID assigned to each message.");
        System.err.println(" --msgid id Beginning message id.");
        System.err.println(" --threadid id Beginning thread id.");
        System.err.println(" --sender name Sender name assigned to each message.");
        System.err.println(" --ipaddr addr IP Address assigned to each message.");
        System.exit(1);
    }

}