KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > killingar > forum > comics > IndirectLinkComicsSystem


1 /* Copyright 2000-2005 Mattias Flodin
2  *
3  * The person or persons who have associated their work with
4  * this document (the "Dedicator") hereby dedicate the entire
5  * copyright in the work of authorship identified below (the
6  * "Work") to the public domain.
7  *
8  * Dedicator makes this dedication for the benefit of the
9  * public at large and to the detriment of Dedicator's heirs
10  * and successors. Dedicator intends this dedication to be an
11  * overt act of relinquishment in perpetuity of all present
12  * and future rights under copyright law, whether vested or
13  * contingent, in the Work. Dedicator understands that such
14  * relinquishment of all rights includes the relinquishment of
15  * all rights to enforce (by lawsuit or otherwise) those
16  * copyrights in the Work.
17  *
18  * Dedicator recognizes that, once placed in the public
19  * domain, the Work may be freely reproduced, distributed,
20  * transmitted, used, modified, built upon, or otherwise
21  * exploited by anyone for any purpose, commercial or non-
22  * commercial, and in any way, including by methods that have
23  * not yet been invented or conceived.
24  */

25
26 /**
27  * For comics where there exists a single page that links to all the comic
28  * pages.
29  * Arguments are (optional arguments in [] brackets):
30  * Class name.
31  * Link to first page.
32  * group1,group2:Regular expression for finding links on first page.
33  * group:Regular expression for finding the strip image on the second page
34  * (i.e. those linked to by the links on the first page).
35  * Ordering mode.
36  * [start text]
37  * [end text]
38  *
39  * The link-finding regular expression is preceded by two numbers (group1 and
40  * group2) and a colon. The numbers indicate which regex group index should be
41  * used for the subpage URL and the strip description, respectively. The strip
42  * image regular expression is preceded by one number (group) and a colon. The
43  * number indicates which regex group index to use for obtaining the link to
44  * the image file. Relative URLs are allowed in both regex results. The
45  * ordering mode can be one of: plain, reverse, sort. The plain mode indexes
46  * the comics in the order they appear on the first page. The reverse mode
47  * reverses the order of the plain mode, and the sort mode will sort the comics
48  * lexicographically by their URL string (that is, the URL on the first page).
49  *
50  * If start and end text are given, the pattern matching is run only on the
51  * part of the page from the first occurrence of the start text up to (but not
52  * including) the first following occurrence of the end text.
53  */

54 /*
55  * Example for http://www.slowwave.com
56  * net.killingar.forum.comics.IndirectLinkComicsSystem
57  * http://www.slowwave.com/archive.php
58  * 1,2:<a HREF="(index.php\?date=\d\d-\d\d-\d\d)\">"([^"]+)"<
59  * 1:<img SRC="(/Img/s\d+/[^"]+)"
60  * reverse
61  */

62
63 package net.killingar.forum.comics;
64
65 import net.killingar.StringUtils;
66 import net.killingar.forum.internal.Strip;
67
68 import java.io.PrintWriter JavaDoc;
69 import java.net.URL JavaDoc;
70 import java.util.ArrayList JavaDoc;
71 import java.util.regex.Matcher JavaDoc;
72 import java.util.regex.Pattern JavaDoc;
73
74 public class IndirectLinkComicsSystem extends ComicsSystem
75 {
76     final static int
77         plain = 0,
78         reverse = 1,
79         sort = 2;
80
81     static java.sql.Date JavaDoc dummydate = new java.sql.Date JavaDoc(0);
82
83     static class Settings {
84         int position;
85         String JavaDoc linkpage;
86         int link_group;
87         int description_group;
88         String JavaDoc linkpage_regex;
89         int img_group;
90         String JavaDoc imgpage_regex;
91         int sortmode;
92         String JavaDoc startText;
93         String JavaDoc endText;
94     }
95
96     public void update(String JavaDoc arguments[])
97     {
98         try {
99             try {
100                 Settings settings = parseArguments(arguments, comic.position);
101
102                 // Read all the matching URLs on the first page.
103
URL JavaDoc linkspage_url = new URL JavaDoc(settings.linkpage);
104                 String JavaDoc page = StringUtils.readURLAsString(linkspage_url);
105                 //System.err.print(page);
106
int groups[] = new int[2];
107                 groups[0] = settings.link_group;
108                 groups[1] = settings.description_group;
109
110                 // crop to start and stop page commands
111
if (settings.startText != null && page.indexOf(settings.startText) != -1)
112                     page = page.substring(page.indexOf(settings.startText));
113
114                 if (settings.endText != null && page.indexOf(settings.endText) != -1)
115                     page = page.substring(0, page.indexOf(settings.endText));
116
117                 ArrayList JavaDoc matches = findAllPatternMatches(page,
118                         settings.linkpage_regex, groups);
119
120                 // Reorder the matches if the user requested.
121
if(settings.sortmode == reverse)
122                     java.util.Collections.reverse(matches);
123                 else if(settings.sortmode == sort)
124                     java.util.Collections.sort(matches);
125
126                 // Get each URL's content, and use the second regex to find the
127
// image address.
128
Pattern JavaDoc image_ptn = Pattern.compile(settings.imgpage_regex);
129                 for(int i=Integer.parseInt(comic.position); i<matches.size(); i++) {
130
131                     // Read the subpage and find the img link in it.
132
String JavaDoc[] url_and_desc = (String JavaDoc[]) matches.get(i);
133                     URL JavaDoc url = new URL JavaDoc(linkspage_url, url_and_desc[0]);
134                     page = StringUtils.readURLAsString(url);
135                     Matcher JavaDoc matcher = image_ptn.matcher(page);
136                     matcher.find();
137                     url = new URL JavaDoc(url, (String JavaDoc) matcher.group(1));
138
139                     // Add the strip.
140
if (!debug) {
141                         cmgr.addStrip(new Strip(comic.ID, url.toString(), dummydate, url_and_desc[1], url_and_desc[1]));
142
143                         comic = cmgr.getComic(comic.ID);
144                         comic.position = Integer.toString(i);
145                         cmgr.changeComic(comic);
146
147                         log("! ");
148                     }
149                     else {
150                         logln(url_and_desc[1]+"("+url_and_desc[0]+")");
151                     }
152
153                     //System.err.println(url);
154
}
155                 if (!debug) {
156                     comic = cmgr.getComic(comic.ID);
157                     comic.position = Integer.toString(matches.size());
158                     cmgr.changeComic(comic);
159                 }
160
161             } catch(java.lang.IllegalArgumentException JavaDoc e) {
162                 logln("Error: " + e.getMessage());
163             } catch(java.io.IOException JavaDoc e) {
164                 logln("Error: " + e.getMessage());
165             }
166         } catch(Exception JavaDoc e) {
167             if (out != null) {
168                 PrintWriter JavaDoc w = new PrintWriter JavaDoc(out);
169                 w.println("exception while trying to find url (");
170                 w.println(arguments[1]);
171                 w.println("):");
172                 e.printStackTrace(w);
173             }
174         }
175     }
176
177     Settings parseArguments(String JavaDoc[] arguments, String JavaDoc position)
178         throws IllegalArgumentException JavaDoc
179     {
180         Settings settings = new Settings();
181         String JavaDoc err;
182         String JavaDoc item = null;
183         try {
184             item = "position";
185             settings.position = Integer.parseInt(position);
186             item = "link page regex";
187             settings.linkpage = arguments[1];
188             int commapos = arguments[2].indexOf(',');
189             int colonpos = arguments[2].indexOf(':');
190             settings.link_group = Integer.parseInt(arguments[2].substring(0, commapos));
191             settings.description_group = Integer.parseInt(
192                     arguments[2].substring(commapos+1, colonpos));
193             settings.linkpage_regex = arguments[2].substring(colonpos+1);
194             item = "image page regex";
195             colonpos = arguments[3].indexOf(':');
196             settings.img_group = Integer.parseInt(arguments[3].substring(0, colonpos));
197             settings.imgpage_regex = arguments[3].substring(colonpos+1);
198
199             if(arguments[4].equals("plain"))
200                 settings.sortmode = plain;
201             else if(arguments[4].equals("reverse"))
202                 settings.sortmode = reverse;
203             else if(arguments[4].equals("sort"))
204                 settings.sortmode = sort;
205             else {
206                 err = "unsupported ordering method '"
207                     + arguments[4] + "'.";
208                 throw new IllegalArgumentException JavaDoc(err);
209             }
210
211             // optional arguments
212
if (arguments.length > 5)
213                 settings.startText = arguments[5];
214             if (arguments.length > 6)
215                 settings.endText = arguments[6];
216
217             if(arguments.length > 7)
218                 err = " too many arguments.";
219             else
220                 return settings;
221         } catch(Exception JavaDoc e) {
222             err = item + " argument is invalid.";
223             e.printStackTrace();
224         }
225         throw new IllegalArgumentException JavaDoc(err);
226     }
227
228     // Return a list of arrays containing the requested groups of all the regex
229
// matches found in a string.
230
ArrayList JavaDoc findAllPatternMatches(String JavaDoc text, String JavaDoc patternString, int[] groups)
231     {
232         //System.err.println("Search with pattern: " + patternString);
233
Pattern JavaDoc pattern = Pattern.compile(patternString);
234         Matcher JavaDoc matcher = pattern.matcher(text);
235         ArrayList JavaDoc array = new ArrayList JavaDoc();
236         while(matcher.find()) {
237             String JavaDoc[] res = new String JavaDoc[groups.length];
238             for(int g=0; g!=groups.length; g++)
239                 res[g] = matcher.group(groups[g]);
240             //System.err.println("Found: " + matcher.group(1));
241
array.add(res);
242         }
243         return array;
244     }
245 }
246
247
248
Popular Tags