KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > gnu > regexp > util > Grep


/*
 * gnu/regexp/util/Grep.java
 * Copyright (C) 1998 Wes Biggs
 * Copyright (C) 2001 Lee Sau Dan for the use of Reader for handling file I/O
 * Copyright (C) 2001 Ulf Dittmer for support of grepping into ZIP files
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

package gnu.regexp.util;

import gnu.getopt.Getopt;
import gnu.getopt.LongOpt;
import gnu.regexp.RE;
import gnu.regexp.REException;
import gnu.regexp.REMatch;
import gnu.regexp.RESyntax;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.util.Enumeration;
import java.util.Vector;
import java.util.zip.*;

44 /**
45  * Grep is a pure-Java clone of the GNU grep utility. As such, it is much
46  * slower and not as full-featured, but it has the advantage of being
47  * available on any system with a Java virtual machine.
48  *
49  * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
50  * <A HREF="http://www.csis.hku.hk/~sdlee/">Lee Sau Dan</A>
51  * <A HREF="http://www.capital.net/~dittmer/">Ulf Dittmer</A>
52  * @version 1.03
53  * @use gnu.getopt
54  */

55 public class Grep {
56   private static final int BYTE_OFFSET = 0;
57   private static final int COUNT = 1;
58   private static final int LINE_NUMBER = 2;
59   private static final int QUIET = 3;
60   private static final int SILENT = 4;
61   private static final int NO_FILENAME = 5;
62   private static final int REVERT_MATCH = 6;
63   private static final int FILES_WITH_MATCHES = 7;
64   private static final int LINE_REGEXP = 8;
65   private static final int FILES_WITHOUT_MATCH = 9;
66   private static final int EXPAND_ZIP_FILES = 10;
67
68   private static final String JavaDoc PROGNAME = "gnu.regexp.util.Grep";
69   private static final String JavaDoc PROGVERSION = "1.03";
70
71   private Grep() { }
72   /**
73    * Invokes the grep() function below with the command line arguments
74    * and using the RESyntax.RE_SYNTAX_GREP syntax, which attempts to
75    * emulate the traditional UNIX grep syntax.
76    */

77   public static void main(String JavaDoc[] argv) {
78     System.exit(grep(argv, RESyntax.RE_SYNTAX_GREP, System.out));
79   }
80
81   /**
82    * Runs Grep with the specified arguments. For a list of
83    * supported options, specify "--help".
84    *
85    * This is the meat of the grep routine, but unlike main(), you can
86    * specify your own syntax and PrintStream to use for output.
87    */

88   public static int grep(String JavaDoc[] argv, RESyntax syntax, PrintStream JavaDoc out) {
89     // use gnu.getopt to read arguments
90
int cflags = 0;
91     
92     boolean[] options = new boolean [10];
93
94     String JavaDoc encoding = null;
95     
96     LongOpt[] longOptions = {
97         new LongOpt("byte-offset", LongOpt.NO_ARGUMENT, null, 'b'),
98     new LongOpt("count", LongOpt.NO_ARGUMENT, null, 'c'),
99     new LongOpt("no-filename", LongOpt.NO_ARGUMENT, null, 'h'),
100     new LongOpt("ignore-case", LongOpt.NO_ARGUMENT, null, 'i'),
101     new LongOpt("files-with-matches", LongOpt.NO_ARGUMENT, null, 'l'),
102     new LongOpt("help", LongOpt.NO_ARGUMENT, null, '!'),
103     new LongOpt("line-number", LongOpt.NO_ARGUMENT, null, 'n'),
104     new LongOpt("quiet", LongOpt.NO_ARGUMENT, null, 'q'),
105     new LongOpt("silent", LongOpt.NO_ARGUMENT, null, 'q'),
106     new LongOpt("no-messages", LongOpt.NO_ARGUMENT, null, 's'),
107     new LongOpt("revert-match", LongOpt.NO_ARGUMENT, null, 'v'),
108     new LongOpt("line-regexp", LongOpt.NO_ARGUMENT, null, 'x'),
109     new LongOpt("extended-regexp", LongOpt.NO_ARGUMENT, null, 'E'),
110     new LongOpt("fixed-strings", LongOpt.NO_ARGUMENT, null, 'F'), // TODO
111
new LongOpt("basic-regexp", LongOpt.NO_ARGUMENT, null, 'G'),
112     new LongOpt("files-without-match", LongOpt.NO_ARGUMENT, null, 'L'),
113     new LongOpt("version", LongOpt.NO_ARGUMENT, null, 'V'),
114     new LongOpt("zip", LongOpt.NO_ARGUMENT, null, 'z'),
115     new LongOpt("encoding", LongOpt.REQUIRED_ARGUMENT, null, 'N')
116       };
117
118     Getopt g = new Getopt(PROGNAME, argv, "bchilnqsvxyEFGLVzN:", longOptions);
119     int c;
120     String JavaDoc arg;
121     while ((c = g.getopt()) != -1) {
122       switch (c) {
123       case 'b':
124     options[BYTE_OFFSET] = true;
125     break;
126       case 'c':
127     options[COUNT] = true;
128     break;
129       case 'h':
130     options[NO_FILENAME] = true;
131     break;
132       case 'i':
133       case 'y':
134     cflags |= RE.REG_ICASE;
135     break;
136       case 'l':
137     options[FILES_WITH_MATCHES] = true;
138     break;
139       case 'n':
140     options[LINE_NUMBER] = true;
141     break;
142       case 'q':
143     options[QUIET] = true;
144     break;
145       case 's':
146     options[SILENT] = true;
147     break;
148       case 'v':
149     options[REVERT_MATCH] = true;
150     break;
151       case 'x':
152     options[LINE_REGEXP] = true;
153     break;
154       case 'E': // TODO: check compatibility with grep
155
syntax = RESyntax.RE_SYNTAX_EGREP;
156     break;
157       case 'F': // TODO: fixed strings
158
break;
159       case 'G':
160     syntax = RESyntax.RE_SYNTAX_GREP;
161     break;
162       case 'L':
163     options[FILES_WITHOUT_MATCH] = true;
164     break;
165       case 'V':
166     System.err.println(PROGNAME+' '+PROGVERSION);
167     return 0;
168       case 'z':
169     options[EXPAND_ZIP_FILES] = true;
170     break;
171       case 'N':
172     encoding = g.getOptarg();
173     try { // try out this encoding now. If not found, fall back to default
174
"".getBytes(encoding);
175     } catch (UnsupportedEncodingException JavaDoc uee) {
176       System.err.println(PROGNAME+": (Warning)"
177                  + " Unsupported Encoding: " + encoding
178                  + "; reverting to default");
179       encoding = null;
180     }
181     break;
182       case '!': // help
183
try {
184           BufferedReader JavaDoc br = new BufferedReader JavaDoc(new InputStreamReader JavaDoc((Grep.class).getResourceAsStream("GrepUsage.txt"),"UTF8"));
185           String JavaDoc line;
186           while ((line = br.readLine()) != null)
187           out.println(line);
188       } catch (IOException JavaDoc ie) { }
189     return 0;
190       }
191     }
192     
193     InputStream JavaDoc is = null;
194     RE pattern = null;
195     if (g.getOptind() >= argv.length) {
196       System.err.println("Usage: java " + PROGNAME + " [OPTION]... PATTERN [FILE]...");
197       System.err.println("Try `java " + PROGNAME + " --help' for more information.");
198       return 2;
199     }
200     try {
201       pattern = new RE(argv[g.getOptind()],cflags,syntax);
202     } catch (REException e) {
203       System.err.println("Error in expression: "+e);
204       return 2;
205     }
206
207     boolean notFound = true;
208     if (argv.length >= g.getOptind()+2) {
209       for (int i = g.getOptind() + 1; i < argv.length; i++) {
210       boolean no_filename = (argv.length == g.getOptind()+2)
211           || options[NO_FILENAME];
212     if (argv[i].equals("-")) {
213         final String JavaDoc filename = no_filename ? null : "(standard input)";
214         if (processStream(pattern,System.in,encoding,options,filename,null,out))
215         notFound = false;
216     } else {
217         final String JavaDoc filename = no_filename ? null : argv[i];
218         try {
219         File JavaDoc file = new File JavaDoc(argv[i]);
220         if(file.isDirectory()) {
221             System.err.println(PROGNAME + ": " + argv[i] + ": Is a directory");
222         } else if(!file.canRead()) {
223             System.err.println(PROGNAME + ": " + argv[i] + ": Permission denied");
224         } else if (options[EXPAND_ZIP_FILES] && argv[i].endsWith(".zip")) {
225             // iterate over all files within this ZIP file
226
try {
227             ZipFile zf = new ZipFile(file);
228             Enumeration JavaDoc list = zf.entries();
229             while (list.hasMoreElements()) {
230                 ZipEntry ze = (ZipEntry) list.nextElement();
231                 if (! ze.isDirectory()) {
232                 if (processStream(pattern, zf.getInputStream(ze), encoding, options, filename, ze.getName(), out))
233                     notFound = false;
234                 }
235             }
236             } catch (Exception JavaDoc ex) {
237             System.err.println(PROGNAME + ": " + argv[i] + ": Problem reading ZIP file");
238             return 2;
239             }
240         } else {
241             if (processStream(pattern,
242                       new FileInputStream JavaDoc(argv[i]),
243                       encoding, options, filename, null, out))
244             notFound = false;
245         }
246         } catch (FileNotFoundException JavaDoc e) {
247         if (!options[SILENT])
248             System.err.println(PROGNAME+": "+e);
249         }
250     }
251       }
252     } else {
253     if (processStream(pattern,System.in,encoding,options,null,null,out))
254         notFound = false;
255     }
256     return notFound ? 1 : 0;
257   }
258
259   private static boolean processStream(RE pattern, InputStream JavaDoc is,
260                        String JavaDoc encoding, boolean[] options,
261                        String JavaDoc filename, String JavaDoc zipName,
262                        PrintStream JavaDoc out) {
263     try {
264       final InputStreamReader JavaDoc isr = encoding == null?
265     new InputStreamReader JavaDoc(is) : new InputStreamReader JavaDoc(is,encoding);
266       final BufferedReader JavaDoc r = new BufferedReader JavaDoc(isr);
267       return processReader(pattern, r, options, filename, zipName, out);
268     } catch (UnsupportedEncodingException JavaDoc uee) {
269       /* since grep() should have checked that the 'encoding' parameter
270      is valid, it should be impossible that this exception would
271      happen. Of, sso, it is a logic error.
272       */

273       throw new Error JavaDoc(PROGNAME + ": programming logic error");
274     }
275   }
276
277     private static String JavaDoc fileNameString (String JavaDoc fileName, String JavaDoc zipName) {
278     if (zipName == null)
279         return fileName;
280     else
281         return zipName + " in " + fileName;
282     }
283
284   private static boolean processReader(RE pattern,
285                        BufferedReader JavaDoc br,
286                        boolean[] options, String JavaDoc filename,
287                        String JavaDoc zipName, PrintStream JavaDoc out) {
288
289     int newlineLen = System.getProperty("line.separator").length();
290     int count = 0;
291     long atByte = 0;
292     int atLine = 1;
293     String JavaDoc line;
294     REMatch match;
295     
296     try {
297       while ((line = br.readLine()) != null) {
298     match = pattern.getMatch(line);
299     if (((options[LINE_REGEXP] && pattern.isMatch(line))
300          || (!options[LINE_REGEXP] && (match != null)))
301         ^ options[REVERT_MATCH]) {
302       count++;
303       if (!options[COUNT]) {
304         if (options[QUIET]) {
305           return true;
306         }
307         if (options[FILES_WITH_MATCHES]) {
308           if (filename != null)
309         out.println(fileNameString(filename, zipName));
310           return true;
311         }
312         if (options[FILES_WITHOUT_MATCH]) {
313           return false;
314         }
315         if (filename != null) {
316         out.print(fileNameString(filename, zipName));
317         out.print(':');
318         }
319         if (options[LINE_NUMBER]) {
320           out.print(atLine);
321           out.print(':');
322         }
323         if (options[BYTE_OFFSET]) {
324           out.print(atByte + match.getStartIndex() );
325           out.print(':');
326         }
327         out.println(line);
328       }
329     } // a match
330
atByte += line.length() + newlineLen; // could be troublesome...
331
atLine++;
332       } // a valid line
333
br.close();
334
335       if (options[COUNT]) {
336     if (filename != null)
337       out.println(fileNameString(filename, zipName)+':');
338     out.println(count);
339       }
340       if (options[FILES_WITHOUT_MATCH] && count==0) {
341     if (filename != null)
342       out.println(fileNameString(filename, zipName));
343       }
344     } catch (IOException JavaDoc e) {
345       System.err.println(PROGNAME+": "+e);
346     }
347     return ((count > 0) ^ options[REVERT_MATCH]);
348   }
349 }
350
Popular Tags