KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > dspace > app > mediafilter > MediaFilterManager


1 /*
2  * MediaFilterManager.java
3  *
4  * Version: $Revision: 1.19 $
5  *
6  * Date: $Date: 2006/03/30 02:46:42 $
7  *
8  * Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts
9  * Institute of Technology. All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions are
13  * met:
14  *
15  * - Redistributions of source code must retain the above copyright
16  * notice, this list of conditions and the following disclaimer.
17  *
18  * - Redistributions in binary form must reproduce the above copyright
19  * notice, this list of conditions and the following disclaimer in the
20  * documentation and/or other materials provided with the distribution.
21  *
22  * - Neither the name of the Hewlett-Packard Company nor the name of the
23  * Massachusetts Institute of Technology nor the names of their
24  * contributors may be used to endorse or promote products derived from
25  * this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
34  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
35  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
36  * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
37  * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
38  * DAMAGE.
39  */

40
41 package org.dspace.app.mediafilter;
42
43 import java.util.HashMap JavaDoc;
44 import java.util.Map JavaDoc;
45 import java.util.List JavaDoc;
46 import java.util.Arrays JavaDoc;
47
48 import org.apache.commons.cli.CommandLine;
49 import org.apache.commons.cli.CommandLineParser;
50 import org.apache.commons.cli.HelpFormatter;
51 import org.apache.commons.cli.Options;
52 import org.apache.commons.cli.PosixParser;
53
54 import org.dspace.content.Bitstream;
55 import org.dspace.content.BitstreamFormat;
56 import org.dspace.content.Bundle;
57 import org.dspace.content.Collection;
58 import org.dspace.content.Community;
59 import org.dspace.content.DSpaceObject;
60 import org.dspace.content.Item;
61 import org.dspace.content.ItemIterator;
62 import org.dspace.core.ConfigurationManager;
63 import org.dspace.core.Constants;
64 import org.dspace.core.Context;
65 import org.dspace.core.PluginManager;
66 import org.dspace.handle.HandleManager;
67 import org.dspace.search.DSIndexer;
68
69 /**
70  * MediaFilterManager is the class that invokes the media filters over the
71  * repository's content. a few command line flags affect the operation of the
72  * MFM: -v verbose outputs all extracted text to STDOUT; -f force forces all
73  * bitstreams to be processed, even if they have been before; -n noindex does not
74  * recreate index after processing bitstreams; -i [identifier] limits processing
75  * scope to a community, collection or item; and -m [max] limits processing to a
76  * maximum number of items.
77  */

78 public class MediaFilterManager
79 {
80     public static boolean createIndex = true; // default to creating index
81

82     public static boolean isVerbose = false; // default to not verbose
83

84     public static boolean isForce = false; // default to not forced
85

86     public static String JavaDoc identifier = null; // object scope limiter
87

88     public static int max2Process = Integer.MAX_VALUE; // maximum number to process
89

90     public static int processed = 0; // number processed
91

92     private static MediaFilter[] filterClasses = null;
93     
94     private static Map JavaDoc filterFormats = new HashMap JavaDoc();
95     
96     public static void main(String JavaDoc[] argv) throws Exception JavaDoc
97     {
98         // set headless for non-gui workstations
99
System.setProperty("java.awt.headless", "true");
100
101         // create an options object and populate it
102
CommandLineParser parser = new PosixParser();
103
104         Options options = new Options();
105         
106         options.addOption("v", "verbose", false,
107                 "print all extracted text and other details to STDOUT");
108         options.addOption("f", "force", false,
109                 "force all bitstreams to be processed");
110         options.addOption("n", "noindex", false,
111                 "do NOT re-create search index after filtering bitstreams");
112         options.addOption("i", "identifier", true,
113                 "ONLY process bitstreams belonging to identifier");
114         options.addOption("m", "maximum", true,
115                 "process no more than maximum items");
116         options.addOption("h", "help", false, "help");
117
118         CommandLine line = parser.parse(options, argv);
119
120         if (line.hasOption('h'))
121         {
122             HelpFormatter myhelp = new HelpFormatter();
123             myhelp.printHelp("MediaFilter\n", options);
124
125             System.exit(0);
126         }
127
128         if (line.hasOption('v'))
129         {
130             isVerbose = true;
131         }
132
133         if (line.hasOption('n'))
134         {
135             createIndex = false;
136         }
137
138         if (line.hasOption('f'))
139         {
140             isForce = true;
141         }
142         
143         if (line.hasOption('i'))
144         {
145             identifier = line.getOptionValue('i');
146         }
147         
148         if (line.hasOption('m'))
149         {
150             max2Process = Integer.parseInt(line.getOptionValue('m'));
151             if (max2Process <= 1)
152             {
153                 System.out.println("Invalid maximum value '" +
154                                     line.getOptionValue('m') + "' - ignoring");
155                 max2Process = Integer.MAX_VALUE;
156             }
157         }
158
159         // set up filters
160
filterClasses =
161             (MediaFilter[])PluginManager.getPluginSequence(MediaFilter.class);
162         for (int i = 0; i < filterClasses.length; i++)
163         {
164             String JavaDoc filterName = filterClasses[i].getClass().getName();
165             String JavaDoc formats = ConfigurationManager.getProperty(
166                             "filter." + filterName + ".inputFormats");
167             if (formats != null)
168             {
169                 filterFormats.put(filterName, Arrays.asList(formats.split(",[\\s]*")));
170             }
171         }
172         
173         Context c = null;
174
175         try
176         {
177             c = new Context();
178
179             // have to be super-user to do the filtering
180
c.setIgnoreAuthorization(true);
181
182             // now apply the filters
183
if (identifier == null)
184             {
185                 applyFiltersAllItems(c);
186             }
187             else // restrict application scope to identifier
188
{
189                 DSpaceObject dso = HandleManager.resolveToObject(c, identifier);
190                 if (dso == null)
191                 {
192                     throw new IllegalArgumentException JavaDoc("Cannot resolve "
193                                 + identifier + " to a DSpace object");
194                 }
195                 
196                 switch (dso.getType())
197                 {
198                     case Constants.COMMUNITY:
199                                     applyFiltersCommunity(c, (Community)dso);
200                                     break;
201                     case Constants.COLLECTION:
202                                     applyFiltersCollection(c, (Collection)dso);
203                                     break;
204                     case Constants.ITEM:
205                                     applyFiltersItem(c, (Item)dso);
206                                     break;
207                 }
208             }
209           
210             // create search index?
211
if (createIndex)
212             {
213                 System.out.println("Creating search index:");
214                 DSIndexer.createIndex(c);
215             }
216
217             c.complete();
218             c = null;
219         }
220         finally
221         {
222             if (c != null)
223             {
224                 c.abort();
225             }
226         }
227     }
228
229     public static void applyFiltersAllItems(Context c) throws Exception JavaDoc
230     {
231         ItemIterator i = Item.findAll(c);
232         while (i.hasNext() && processed < max2Process)
233         {
234             applyFiltersItem(c, i.next());
235         }
236     }
237     
238     public static void applyFiltersCommunity(Context c, Community community)
239                                              throws Exception JavaDoc
240     {
241         Community[] subcommunities = community.getSubcommunities();
242         for (int i = 0; i < subcommunities.length; i++)
243         {
244             applyFiltersCommunity(c, subcommunities[i]);
245         }
246         
247         Collection[] collections = community.getCollections();
248         for (int j = 0; j < collections.length; j++)
249         {
250             applyFiltersCollection(c, collections[j]);
251         }
252     }
253         
254     public static void applyFiltersCollection(Context c, Collection collection)
255                                               throws Exception JavaDoc
256     {
257         ItemIterator i = collection.getItems();
258         while (i.hasNext() && processed < max2Process)
259         {
260             applyFiltersItem(c, i.next());
261         }
262     }
263        
264     public static void applyFiltersItem(Context c, Item item) throws Exception JavaDoc
265     {
266           if (filterItem(c, item))
267           {
268               // commit changes after each filtered item
269
c.commit();
270               // increment processed count
271
++processed;
272           }
273           // clear item objects from context cache
274
item.decache();
275     }
276
277     /**
278      * iterate through the item's bitstreams in the ORIGINAL bundle, applying
279      * filters if possible
280      *
281      * @return true if any bitstreams processed,
282      * false if none
283      */

284     public static boolean filterItem(Context c, Item myItem) throws Exception JavaDoc
285     {
286         // get 'original' bundles
287
Bundle[] myBundles = myItem.getBundles("ORIGINAL");
288         boolean done = false;
289         for (int i = 0; i < myBundles.length; i++)
290         {
291             // now look at all of the bitstreams
292
Bitstream[] myBitstreams = myBundles[i].getBitstreams();
293             
294             for (int k = 0; k < myBitstreams.length; k++)
295             {
296                 done |= filterBitstream(c, myItem, myBitstreams[k]);
297             }
298         }
299         return done;
300     }
301
302     /**
303      * Attempt to filter a bitstream
304      *
305      * An exception will be thrown if the media filter class cannot be
306      * instantiated, exceptions from filtering will be logged to STDOUT and
307      * swallowed.
308      *
309      * @return true if bitstream processed,
310      * false if no applicable filter or already processed
311      */

312     public static boolean filterBitstream(Context c, Item myItem,
313             Bitstream myBitstream) throws Exception JavaDoc
314     {
315         boolean filtered = false;
316         
317         // iterate through filter classes. A single format may be actioned
318
// by more than one filter
319
for (int i = 0; i < filterClasses.length; i++)
320         {
321             List JavaDoc fmts = (List JavaDoc)filterFormats.get(filterClasses[i].getClass().getName());
322             if (fmts.contains(myBitstream.getFormat().getShortDescription()))
323             {
324                 try
325                 {
326                     // only update item if bitstream not skipped
327
if (filterClasses[i].processBitstream(c, myItem, myBitstream))
328                     {
329                         myItem.update(); // Make sure new bitstream has a sequence
330
// number
331
filtered = true;
332                     }
333                 }
334                 catch (Exception JavaDoc e)
335                 {
336                     System.out.println("ERROR filtering, skipping bitstream #"
337                             + myBitstream.getID() + " " + e);
338                     e.printStackTrace();
339                 }
340             }
341         }
342         return filtered;
343     }
344 }
Popular Tags