KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > dspace > checker > CheckerCommand


1 /*
2  * Copyright (c) 2004-2005, Hewlett-Packard Company and Massachusetts
3  * Institute of Technology. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are
7  * met:
8  *
9  * - Redistributions of source code must retain the above copyright
10  * notice, this list of conditions and the following disclaimer.
11  *
12  * - Redistributions in binary form must reproduce the above copyright
13  * notice, this list of conditions and the following disclaimer in the
14  * documentation and/or other materials provided with the distribution.
15  *
16  * - Neither the name of the Hewlett-Packard Company nor the name of the
17  * Massachusetts Institute of Technology nor the names of their
18  * contributors may be used to endorse or promote products derived from
19  * this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
28  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
29  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
30  * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
31  * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
32  * DAMAGE.
33  */

34 package org.dspace.checker;
35
36 import java.io.File JavaDoc;
37 import java.io.IOException JavaDoc;
38 import java.io.InputStream JavaDoc;
39 import java.security.DigestInputStream JavaDoc;
40 import java.security.MessageDigest JavaDoc;
41 import java.security.NoSuchAlgorithmException JavaDoc;
42 import java.sql.SQLException JavaDoc;
43 import java.util.Date JavaDoc;
44
45 import org.apache.log4j.Logger;
46 import org.apache.log4j.PropertyConfigurator;
47 import org.dspace.core.ConfigurationManager;
48 import org.dspace.core.Utils;
49
50 /**
51  * <p>
52  * Main class for the checksum checker tool, which calculates checksums for each
53  * bitstream whose ID is in the most_recent_checksum table, and compares it
54  * against the last calculated checksum for that bitstream.
55  * </p>
56  *
57  * @author Jim Downing
58  * @author Grace Carpenter
59  * @author Nathan Sarr
60  *
61  *
62  * @todo the accessor methods are currently unused - are they useful?
63  * @todo check for any existing resource problems
64  */

65 public final class CheckerCommand
66 {
67     /** Usual Log4J logger. */
68     private static final Logger LOG = Logger.getLogger(CheckerCommand.class);
69
70     /** Default digest algorithm (MD5). */
71     private static final String JavaDoc DEFAULT_DIGEST_ALGORITHM = "MD5";
72
73     /** 4 Meg byte array for reading file. */
74     private int BYTE_ARRAY_SIZE = 4 * 1024;
75
76     /** BitstreamInfoDAO dependency. */
77     private BitstreamInfoDAO bitstreamInfoDAO = null;
78
79     /** BitstreamDAO dependency. */
80     private BitstreamDAO bitstreamDAO = null;
81
82     /**
83      * Checksum history Data access object
84      */

85     private ChecksumHistoryDAO checksumHistoryDAO = null;
86
87     /** start time for current process. */
88     private Date JavaDoc processStartDate = null;
89
90     /**
91      * Dispatcher to be used for processing run.
92      */

93     private BitstreamDispatcher dispatcher = null;
94
95     /**
96      * Container/logger with details about each bitstream and checksum results.
97      */

98     private ChecksumResultsCollector collector = null;
99
100     /** Report all processing */
101     private boolean reportVerbose = false;
102
103     /**
104      * Default constructor uses DSpace plugin manager to construct dependencies.
105      */

106     public CheckerCommand()
107     {
108         bitstreamInfoDAO = new BitstreamInfoDAO();
109         bitstreamDAO = new BitstreamDAO();
110         checksumHistoryDAO = new ChecksumHistoryDAO();
111     }
112
113     /**
114      * <p>
115      * Uses the options set up on this checker to determine a mode of execution,
116      * and then accepts bitstream ids from the dispatcher and checks their
117      * bitstreams against the db records.
118      * </p>
119      *
120      * <p>
121      * N.B. a valid BitstreamDispatcher must be provided using
122      * setBitstreamDispatcher before calling this method
123      * </p>
124      */

125     public void process()
126     {
127         LOG.debug("Begin Checker Processing");
128
129         if (dispatcher == null)
130         {
131             throw new IllegalStateException JavaDoc("No BitstreamDispatcher provided");
132         }
133
134         if (collector == null)
135         {
136             collector = new ResultsLogger(processStartDate);
137         }
138
139         // update missing bitstreams that were entered into the
140
// bitstream table - this always done.
141
bitstreamInfoDAO.updateMissingBitstreams();
142
143         int id = dispatcher.next();
144
145         while (id != BitstreamDispatcher.SENTINEL)
146         {
147             LOG.debug("Processing bitstream id = " + id);
148             BitstreamInfo info = checkBitstream(id);
149
150             if (reportVerbose
151                     || (info.getChecksumCheckResult() != ChecksumCheckResults.CHECKSUM_MATCH))
152             {
153                 collector.collect(info);
154             }
155
156             id = dispatcher.next();
157         }
158     }
159
160     /**
161      * Check a specified bitstream.
162      *
163      * @param id
164      * the bitstream id
165      *
166      * @return the information about the bitstream and its checksum data
167      */

168     private BitstreamInfo checkBitstream(final int id)
169     {
170         // get bitstream info from bitstream table
171
BitstreamInfo info = bitstreamInfoDAO.findByBitstreamId(id);
172
173         // requested id was not found in bitstream
174
// or most_recent_checksum table
175
if (info == null)
176         {
177             // Note: this case should only occur if id is requested at
178
// command line, since ref integrity checks should
179
// prevent id from appearing in most_recent_checksum
180
// but not bitstream table, or vice versa
181
info = new BitstreamInfo(id);
182             processNullInfoBitstream(info);
183         }
184         else if (!info.getToBeProcessed())
185         {
186             // most_recent_checksum.to_be_processed is marked
187
// 'false' for this bitstream id.
188
// Do not do any db updates
189
info
190                     .setChecksumCheckResult(ChecksumCheckResults.BITSTREAM_NOT_PROCESSED);
191         }
192         else if (info.getDeleted())
193         {
194             // bitstream id is marked 'deleted' in bitstream table.
195
processDeletedBitstream(info);
196         }
197         else
198         {
199             processBitstream(info);
200         }
201
202         return info;
203     }
204
205     /**
206      * Digest the stream and get the checksum value.
207      *
208      * @param stream
209      * InputStream to digest.
210      * @param algorithm
211      * the algorithm to use when digesting.
212      * @todo Document the algorithm parameter
213      * @return digest
214      *
215      * @throws java.security.NoSuchAlgorithmException
216      * if the requested algorithm is not provided by the system
217      * security provider.
218      * @throws java.io.IOException
219      * If an exception arises whilst reading the stream
220      */

221     private String JavaDoc digestStream(InputStream JavaDoc stream, String JavaDoc algorithm)
222             throws java.security.NoSuchAlgorithmException JavaDoc, java.io.IOException JavaDoc
223     {
224         // create the digest stream
225
DigestInputStream JavaDoc dStream = new DigestInputStream JavaDoc(stream, MessageDigest
226                 .getInstance(algorithm));
227
228         byte[] bytes = new byte[BYTE_ARRAY_SIZE];
229
230         // make sure all the data is read by the digester
231
while (dStream.read(bytes, 0, BYTE_ARRAY_SIZE) != -1)
232         {
233             // no-op
234
}
235
236         return Utils.toHex(dStream.getMessageDigest().digest());
237     }
238
239     /**
240      * Compares two checksums.
241      *
242      * @param checksumA
243      * the first checksum
244      * @param checksumB
245      * the second checksum
246      *
247      * @return a result code (constants defined in Util)
248      */

249     private String JavaDoc compareChecksums(String JavaDoc checksumA, String JavaDoc checksumB)
250     {
251         String JavaDoc result = ChecksumCheckResults.CHECKSUM_NO_MATCH;
252
253         if ((checksumA == null) || (checksumB == null))
254         {
255             result = ChecksumCheckResults.CHECKSUM_PREV_NOT_FOUND;
256         }
257         else if (checksumA.equals(checksumB))
258         {
259             result = ChecksumCheckResults.CHECKSUM_MATCH;
260         }
261
262         return result;
263     }
264
265     /**
266      * Process bitstream that was marked 'deleted' in bitstream table. A deleted
267      * bitstream should only be checked once afterwards it should be marked
268      * 'to_be_processed=false'. Note that to_be_processed must be manually
269      * updated in db to allow for future processing.
270      *
271      * @param info
272      * a deleted bitstream.
273      */

274     private void processDeletedBitstream(BitstreamInfo info)
275     {
276         info.setProcessStartDate(new Date JavaDoc());
277         info
278                 .setChecksumCheckResult(ChecksumCheckResults.BITSTREAM_MARKED_DELETED);
279         info.setProcessStartDate(new Date JavaDoc());
280         info.setProcessEndDate(new Date JavaDoc());
281         info.setToBeProcessed(false);
282         bitstreamInfoDAO.update(info);
283         checksumHistoryDAO.insertHistory(info);
284     }
285
286     /**
287      * Process bitstream whose ID was not found in most_recent_checksum or
288      * bitstream table. No updates can be done. The missing bitstream is output
289      * to the log file.
290      *
291      * @param info
292      * A not found BitStreamInfo
293      * @todo is this method required?
294      */

295     private void processNullInfoBitstream(BitstreamInfo info)
296     {
297         info.setInfoFound(false);
298         info.setProcessStartDate(new Date JavaDoc());
299         info.setProcessEndDate(new Date JavaDoc());
300         info
301                 .setChecksumCheckResult(ChecksumCheckResults.BITSTREAM_INFO_NOT_FOUND);
302     }
303
304     /**
305      * <p>
306      * Process general case bistream.
307      * </p>
308      *
309      * <p>
310      * Note: bitstream will have timestamp indicating it was "checked", even if
311      * actual checksumming never took place.
312      * </p>
313      *
314      * @todo Why does bitstream have a timestamp indicating it's checked if
315      * checksumming doesn't occur?
316      *
317      * @param info
318      * BitstreamInfo to handle
319      */

320     private void processBitstream(BitstreamInfo info)
321     {
322         info.setProcessStartDate(new Date JavaDoc());
323
324         if (info.getChecksumAlgorithm() == null)
325         {
326             info.setChecksumAlgorithm(DEFAULT_DIGEST_ALGORITHM);
327         }
328
329         try
330         {
331             InputStream JavaDoc bitstream = bitstreamDAO.getBitstream(info
332                     .getBitstreamId());
333
334             info.setBitstreamFound(true);
335
336             String JavaDoc checksum = digestStream(bitstream, info
337                     .getChecksumAlgorithm());
338
339             info.setCalculatedChecksum(checksum);
340
341             // compare new checksum to previous checksum
342
info.setChecksumCheckResult(compareChecksums(info
343                     .getStoredChecksum(), info.getCalculatedChecksum()));
344         }
345         catch (IOException JavaDoc e)
346         {
347             // bitstream located, but file missing from asset store
348
info
349                     .setChecksumCheckResult(ChecksumCheckResults.BITSTREAM_NOT_FOUND);
350             info.setToBeProcessed(false);
351             LOG.error("Error retrieving bitstream ID " + info.getBitstreamId()
352                     + " from " + "asset store.", e);
353         }
354         catch (SQLException JavaDoc e)
355         {
356             // ??this code only executes if an sql
357
// exception occurs in *DSpace* code, probably
358
// indicating a general db problem?
359
info
360                     .setChecksumCheckResult(ChecksumCheckResults.BITSTREAM_INFO_NOT_FOUND);
361             LOG.error("Error retrieving metadata for bitstream ID "
362                     + info.getBitstreamId(), e);
363         }
364         catch (NoSuchAlgorithmException JavaDoc e)
365         {
366             info
367                     .setChecksumCheckResult(ChecksumCheckResults.CHECKSUM_ALGORITHM_INVALID);
368             info.setToBeProcessed(false);
369             LOG.error("Invalid digest algorithm type for bitstream ID"
370                     + info.getBitstreamId(), e);
371         }
372         finally
373         {
374             info.setProcessEndDate(new Date JavaDoc());
375
376             // record new checksum and comparison result in db
377
bitstreamInfoDAO.update(info);
378             checksumHistoryDAO.insertHistory(info);
379         }
380     }
381
382     /**
383      * Get dispatcher being used by this run of the checker.
384      *
385      * @return the dispatcher being used by this run.
386      */

387     public BitstreamDispatcher getDispatcher()
388     {
389         return dispatcher;
390     }
391
392     /**
393      * Set the dispatcher to be used by this run of the checker.
394      *
395      * @param dispatcher
396      * Dispatcher to use.
397      */

398     public void setDispatcher(BitstreamDispatcher dispatcher)
399     {
400         this.dispatcher = dispatcher;
401     }
402
403     /**
404      * Get the collector that holds/logs the results for this process run.
405      *
406      * @return The ChecksumResultsCollecter being used.
407      */

408     public ChecksumResultsCollector getCollector()
409     {
410         return collector;
411     }
412
413     /**
414      * Set the collector that holds/logs the results for this process run.
415      *
416      * @param collector
417      * the collector to be used for this run
418      */

419     public void setCollector(ChecksumResultsCollector collector)
420     {
421         this.collector = collector;
422     }
423
424     /**
425      * Get time at which checker process began.
426      *
427      * @return start time
428      */

429     public Date JavaDoc getProcessStartDate()
430     {
431         return processStartDate;
432     }
433
434     /**
435      * Set time at which checker process began.
436      *
437      * @param startDate
438      * start time
439      */

440     public void setProcessStartDate(Date JavaDoc startDate)
441     {
442         processStartDate = startDate;
443     }
444
445     /**
446      * Configures log4j to read config info from DSpace log4j.properties file.
447      */

448     public void configureLog()
449     {
450         // Load in log4j config
451
String JavaDoc log4jConf = ConfigurationManager.getProperty("dspace.dir")
452                 + File.separator + "config" + File.separator
453                 + "log4j.properties";
454
455         PropertyConfigurator.configure(log4jConf);
456     }
457
458     /**
459      * Determine if ony erros are reported
460      *
461      * @return true if only errors reported
462      */

463     public boolean isReportVerbose()
464     {
465         return reportVerbose;
466     }
467
468     /**
469      * Set report errors only
470      *
471      * @param reportErrorsOnly
472      * true to report only errors in the logs.
473      */

474     public void setReportVerbose(boolean reportVerbose)
475     {
476         this.reportVerbose = reportVerbose;
477     }
478 }
479
Popular Tags