KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > sleepycat > je > recovery > Checkpointer


1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 2002,2006 Oracle. All rights reserved.
5  *
6  * $Id: Checkpointer.java,v 1.139 2006/11/27 23:15:04 mark Exp $
7  */

8
9 package com.sleepycat.je.recovery;
10
11 import java.util.Iterator JavaDoc;
12 import java.util.Set JavaDoc;
13 import java.util.logging.Level JavaDoc;
14
15 import com.sleepycat.je.CheckpointConfig;
16 import com.sleepycat.je.DatabaseException;
17 import com.sleepycat.je.DbInternal;
18 import com.sleepycat.je.EnvironmentStats;
19 import com.sleepycat.je.StatsConfig;
20 import com.sleepycat.je.cleaner.Cleaner;
21 import com.sleepycat.je.cleaner.TrackedFileSummary;
22 import com.sleepycat.je.cleaner.UtilizationProfile;
23 import com.sleepycat.je.cleaner.FileSelector.CheckpointStartCleanerState;
24 import com.sleepycat.je.config.EnvironmentParams;
25 import com.sleepycat.je.dbi.DatabaseImpl;
26 import com.sleepycat.je.dbi.DbConfigManager;
27 import com.sleepycat.je.dbi.DbTree;
28 import com.sleepycat.je.dbi.EnvironmentImpl;
29 import com.sleepycat.je.log.LogManager;
30 import com.sleepycat.je.tree.BIN;
31 import com.sleepycat.je.tree.ChildReference;
32 import com.sleepycat.je.tree.IN;
33 import com.sleepycat.je.tree.Node;
34 import com.sleepycat.je.tree.SearchResult;
35 import com.sleepycat.je.tree.Tree;
36 import com.sleepycat.je.tree.WithRootLatched;
37 import com.sleepycat.je.utilint.DaemonThread;
38 import com.sleepycat.je.utilint.DbLsn;
39 import com.sleepycat.je.utilint.PropUtil;
40 import com.sleepycat.je.utilint.Tracer;
41
42 /**
43  * The Checkpointer looks through the tree for internal nodes that must be
44  * flushed to the log. Checkpoint flushes must be done in ascending order from
45  * the bottom of the tree up.
46  */

47 public class Checkpointer extends DaemonThread {
48
49     private EnvironmentImpl envImpl;
50
51     /* Checkpoint sequence, initialized at recovery. */
52     private long checkpointId;
53
54     /*
55      * How much the log should grow between checkpoints. If 0, we're using time
56      * based checkpointing.
57      */

58     private long logSizeBytesInterval;
59     private long logFileMax;
60     private long timeInterval;
61     private long lastCheckpointMillis;
62
63     private volatile int highestFlushLevel;
64
65     private int nCheckpoints;
66     private long lastFirstActiveLsn;
67     private long lastCheckpointStart;
68     private long lastCheckpointEnd;
69     private FlushStats flushStats;
70
/**
 * Creates the checkpointer daemon and caches the configuration values that
 * drive the checkpoint interval.
 *
 * @param envImpl the environment to be checkpointed.
 * @param waitTime the wakeup period passed to the DaemonThread superclass;
 * also used as the time based checkpoint interval.
 * @param name the daemon name, passed to the superclass.
 */
public Checkpointer(EnvironmentImpl envImpl,
                    long waitTime,
                    String name)
    throws DatabaseException {

    super(waitTime, name, envImpl);
    this.envImpl = envImpl;
    logSizeBytesInterval =
        envImpl.getConfigManager().getLong
            (EnvironmentParams.CHECKPOINTER_BYTES_INTERVAL);
    logFileMax =
        envImpl.getConfigManager().getLong(EnvironmentParams.LOG_FILE_MAX);
    timeInterval = waitTime;
    lastCheckpointMillis = 0;

    nCheckpoints = 0;
    flushStats = new FlushStats();

    /* No checkpoint is in progress yet. */
    highestFlushLevel = IN.MIN_LEVEL;
}
91
92     /**
93      * Initializes the checkpoint intervals when no checkpoint is performed
94      * while opening the environment.
95      */

96     public void initIntervals(long lastCheckpointEnd,
97                               long lastCheckpointMillis) {
98         this.lastCheckpointEnd = lastCheckpointEnd;
99         this.lastCheckpointMillis = lastCheckpointMillis;
100     }
101
102     public int getHighestFlushLevel() {
103     return highestFlushLevel;
104     }
105
106     /**
107      * Figure out the wakeup period. Supplied through this static method
108      * because we need to pass wakeup period to the superclass and need to do
109      * the calcuation outside this constructor.
110      */

111     public static long getWakeupPeriod(DbConfigManager configManager)
112         throws IllegalArgumentException JavaDoc, DatabaseException {
113
114         long wakeupPeriod = PropUtil.microsToMillis
115             (configManager.getLong
116                 (EnvironmentParams.CHECKPOINTER_WAKEUP_INTERVAL));
117         long bytePeriod = configManager.getLong
118             (EnvironmentParams.CHECKPOINTER_BYTES_INTERVAL);
119
120         /* Checkpointing period must be set either by time or by log size. */
121         if ((wakeupPeriod == 0) && (bytePeriod == 0)) {
122             throw new IllegalArgumentException JavaDoc
123                 (EnvironmentParams.CHECKPOINTER_BYTES_INTERVAL.getName() +
124                  " and " +
125                  EnvironmentParams.CHECKPOINTER_WAKEUP_INTERVAL.getName() +
126                  " cannot both be 0. ");
127         }
128
129         /*
130          * Checkpointing by log size takes precendence over time based period.
131          */

132         if (bytePeriod == 0) {
133             return wakeupPeriod;
134         } else {
135             return 0;
136         }
137     }
138
139     /**
140      * Set checkpoint id -- can only be done after recovery.
141      */

142     synchronized public void setCheckpointId(long lastCheckpointId) {
143         checkpointId = lastCheckpointId;
144     }
145
146     public String JavaDoc toString() {
147         StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
148         sb.append("<Checkpointer name=\"").append(name).append("\"/>");
149         return sb.toString();
150     }
151
152     /**
153      * Load stats.
154      */

155     public void loadStats(StatsConfig config, EnvironmentStats stat)
156         throws DatabaseException {
157
158         stat.setNCheckpoints(nCheckpoints);
159         stat.setLastCheckpointStart(lastCheckpointStart);
160         stat.setLastCheckpointEnd(lastCheckpointEnd);
161         stat.setLastCheckpointId(checkpointId);
162         stat.setNFullINFlush(flushStats.nFullINFlush);
163         stat.setNFullBINFlush(flushStats.nFullBINFlush);
164         stat.setNDeltaINFlush(flushStats.nDeltaINFlush);
165         
166         if (config.getClear()) {
167             nCheckpoints = 0;
168             flushStats.nFullINFlush = 0;
169             flushStats.nFullBINFlush = 0;
170             flushStats.nDeltaINFlush = 0;
171         }
172     }
173     
174     /**
175      * @return the first active LSN point of the last completed checkpoint.
176      * If no checkpoint has run, return null.
177      */

178     public long getFirstActiveLsn() {
179         return lastFirstActiveLsn;
180     }
181
182     /**
183      * Initialize the FirstActiveLsn during recovery. The cleaner needs this.
184      */

185     public void setFirstActiveLsn(long lastFirstActiveLsn) {
186         this.lastFirstActiveLsn = lastFirstActiveLsn;
187     }
188
189     synchronized public void clearEnv() {
190         envImpl = null;
191     }
192
193     /**
194      * Return the number of retries when a deadlock exception occurs.
195      */

196     protected int nDeadlockRetries()
197         throws DatabaseException {
198
199         return envImpl.getConfigManager().getInt
200             (EnvironmentParams.CHECKPOINTER_RETRY);
201     }
202
203     /**
204      * Called whenever the DaemonThread wakes up from a sleep.
205      */

206     protected void onWakeup()
207         throws DatabaseException {
208
209         if (envImpl.isClosed()) {
210             return;
211         }
212
213         doCheckpoint(CheckpointConfig.DEFAULT,
214                      false, // flushAll
215
"daemon");
216     }
217
218     /**
219      * Wakes up the checkpointer if a checkpoint log interval is configured and
220      * the number of bytes written since the last checkpoint exeeds the size
221      * of the interval.
222      */

223     public void wakeupAfterWrite() {
224         if (logSizeBytesInterval != 0) {
225             long nextLsn = envImpl.getFileManager().getNextLsn();
226             if (DbLsn.getNoCleaningDistance
227                     (nextLsn, lastCheckpointEnd, logFileMax) >=
228                     logSizeBytesInterval) {
229                 wakeup();
230             }
231         }
232     }
233
234     /**
235      * Determine whether a checkpoint should be run.
236      *
237      * 1. If the force parameter is specified, always checkpoint.
238      *
239      * 2. If the config object specifies time or log size, use that.
240      *
241      * 3. If the environment is configured to use log size based checkpointing,
242      * check the log.
243      *
244      * 4. Lastly, use time based checking.
245      */

246     private boolean isRunnable(CheckpointConfig config)
247         throws DatabaseException {
248
249         /* Figure out if we're using log size or time to determine interval.*/
250         long useBytesInterval = 0;
251         long useTimeInterval = 0;
252         long nextLsn = DbLsn.NULL_LSN;
253         boolean runnable = false;
254         try {
255             if (config.getForce()) {
256                 runnable = true;
257                 return runnable;
258             } else if (config.getKBytes() != 0) {
259                 useBytesInterval = config.getKBytes() << 10;
260             } else if (config.getMinutes() != 0) {
261                 // convert to millis
262
useTimeInterval = config.getMinutes() * 60 * 1000;
263             } else if (logSizeBytesInterval != 0) {
264                 useBytesInterval = logSizeBytesInterval;
265             } else {
266                 useTimeInterval = timeInterval;
267             }
268
269             /*
270              * If our checkpoint interval is defined by log size, check on how
271              * much log has grown since the last checkpoint.
272              */

273             if (useBytesInterval != 0) {
274                 nextLsn = envImpl.getFileManager().getNextLsn();
275                 if (DbLsn.getNoCleaningDistance(nextLsn, lastCheckpointEnd,
276                         logFileMax) >=
277                     useBytesInterval) {
278                     runnable = true;
279                 } else {
280                     runnable = false;
281                 }
282             } else if (useTimeInterval != 0) {
283
284                 /*
285                  * Our checkpoint is determined by time. If enough time has
286                  * passed and some log data has been written, do a checkpoint.
287                  */

288                 long lastUsedLsn = envImpl.getFileManager().getLastUsedLsn();
289                 if (((System.currentTimeMillis() - lastCheckpointMillis) >=
290                      useTimeInterval) &&
291                     (DbLsn.compareTo(lastUsedLsn, lastCheckpointEnd) != 0)) {
292                     runnable = true;
293                 } else {
294                     runnable = false;
295                 }
296             } else {
297                 runnable = false;
298             }
299             return runnable;
300         } finally {
301             StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
302             sb.append("size interval=").append(useBytesInterval);
303             if (nextLsn != DbLsn.NULL_LSN) {
304                 sb.append(" nextLsn=").
305             append(DbLsn.getNoFormatString(nextLsn));
306             }
307             if (lastCheckpointEnd != DbLsn.NULL_LSN) {
308                 sb.append(" lastCkpt=");
309                 sb.append(DbLsn.getNoFormatString(lastCheckpointEnd));
310             }
311             sb.append(" time interval=").append(useTimeInterval);
312             sb.append(" force=").append(config.getForce());
313             sb.append(" runnable=").append(runnable);
314             
315             Tracer.trace(Level.FINEST,
316                          envImpl,
317                          sb.toString());
318         }
319     }
320
/**
 * The real work to do a checkpoint. This may be called by the checkpoint
 * thread when waking up, or it may be invoked programmatically through the
 * api.
 *
 * Returns without doing anything if the environment is read only or if
 * isRunnable() decides that no checkpoint is due.
 *
 * @param config the checkpoint request; consulted for whether a checkpoint
 * is due and for whether deltas may be logged.
 * @param flushAll if true, this checkpoint must flush all the way to
 * the top of the dbtree, instead of stopping at the highest level
 * last modified.
 * @param invokingSource a debug aid, to indicate who invoked this
 * checkpoint. (i.e. recovery, the checkpointer daemon, the cleaner,
 * programmatically)
 * @throws DatabaseException if any step fails; the failure is traced and
 * rethrown.
 */
public synchronized void doCheckpoint(CheckpointConfig config,
                                      boolean flushAll,
                                      String invokingSource)
    throws DatabaseException {

    if (envImpl.isReadOnly()) {
        return;
    }

    if (!isRunnable(config)) {
        return;
    }

    /*
     * If there are cleaned files to be deleted, flush an extra level to
     * write out the parents of cleaned nodes. This ensures that no node
     * will contain the LSN of a cleaned file.
     */
    Cleaner cleaner = envImpl.getCleaner();
    CheckpointStartCleanerState cleanerState =
        cleaner.getFilesAtCheckpointStart();
    boolean flushExtraLevel = !cleanerState.isEmpty();

    lastCheckpointMillis = System.currentTimeMillis();
    flushStats.resetPerRunCounters();

    /* Get the next checkpoint id. */
    checkpointId++;
    nCheckpoints++;

    boolean success = false;
    boolean traced = false;

    LogManager logManager = envImpl.getLogManager();

    /* dirtyMap keeps track of the INs to be written out by the ckpt. */
    DirtyINMap dirtyMap = new DirtyINMap(envImpl);
    try {

        /*
         * Eviction can run during checkpoint as long as it follows the
         * same rules for using provisional logging and for propagating
         * logging of the checkpoint dirty set up the tree. We have to lock
         * out the evictor after the logging of checkpoint start until
         * we've selected the dirty set. See SR 11163, 11349.
         */
        long checkpointStart = DbLsn.NULL_LSN;
        long firstActiveLsn = DbLsn.NULL_LSN;

        synchronized (envImpl.getEvictor()) {

            /* Log the checkpoint start. */
            CheckpointStart startEntry =
                new CheckpointStart(checkpointId, invokingSource);
            checkpointStart = logManager.log(startEntry);

            /*
             * Note the first active LSN point. The definition of
             * firstActiveLsn is that all log entries for active
             * transactions are equal to or after that LSN. With no active
             * transactions, or when the checkpoint start is earlier, the
             * checkpoint start is used.
             */
            firstActiveLsn = envImpl.getTxnManager().getFirstActiveLsn();
            if ((firstActiveLsn == DbLsn.NULL_LSN) ||
                (DbLsn.compareTo(checkpointStart, firstActiveLsn) < 0)) {
                firstActiveLsn = checkpointStart;
            }

            /* Find the set of dirty INs that must be logged. */
            dirtyMap.selectDirtyINsForCheckpoint
                (cleanerState.getDeferredWriteDbs());
        }

        /*
         * Add the dirty map to the memory budget, outside the evictor
         * synchronization section.
         */
        dirtyMap.addCostToMemoryBudget();

        /*
         * Figure out the highest flush level. If we're flushing all for
         * cleaning, we must flush to the point that there are no nodes
         * with LSNs in the cleaned files. The level is computed in a local
         * and published with one write, so a reader of the volatile field
         * never sees the level before the extra level is added.
         */
        int flushLevel;
        if (dirtyMap.getNumLevels() > 0) {
            if (flushAll) {
                flushLevel = envImpl.getDbMapTree().getHighestLevel();
            } else {
                flushLevel = dirtyMap.getHighestLevel();
                if (flushExtraLevel) {
                    flushLevel += 1;
                }
            }
        } else {
            flushLevel = IN.MAX_LEVEL;
        }
        highestFlushLevel = flushLevel;

        /* Flush IN nodes. */
        boolean allowDeltas = !config.getMinimizeRecoveryTime();
        boolean cleaningDeferredWriteDbs =
            (cleanerState.getDeferredWriteDbsSize() > 0);
        flushDirtyNodes(envImpl, dirtyMap, allowDeltas,
                        checkpointStart, flushLevel,
                        flushStats, cleaningDeferredWriteDbs);

        /*
         * Flush utilization info AFTER flushing IN nodes to reduce the
         * inaccuracies caused by the sequence FileSummaryLN-LN-BIN.
         */
        flushUtilizationInfo();

        CheckpointEnd endEntry =
            new CheckpointEnd(invokingSource,
                              checkpointStart,
                              envImpl.getRootLsn(),
                              firstActiveLsn,
                              Node.getLastId(),
                              envImpl.getDbMapTree().getLastDbId(),
                              envImpl.getTxnManager().getLastTxnId(),
                              checkpointId);

        /*
         * Log checkpoint end and update state kept about the last
         * checkpoint location. Send a trace message *before* the
         * checkpoint end log entry. This is done so that the normal trace
         * message doesn't affect the time-based isRunnable() calculation,
         * which only issues a checkpoint if a log record has been written
         * since the last checkpoint.
         */
        trace(envImpl, invokingSource, true);
        traced = true;

        /*
         * Always flush to ensure that cleaned files are not referenced,
         * and to ensure that this checkpoint is not wasted if we crash.
         */
        lastCheckpointEnd =
            logManager.logForceFlush(endEntry,
                                     true); // fsync required
        lastFirstActiveLsn = firstActiveLsn;
        lastCheckpointStart = checkpointStart;

        /*
         * Reset the highestFlushLevel so evictor activity knows there's no
         * further requirement for provisional logging. SR 11163.
         */
        highestFlushLevel = IN.MIN_LEVEL;

        success = true;
        cleaner.updateFilesAtCheckpointEnd(cleanerState);

    } catch (DatabaseException e) {
        Tracer.trace(envImpl, "Checkpointer", "doCheckpoint",
                     "checkpointId=" + checkpointId, e);
        throw e;
    } finally {
        dirtyMap.removeCostFromMemoryBudget();

        /*
         * Also reset the flush level when the checkpoint failed part way.
         * Otherwise the level would stay raised until the next checkpoint
         * and keep signalling that provisional logging is required.
         */
        highestFlushLevel = IN.MIN_LEVEL;

        if (!traced) {
            trace(envImpl, invokingSource, success);
        }
    }
}
504
505     private void trace(EnvironmentImpl envImpl,
506                        String JavaDoc invokingSource,
507                        boolean success ) {
508         StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
509         sb.append("Checkpoint ").append(checkpointId);
510         sb.append(": source=" ).append(invokingSource);
511         sb.append(" success=").append(success);
512         sb.append(" nFullINFlushThisRun=");
513         sb.append(flushStats.nFullINFlushThisRun);
514         sb.append(" nDeltaINFlushThisRun=");
515         sb.append(flushStats.nDeltaINFlushThisRun);
516         Tracer.trace(Level.CONFIG, envImpl, sb.toString());
517     }
518
519     /**
520      * Flush a FileSummaryLN node for each TrackedFileSummary that is currently
521      * active. Tell the UtilizationProfile about the updated file summary.
522      */

523     private void flushUtilizationInfo()
524         throws DatabaseException {
525
526         /* Utilization flushing may be disabled for unittests. */
527         if (!DbInternal.getCheckpointUP
528         (envImpl.getConfigManager().getEnvironmentConfig())) {
529             return;
530         }
531         
532         UtilizationProfile profile = envImpl.getUtilizationProfile();
533
534         TrackedFileSummary[] activeFiles =
535             envImpl.getUtilizationTracker().getTrackedFiles();
536
537         for (int i = 0; i < activeFiles.length; i += 1) {
538             profile.flushFileSummary(activeFiles[i]);
539         }
540     }
541
542     /**
543      * Flush a given database to disk. Like checkpoint, log from the bottom
544      * up so that parents properly represent their children.
545      */

546     public static void syncDatabase(EnvironmentImpl envImpl,
547                                     DatabaseImpl dbImpl,
548                                     boolean flushLog)
549         throws DatabaseException {
550
551         if (envImpl.isReadOnly()) {
552             return;
553         }
554
555         DirtyINMap dirtyMap = new DirtyINMap(envImpl);
556         FlushStats fstats = new FlushStats();
557         try {
558         /*
559          * Lock out eviction and other checkpointing during the
560              * selection of a dirty set.
561          */

562         synchronized (envImpl.getEvictor()) {
563         /* Find the dirty set. */
564         dirtyMap.selectDirtyINsForDb(dbImpl);
565         }
566
567             dirtyMap.addCostToMemoryBudget();
568
569             /* Write all dirtyINs out.*/
570             flushDirtyNodes(envImpl,
571                             dirtyMap,
572                             false, /* allowDeltas */
573                             0, /* ckpt start, only needed for allowDeltas*/
574                             envImpl.getDbMapTree().getHighestLevel(dbImpl),
575                             fstats,
576                             false); /* cleaning deferred write dbs */
577
578             /* Make changes durable. [#15254] */
579             if (flushLog) {
580                 envImpl.getLogManager().flush();
581             }
582         } catch (DatabaseException e) {
583             Tracer.trace(envImpl, "Checkpointer", "syncDatabase",
584                          "of " + dbImpl.getDebugName(), e);
585             throw e;
586         } finally {
587             dirtyMap.removeCostFromMemoryBudget();
588         }
589     }
590
591     /**
592      * Flush the nodes in order, from the lowest level to highest level. As a
593      * flush dirties its parent, add it to the dirty map, thereby cascading the
594      * writes up the tree. If flushAll wasn't specified, we need only cascade
595      * up to the highest level set at the start of checkpointing.
596      *
597      * Note that all but the top level INs and the BINDeltas are logged
598      * provisionally. That's because we don't need to process lower INs during
599      * recovery because the higher INs will end up pointing at them.
600      */

601     private static void flushDirtyNodes(EnvironmentImpl envImpl,
602                                         DirtyINMap dirtyMap,
603                                         boolean allowDeltas,
604                                         long checkpointStart,
605                                         int maxFlushLevel,
606                                         FlushStats fstats,
607                                         boolean cleaningDeferredWriteDbs)
608         throws DatabaseException {
609
610         LogManager logManager = envImpl.getLogManager();
611
612         /*
613          * In general, we flush until we reach the maxFlushLevel. If we're
614          * cleaning deferred write dbs, we sync only those dbs all the way up
615          * to the root. onlyFlushDeferredWriteDbs is true when we're above
616          * maxFlushLevel, but are still syncing.
617          */

618         boolean onlyFlushDeferredWriteDbs = false;
619
620         while (dirtyMap.getNumLevels() > 0) {
621
622             /* Work on one level's worth of nodes in ascending level order. */
623             Integer JavaDoc currentLevel = dirtyMap.getLowestLevelSet();
624             int currentLevelVal = currentLevel.intValue();
625             boolean logProvisionally = (currentLevelVal != maxFlushLevel);
626
627             Set JavaDoc nodeSet = dirtyMap.getSet(currentLevel);
628             Iterator JavaDoc iter = nodeSet.iterator();
629
630             /* Flush all those nodes */
631             while (iter.hasNext()) {
632                 CheckpointReference targetRef =
633                     (CheckpointReference) iter.next();
634
635                 /*
636                  * Flush if we're below maxFlushLevel, or we're above and
637                  * syncing cleaned deferred write dbs.
638                  */

639                 if (!onlyFlushDeferredWriteDbs ||
640                     (onlyFlushDeferredWriteDbs &&
641                      targetRef.db.isDeferredWrite())) {
642
643                     /* Evict before each operation. */
644                     envImpl.getEvictor().doCriticalEviction
645                         (true); // backgroundIO
646

647                     /*
648                      * Check if the db is still valid since INs of deleted
649                      * databases are left on the in-memory tree until the post
650                      * transaction cleanup is finished.
651                      */

652                     if (!(targetRef.db.isDeleted())) {
653                         flushIN(envImpl, logManager,
654                                 targetRef, dirtyMap, currentLevelVal,
655                                 logProvisionally, allowDeltas, checkpointStart,
656                                 fstats);
657                     }
658
659                     /* Sleep if background read/write limit was exceeded. */
660                     envImpl.sleepAfterBackgroundIO();
661                 }
662                 
663                 iter.remove();
664             }
665
666             /* We're done with this level. */
667             dirtyMap.removeSet(currentLevel);
668
669             /*
670              * For all regular databases, we can stop checkpointing at the
671              * previously calculated level. For deferredWriteDbs that are
672              * being synced, we need to flush to the roots.
673              */

674             if (currentLevelVal == maxFlushLevel) {
675                 if (cleaningDeferredWriteDbs) {
676                     onlyFlushDeferredWriteDbs = true;
677                 } else {
678                     break;
679                 }
680             }
681         }
682     }
683
684     /**
685      * Flush the target IN.
686      */

687     private static void flushIN(EnvironmentImpl envImpl,
688                                 LogManager logManager,
689                                 CheckpointReference targetRef,
690                                 DirtyINMap dirtyMap,
691                                 int currentLevel,
692                                 boolean logProvisionally,
693                                 boolean allowDeltas,
694                                 long checkpointStart,
695                                 FlushStats fstats)
696         throws DatabaseException {
697
698         Tree tree = targetRef.db.getTree();
699         boolean targetWasRoot = false;
700         if (targetRef.isDbRoot) {
701
702             /* We're trying to flush the root. */
703             RootFlusher flusher =
704         new RootFlusher(targetRef.db, logManager, targetRef.nodeId);
705             tree.withRootLatchedExclusive(flusher);
706             boolean flushed = flusher.getFlushed();
707
708             /*
709              * If this target isn't the root anymore, we'll have to handle it
710              * like a regular node.
711              */

712             targetWasRoot = flusher.stillRoot();
713             
714             /*
715              * Update the tree's owner, whether it's the env root or the
716              * dbmapping tree.
717              */

718             if (flushed) {
719                 DbTree dbTree = targetRef.db.getDbEnvironment().getDbMapTree();
720                 dbTree.modifyDbRoot(targetRef.db);
721                 fstats.nFullINFlushThisRun++;
722                 fstats.nFullINFlush++;
723             }
724         }
725
726         /*
727          * The following attempt to flush applies to two cases:
728      *
729          * (1) the target was not ever the root
730      *
731          * (2) the target was the root, when the checkpoint dirty set was
732          * assembled but is not the root now.
733          */

734         if (!targetWasRoot) {
735
736             /*
737              * The "isRoot" param is used to stop a search in
738              * BIN.descendOnParentSearch and is passed as false (never stop).
739              */

740             SearchResult result =
741                 tree.getParentINForChildIN(targetRef.nodeId,
742                                            targetRef.containsDuplicates,
743                                            false, // isRoot
744
targetRef.mainTreeKey,
745                                            targetRef.dupTreeKey,
746                                            false, // requireExactMatch
747
false, // updateGeneration
748
-1, // targetLevel
749
null, // trackingList
750
false); // doFetch
751

752             /*
753              * We must make sure that every IN that was selected for the
754              * checkpointer's dirty IN set at the beginning of checkpoint is
755              * written into the log and can be properly accessed from
756              * ancestors. However, we have to take care for cases where the
757              * evictor has written out a member of this dirty set before the
758              * checkpointer got to it. See SR 10249.
759              *
760              * If no possible parent is found, the compressor may have deleted
761              * this item before we got to processing it.
762              */

763             if (result.parent != null) {
764                 boolean mustLogParent = false;
765                 try {
766                     if (result.exactParentFound) {
767
768                         /*
769                          * If the child has already been evicted, don't
770                          * refetch it.
771                          */

772                         IN renewedTarget =
773                             (IN) result.parent.getTarget(result.index);
774
775                         if (renewedTarget == null) {
776                             /* nAlreadyEvictedThisRun++; -- for future */
777                             mustLogParent = true;
778                         } else {
779                             mustLogParent =
780                                 logTargetAndUpdateParent(envImpl,
781                                                          renewedTarget,
782                                                          result.parent,
783                                                          result.index,
784                                                          allowDeltas,
785                                                          checkpointStart,
786                                                          logProvisionally,
787                                                          fstats);
788                         }
789                     } else {
790
791                         /* result.exactParentFound was false. */
792                         if (result.childNotResident) {
793
794                             /*
795                              * But it was because the child wasn't resident.
796                              * To be on the safe side, we'll put the parent
797                              * into the dirty set to be logged when that level
798                              * is processed.
799                              *
800                              * Only do this if the parent we found is at a
801                              * higher level than the child. This ensures that
802                              * the non-exact search does not find a sibling
803                              * rather than a parent. [#11555]
804                              */

805                             if (result.parent.getLevel() > currentLevel) {
806                                 mustLogParent = true;
807                             }
808                             /* nAlreadyEvictedThisRun++; -- for future. */
809                         }
810                     }
811
812                     if (mustLogParent) {
813                         assert
814                             checkParentChildRelationship(result, currentLevel):
815                             dumpParentChildInfo(result,
816                                                 result.parent,
817                                                 targetRef.nodeId,
818                                                 currentLevel,
819                                                 tree);
820
821                         dirtyMap.addDirtyIN(result.parent, true);
822                     }
823                 } finally {
824                     result.parent.releaseLatch();
825                 }
826             }
827         }
828     }
829     
830     /**
831      * @return true if this parent is appropriately 1 level above the child.
832      */

833     private static boolean checkParentChildRelationship(SearchResult result,
834                                                  int childLevel) {
835
836         if (result.childNotResident && !result.exactParentFound) {
837
838             /*
839              * This might be coming from the #11555 clause, in which case we
840              * are logging over-cautiously, but intentionally, and the levels
841              * might not pass the test below.
842              */

843             return true;
844         }
845
846         /*
847          * In the main tree or mapping tree, your parent must be in the same
848          * number space, and must be 1 more than the child. In the dup tree,
849          * the parent might be a BIN from the main tree.
850          */

851         int parentLevel = result.parent.getLevel();
852         boolean isMapTree = (childLevel & IN.DBMAP_LEVEL) != 0;
853         boolean isMainTree = (childLevel & IN.MAIN_LEVEL) != 0;
854
855         boolean checkOk = false;
856         if (isMapTree || isMainTree) {
857             /* The parent must be child level + 1 */
858             if (parentLevel == (childLevel + 1)) {
859                 checkOk = true;
860             }
861         } else {
862             if (childLevel == 1) {
863                 /* A DBIN must have a level 2 DIN parent. */
864                 if (parentLevel == 2) {
865                     checkOk = true;
866                 }
867             } else {
868                 /* A DIN must have either a BIN or DIN parent. */
869                 if ((parentLevel == IN.BIN_LEVEL) ||
870                     (parentLevel == childLevel + 1)) {
871                     checkOk = true;
872                 }
873             }
874         }
875         return checkOk;
876     }
877
878     private static String JavaDoc dumpParentChildInfo(SearchResult result,
879                                        IN parent,
880                                        long childNodeId,
881                                        int currentLevel,
882                                        Tree tree)
883         throws DatabaseException {
884
885         StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
886         /* sb.append("ckptId=").append(checkpointId); */
887         sb.append(" result=").append(result);
888         sb.append(" parent node=").append(parent.getNodeId());
889         sb.append(" level=").append(parent.getLevel());
890         sb.append(" child node=").append(childNodeId);
891         sb.append(" level=").append(currentLevel);
892         return sb.toString();
893     }
894
895     private static boolean logTargetAndUpdateParent(EnvironmentImpl envImpl,
896                                                     IN target,
897                                                     IN parent,
898                                                     int index,
899                                                     boolean allowDeltas,
900                                                     long checkpointStart,
901                                                     boolean logProvisionally,
902                                                     FlushStats fstats)
903         throws DatabaseException {
904
905         long newLsn = DbLsn.NULL_LSN;
906         boolean mustLogParent = true;
907         target.latch(false);
908         try {
909
910             /*
911              * Compress this node if necessary. Note that this may dirty the
912              * node.
913              */

914             envImpl.lazyCompress(target);
915
916             if (target.getDirty()) {
917                 if (target.getDatabase().isDeferredWrite()) {
918
919                     /*
920                      * Find dirty descendants to avoid logging nodes with
921                      * never-logged children. See [#13936] and
922                      * IN.logDirtyChildren for description of the case.
923                      *
924                      * Note that we must log both dirty and never-logged
925                      * descendants to be sure to have a consistent view of the
926                      * split. If we didn't, we could end up with the post-split
927                      * version of a new sibling and the pre-split version of an
928                      * split sibling in the log, which could result in a
929                      * recovery where descendants are incorrectly duplicated,
930                      * because they are in both the pre-split split sibling,
931                      * and the post-split version of the new sibling.
932                      */

933                     target.logDirtyChildren();
934                 }
935
936                 /*
937                  * Note that target decides whether to log a delta. Only BINs
938                  * that fall into the required percentages and have not been
939                  * cleaned will be logged with a delta. Cleaner migration is
940                  * allowed.
941                  */

942                 newLsn = target.log(envImpl.getLogManager(),
943                                     allowDeltas,
944                                     logProvisionally,
945                                     true, // proactiveMigration
946
true, // backgroundIO
947
parent);
948
949                 if (allowDeltas && (newLsn == DbLsn.NULL_LSN)) {
950                     fstats.nDeltaINFlushThisRun++;
951                     fstats.nDeltaINFlush++;
952
953                     /*
954                      * If this BIN was already logged after checkpoint start
955                      * and before this point (i.e. by an eviction), we must
956                      * make sure that the last full version is accessible from
957                      * ancestors. We can skip logging parents only if this is
958                      * the first logging of this node in the checkpoint
959                      * interval.
960                      */

961                     long lastFullLsn = target.getLastFullVersion();
962                     if (DbLsn.compareTo(lastFullLsn,
963                                         checkpointStart) < 0) {
964                         mustLogParent = false;
965                     }
966                 }
967             }
968         } finally {
969             target.releaseLatch();
970         }
971
972         /* Update the parent if a full version was logged. */
973         if (newLsn != DbLsn.NULL_LSN) {
974             fstats.nFullINFlushThisRun++;
975             fstats.nFullINFlush++;
976             if (target instanceof BIN) {
977                 fstats.nFullBINFlush++;
978                 fstats.nFullBINFlushThisRun++;
979             }
980             parent.updateEntry(index, newLsn);
981         }
982         
983         return mustLogParent;
984     }
985
986     /*
987      * RootFlusher lets us write out the root IN within the root latch.
988      */

989     private static class RootFlusher implements WithRootLatched {
990         private DatabaseImpl db;
991         private boolean flushed;
992         private boolean stillRoot;
993         private LogManager logManager;
994         private long targetNodeId;
995
996         RootFlusher(DatabaseImpl db,
997                     LogManager logManager,
998                     long targetNodeId) {
999             this.db = db;
1000            flushed = false;
1001            this.logManager = logManager;
1002            this.targetNodeId = targetNodeId;
1003            stillRoot = false;
1004        }
1005
1006        /**
1007         * Flush the rootIN if dirty.
1008         */

1009        public IN doWork(ChildReference root)
1010            throws DatabaseException {
1011
1012        if (root == null) {
1013        return null;
1014        }
1015            IN rootIN = (IN) root.fetchTarget(db, null);
1016            rootIN.latch(false);
1017            try {
1018                if (rootIN.getNodeId() == targetNodeId) {
1019
1020                    /*
1021                     * Find dirty descendants to avoid logging nodes with
1022                     * never-logged children. See [#13936]
1023                     */

1024                    if (rootIN.getDatabase().isDeferredWrite()) {
1025                        rootIN.logDirtyChildren();
1026                    }
1027
1028                    /*
1029             * stillRoot handles the situation where the root was split
1030             * after it was placed in the checkpointer's dirty set.
1031                     */

1032                    stillRoot = true;
1033                    if (rootIN.getDirty()) {
1034                        long newLsn = rootIN.log(logManager);
1035                        root.setLsn(newLsn);
1036                        flushed = true;
1037                    }
1038                }
1039            } finally {
1040                rootIN.releaseLatch();
1041            }
1042            return null;
1043        }
1044
1045        boolean getFlushed() {
1046            return flushed;
1047        }
1048
1049        boolean stillRoot() {
1050            return stillRoot;
1051        }
1052    }
1053
1054    /*
1055     * CheckpointReferences are used to identify nodes that must be flushed as
1056     * part of the checkpoint. We don't keep an actual reference to the node
1057     * because that prevents nodes from being GC'ed during checkpoint.
1058     *
1059     * Using a checkpointReference introduces a window between the point when
1060     * the checkpoint dirty set is created and when the node is flushed. Some
1061     * of the fields saved in the reference are immutable: db, nodeId,
1062     * containsDuplicates. The others are not and we have to handle potential
1063     * change:
1064     *
1065     * isDbRoot: it's possible for isDbRoot to go from true->false, but not
1066     * false->true. True->false is handled by the flushIN method
1067     * by finding the root and checking if it is the target.
1068     * mainTreeKey, dupTreeKey: These can change only in the event of a
1069     * split. If they do, there is the chance that the checkpointer
1070     * will find the wrong node to flush, but that's okay because
1071     * the split guarantees flushing to the root, so the target will
1072     * be properly logged within the checkpoint period.
1073     *
1074     * The class and ctor are public for the Sizeof program.
1075     */

1076    public static class CheckpointReference {
1077        DatabaseImpl db;
1078        long nodeId;
1079        boolean containsDuplicates;
1080        boolean isDbRoot;
1081        byte[] mainTreeKey;
1082        byte[] dupTreeKey;
1083
1084        public CheckpointReference(DatabaseImpl db,
1085                            long nodeId,
1086                            boolean containsDuplicates,
1087                            boolean isDbRoot,
1088                            byte[] mainTreeKey,
1089                            byte[] dupTreeKey) {
1090            this.db = db;
1091            this.nodeId = nodeId;
1092            this.containsDuplicates = containsDuplicates;
1093            this.isDbRoot = isDbRoot;
1094            this.mainTreeKey = mainTreeKey;
1095            this.dupTreeKey = dupTreeKey;
1096        }
1097
1098        public boolean equals(Object JavaDoc o) {
1099            if (!(o instanceof CheckpointReference)) {
1100                return false;
1101            }
1102
1103            CheckpointReference other = (CheckpointReference) o;
1104            return nodeId == other.nodeId;
1105        }
1106
1107        public int hashCode() {
1108            return (int) nodeId;
1109        }
1110
1111        public String JavaDoc toString() {
1112            StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
1113            sb.append("db=").append(db.getId());
1114            sb.append(" nodeId=").append(nodeId);
1115            return sb.toString();
1116        }
1117    }
1118
1119    /**
1120     * A struct to hold log flushing stats for checkpoint and database sync.
1121     */

1122    public static class FlushStats {
1123
1124        public int nFullINFlush;
1125        public int nFullBINFlush;
1126        public int nDeltaINFlush;
1127        public int nFullINFlushThisRun;
1128        public int nFullBINFlushThisRun;
1129        public int nDeltaINFlushThisRun;
1130
1131        /* For future addition to stats:
1132           private int nAlreadyEvictedThisRun;
1133        */

1134
1135        /* Reset per-run counters. */
1136        void resetPerRunCounters() {
1137            nFullINFlushThisRun = 0;
1138            nFullBINFlushThisRun = 0;
1139            nDeltaINFlushThisRun = 0;
1140            /* nAlreadyEvictedThisRun = 0; -- for future */
1141        }
1142    }
1143}
1144
Popular Tags