Magellan Linux

Contents of /trunk/db/patches/patch.4.1.25.2



Revision 144 - Tue May 8 20:06:05 2007 UTC by niro
File size: 18100 byte(s)
-import

*** dbinc/mp.h.orig 2004-02-02 10:24:53.000000000 -0800
--- dbinc/mp.h 2004-02-02 10:26:27.000000000 -0800
***************
*** 149,154 ****
--- 149,161 ----
       * region lock).
       */
      DB_MPOOL_STAT stat;        /* Per-cache mpool statistics. */
+
+     /*
+      * We track page puts so that we can decide when allocation is never
+      * going to succeed. We don't lock the field, all we care about is
+      * if it changes.
+      */
+     u_int32_t put_counter;     /* Count of page put calls. */
  };

  struct __db_mpool_hash {
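For readers skimming the patch: the new put_counter field is a lock-free progress indicator. The allocator snapshots it, and if it has not moved after several full retry cycles, no thread is returning pages and waiting longer cannot help. A minimal standalone sketch of the idea (not Berkeley DB code; the cache struct, cache_put() and alloc_should_give_up() names are invented for illustration):

#include <stdint.h>

struct cache {
    uint32_t put_counter;    /* count of page puts; read without a lock */
};

/* Called whenever a page is returned to the cache. */
static void
cache_put(struct cache *c)
{
    ++c->put_counter;
}

/*
 * Called by an allocator that has failed repeatedly.  Exact equality is
 * all that matters, so no lock is needed: a stale read can only delay
 * the give-up decision by one more cycle.
 */
static int
alloc_should_give_up(const struct cache *c, uint32_t snapshot_before_retries)
{
    return (c->put_counter == snapshot_before_retries);
}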
*** mp/mp_fput.c.orig 2002-08-13 06:26:41.000000000 -0700
--- mp/mp_fput.c 2004-02-02 10:22:35.000000000 -0800
***************
*** 19,24 ****
--- 19,26 ----
  #include "dbinc/db_shash.h"
  #include "dbinc/mp.h"

+ static void __memp_reset_lru __P((DB_ENV *, REGINFO *));
+
  /*
   * __memp_fput --
   *    Mpool file put function.
***************
*** 198,202 ****
--- 200,255 ----

      MUTEX_UNLOCK(dbenv, &hp->hash_mutex);

+     /*
+      * On every buffer put we update the buffer generation number and check
+      * for wraparound.
+      */
+     if (++c_mp->lru_count == UINT32_T_MAX)
+         __memp_reset_lru(dbenv, dbmp->reginfo);
+
      return (0);
  }
+
+ /*
+  * __memp_reset_lru --
+  *    Reset the cache LRU counter.
+  */
+ static void
+ __memp_reset_lru(dbenv, memreg)
+     DB_ENV *dbenv;
+     REGINFO *memreg;
+ {
+     BH *bhp;
+     DB_MPOOL_HASH *hp;
+     MPOOL *c_mp;
+     int bucket;
+
+     c_mp = memreg->primary;
+
+     /*
+      * Update the counter so all future allocations will start at the
+      * bottom.
+      */
+     c_mp->lru_count -= MPOOL_BASE_DECREMENT;
+
+     /* Adjust the priority of every buffer in the system. */
+     for (hp = R_ADDR(memreg, c_mp->htab),
+         bucket = 0; bucket < c_mp->htab_buckets; ++hp, ++bucket) {
+         /*
+          * Skip empty buckets.
+          *
+          * We can check for empty buckets before locking as we
+          * only care if the pointer is zero or non-zero.
+          */
+         if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
+             continue;
+
+         MUTEX_LOCK(dbenv, &hp->hash_mutex);
+         for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
+             bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
+             if (bhp->priority != UINT32_T_MAX &&
+                 bhp->priority > MPOOL_BASE_DECREMENT)
+                 bhp->priority -= MPOOL_BASE_DECREMENT;
+         MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
+     }
+ }
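The wraparound handling above is easier to see in isolation. A compilable toy model (again not the DB source; BASE_DECREMENT, NBUF and the arrays are invented): priorities are drawn from a monotonically increasing generation counter, and when the counter is about to wrap, both the counter and every stored priority are shifted down by the same constant, preserving the relative order the LRU comparison depends on.

#include <stdint.h>

#define BASE_DECREMENT (UINT32_MAX / 4)    /* assumed scale factor */
#define NBUF           64

static uint32_t lru_count;                 /* global generation number */
static uint32_t priority[NBUF];            /* per-buffer priorities */

/* Shift the generation space down, keeping relative order intact. */
static void
reset_lru(void)
{
    int i;

    lru_count -= BASE_DECREMENT;
    for (i = 0; i < NBUF; ++i)
        if (priority[i] != UINT32_MAX &&   /* UINT32_MAX pins a buffer */
            priority[i] > BASE_DECREMENT)
            priority[i] -= BASE_DECREMENT;
}

/* Stamp a buffer with the current generation on every put. */
static void
buffer_put(int i)
{
    priority[i] = lru_count;
    if (++lru_count == UINT32_MAX)         /* about to wrap: rescale */
        reset_lru();
}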
*** mp/mp_alloc.c.orig 2002-08-17 07:23:25.000000000 -0700
--- mp/mp_alloc.c 2004-02-02 10:28:15.000000000 -0800
***************
*** 25,31 ****
  } HS;

  static void __memp_bad_buffer __P((DB_MPOOL_HASH *));
- static void __memp_reset_lru __P((DB_ENV *, REGINFO *, MPOOL *));

  /*
   * __memp_alloc --
--- 25,30 ----
***************
*** 50,57 ****
      MPOOL *c_mp;
      MPOOLFILE *bh_mfp;
      size_t freed_space;
!     u_int32_t buckets, buffers, high_priority, max_na, priority;
!     int aggressive, ret;
      void *p;

      dbenv = dbmp->dbenv;
--- 49,57 ----
      MPOOL *c_mp;
      MPOOLFILE *bh_mfp;
      size_t freed_space;
!     u_int32_t buckets, buffers, high_priority, priority, put_counter;
!     u_int32_t total_buckets;
!     int aggressive, giveup, ret;
      void *p;

      dbenv = dbmp->dbenv;
***************
*** 59,76 ****
      dbht = R_ADDR(memreg, c_mp->htab);
      hp_end = &dbht[c_mp->htab_buckets];

!     buckets = buffers = 0;
!     aggressive = 0;

      c_mp->stat.st_alloc++;

      /*
-      * Get aggressive if we've tried to flush the number of pages as are
-      * in the system without finding space.
-      */
-     max_na = 5 * c_mp->htab_buckets;
-
-     /*
       * If we're allocating a buffer, and the one we're discarding is the
       * same size, we don't want to waste the time to re-integrate it into
       * the shared memory free list. If the DB_MPOOLFILE argument isn't
--- 59,71 ----
      dbht = R_ADDR(memreg, c_mp->htab);
      hp_end = &dbht[c_mp->htab_buckets];

!     buckets = buffers = put_counter = total_buckets = 0;
!     aggressive = giveup = 0;
!     hp_tmp = NULL;

      c_mp->stat.st_alloc++;

      /*
       * If we're allocating a buffer, and the one we're discarding is the
       * same size, we don't want to waste the time to re-integrate it into
       * the shared memory free list. If the DB_MPOOLFILE argument isn't
***************
*** 81,99 ****
          len = (sizeof(BH) - sizeof(u_int8_t)) + mfp->stat.st_pagesize;

      R_LOCK(dbenv, memreg);
-
-     /*
-      * On every buffer allocation we update the buffer generation number
-      * and check for wraparound.
-      */
-     if (++c_mp->lru_count == UINT32_T_MAX)
-         __memp_reset_lru(dbenv, memreg, c_mp);
-
      /*
       * Anything newer than 1/10th of the buffer pool is ignored during
       * allocation (unless allocation starts failing).
       */
-     DB_ASSERT(c_mp->lru_count > c_mp->stat.st_pages / 10);
      high_priority = c_mp->lru_count - c_mp->stat.st_pages / 10;

      /*
--- 76,85 ----
***************
*** 120,129 ****
       * We're not holding the region locked here, these statistics
       * can't be trusted.
       */
!     if (buckets != 0) {
!         if (buckets > c_mp->stat.st_alloc_max_buckets)
!             c_mp->stat.st_alloc_max_buckets = buckets;
!         c_mp->stat.st_alloc_buckets += buckets;
      }
      if (buffers != 0) {
          if (buffers > c_mp->stat.st_alloc_max_pages)
--- 106,116 ----
       * We're not holding the region locked here, these statistics
       * can't be trusted.
       */
!     total_buckets += buckets;
!     if (total_buckets != 0) {
!         if (total_buckets > c_mp->stat.st_alloc_max_buckets)
!             c_mp->stat.st_alloc_max_buckets = total_buckets;
!         c_mp->stat.st_alloc_buckets += total_buckets;
      }
      if (buffers != 0) {
          if (buffers > c_mp->stat.st_alloc_max_pages)
***************
*** 131,136 ****
--- 118,129 ----
          c_mp->stat.st_alloc_pages += buffers;
      }
      return (0);
+     } else if (giveup || c_mp->stat.st_pages == 0) {
+         R_UNLOCK(dbenv, memreg);
+
+         __db_err(dbenv,
+             "unable to allocate space from the buffer cache");
+         return (ret);
      }

      /*
***************
*** 138,163 ****
       * we need. Reset our free-space counter.
       */
      freed_space = 0;

      /*
       * Walk the hash buckets and find the next two with potentially useful
       * buffers. Free the buffer with the lowest priority from the buckets'
       * chains.
       */
!     for (hp_tmp = NULL;;) {
          /* Check for wrap around. */
          hp = &dbht[c_mp->last_checked++];
          if (hp >= hp_end) {
              c_mp->last_checked = 0;
!
!             /*
!              * If we've gone through all of the hash buckets, try
!              * an allocation. If the cache is small, the old page
!              * size is small, and the new page size is large, we
!              * might have freed enough memory (but not 3 times the
!              * memory).
!              */
!             goto alloc;
          }

          /*
--- 131,154 ----
       * we need. Reset our free-space counter.
       */
      freed_space = 0;
+     total_buckets += buckets;
+     buckets = 0;

      /*
       * Walk the hash buckets and find the next two with potentially useful
       * buffers. Free the buffer with the lowest priority from the buckets'
       * chains.
       */
!     for (;;) {
!         /* All pages have been freed, make one last try */
!         if (c_mp->stat.st_pages == 0)
!             goto alloc;
!
          /* Check for wrap around. */
          hp = &dbht[c_mp->last_checked++];
          if (hp >= hp_end) {
              c_mp->last_checked = 0;
!             hp = &dbht[c_mp->last_checked++];
          }

          /*
***************
*** 172,210 ****
          /*
           * The failure mode is when there are too many buffers we can't
           * write or there's not enough memory in the system. We don't
!          * have a metric for deciding if allocation has no possible way
!          * to succeed, so we don't ever fail, we assume memory will be
!          * available if we wait long enough.
           *
!          * Get aggressive if we've tried to flush 5 times the number of
!          * hash buckets as are in the system -- it's possible we have
!          * been repeatedly trying to flush the same buffers, although
!          * it's unlikely. Aggressive means:
           *
           * a: set a flag to attempt to flush high priority buffers as
           *    well as other buffers.
           * b: sync the mpool to force out queue extent pages. While we
           *    might not have enough space for what we want and flushing
           *    is expensive, why not?
!          * c: sleep for a second -- hopefully someone else will run and
!          *    free up some memory. Try to allocate memory too, in case
!          *    the other thread returns its memory to the region.
!          * d: look at a buffer in every hash bucket rather than choose
           *    the more preferable of two.
           *
           * !!!
           * This test ignores pathological cases like no buffers in the
           * system -- that shouldn't be possible.
           */
!         if ((++buckets % max_na) == 0) {
!             aggressive = 1;
!
              R_UNLOCK(dbenv, memreg);

!             (void)__memp_sync_int(
!                 dbenv, NULL, 0, DB_SYNC_ALLOC, NULL);
!
!             (void)__os_sleep(dbenv, 1, 0);

              R_LOCK(dbenv, memreg);
              goto alloc;
--- 163,221 ----
          /*
           * The failure mode is when there are too many buffers we can't
           * write or there's not enough memory in the system. We don't
!          * have a way to know that allocation has no way to succeed.
!          * We fail if there were no pages returned to the cache after
!          * we've been trying for a relatively long time.
           *
!          * Get aggressive if we've tried to flush the number of hash
!          * buckets as are in the system and have not found any more
!          * space. Aggressive means:
           *
           * a: set a flag to attempt to flush high priority buffers as
           *    well as other buffers.
           * b: sync the mpool to force out queue extent pages. While we
           *    might not have enough space for what we want and flushing
           *    is expensive, why not?
!          * c: look at a buffer in every hash bucket rather than choose
           *    the more preferable of two.
+          * d: start to think about giving up.
+          *
+          * If we get here twice, sleep for a second, hopefully someone
+          * else will run and free up some memory.
+          *
+          * Always try to allocate memory too, in case some other thread
+          * returns its memory to the region.
           *
           * !!!
           * This test ignores pathological cases like no buffers in the
           * system -- that shouldn't be possible.
           */
!         if ((++buckets % c_mp->htab_buckets) == 0) {
!             if (freed_space > 0)
!                 goto alloc;
              R_UNLOCK(dbenv, memreg);

!             switch (++aggressive) {
!             case 1:
!                 break;
!             case 2:
!                 put_counter = c_mp->put_counter;
!                 /* FALLTHROUGH */
!             case 3:
!             case 4:
!             case 5:
!             case 6:
!                 (void)__memp_sync_int(
!                     dbenv, NULL, 0, DB_SYNC_ALLOC, NULL);
!
!                 (void)__os_sleep(dbenv, 1, 0);
!                 break;
!             default:
!                 aggressive = 1;
!                 if (put_counter == c_mp->put_counter)
!                     giveup = 1;
!                 break;
!             }

              R_LOCK(dbenv, memreg);
              goto alloc;
***************
*** 277,283 ****
           * thread may have acquired this buffer and incremented the ref
           * count after we wrote it, in which case we can't have it.
           *
!          * If there's a write error, avoid selecting this buffer again
           * by making it the bucket's least-desirable buffer.
           */
          if (ret != 0 || bhp->ref != 0) {
--- 288,295 ----
           * thread may have acquired this buffer and incremented the ref
           * count after we wrote it, in which case we can't have it.
           *
!          * If there's a write error and we're having problems finding
!          * something to allocate, avoid selecting this buffer again
           * by making it the bucket's least-desirable buffer.
           */
          if (ret != 0 || bhp->ref != 0) {
***************
*** 301,306 ****
--- 313,320 ----

          freed_space += __db_shsizeof(bhp);
          __memp_bhfree(dbmp, hp, bhp, 1);
+         if (aggressive > 1)
+             aggressive = 1;

          /*
           * Unlock this hash bucket and re-acquire the region lock. If
***************
*** 362,415 ****
      hp->hash_priority = SH_TAILQ_FIRST(&hp->hash_bucket, __bh)->priority;
  }

- /*
-  * __memp_reset_lru --
-  *    Reset the cache LRU counter.
-  */
- static void
- __memp_reset_lru(dbenv, memreg, c_mp)
-     DB_ENV *dbenv;
-     REGINFO *memreg;
-     MPOOL *c_mp;
- {
-     BH *bhp;
-     DB_MPOOL_HASH *hp;
-     int bucket;
-
-     /*
-      * Update the counter so all future allocations will start at the
-      * bottom.
-      */
-     c_mp->lru_count -= MPOOL_BASE_DECREMENT;
-
-     /* Release the region lock. */
-     R_UNLOCK(dbenv, memreg);
-
-     /* Adjust the priority of every buffer in the system. */
-     for (hp = R_ADDR(memreg, c_mp->htab),
-         bucket = 0; bucket < c_mp->htab_buckets; ++hp, ++bucket) {
-         /*
-          * Skip empty buckets.
-          *
-          * We can check for empty buckets before locking as we
-          * only care if the pointer is zero or non-zero.
-          */
-         if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
-             continue;
-
-         MUTEX_LOCK(dbenv, &hp->hash_mutex);
-         for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
-             bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
-             if (bhp->priority != UINT32_T_MAX &&
-                 bhp->priority > MPOOL_BASE_DECREMENT)
-                 bhp->priority -= MPOOL_BASE_DECREMENT;
-         MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
-     }
-
-     /* Reacquire the region lock. */
-     R_LOCK(dbenv, memreg);
- }
-
  #ifdef DIAGNOSTIC
  /*
   * __memp_check_order --
--- 376,381 ----
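The heart of the mp_alloc.c change is the escalation ladder in that switch statement. A compilable sketch of the same logic (stand-in names, not the DB source: sync_cache() models __memp_sync_int(), and the counter is the put_counter added in dbinc/mp.h): each failed sweep over the hash buckets bumps an aggressiveness level; level 2 snapshots the put counter, levels 2 through 6 flush the pool and sleep so other threads can run, and past level 6 the allocator gives up only if the snapshot shows no pages came back in the meantime.

#include <stdint.h>
#include <unistd.h>

static uint32_t put_counter_global;  /* bumped elsewhere on each page put */

static void
sync_cache(void)                     /* stand-in for __memp_sync_int() */
{
}

/*
 * Called after each full, fruitless sweep of the hash buckets.
 * Returns 1 once the caller should stop retrying.
 */
static int
sweep_failed(int *aggressive, uint32_t *put_snapshot)
{
    switch (++*aggressive) {
    case 1:                          /* widen the search only */
        break;
    case 2:                          /* remember the put activity level */
        *put_snapshot = put_counter_global;
        /* FALLTHROUGH */
    case 3:
    case 4:
    case 5:
    case 6:                          /* flush, then let other threads run */
        sync_cache();
        (void)sleep(1);
        break;
    default:                         /* a long time with no progress */
        *aggressive = 1;
        if (put_counter_global == *put_snapshot)
            return (1);              /* nothing came back: give up */
        break;
    }
    return (0);
}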
*** dbreg/dbreg_rec.c.orig 2002-08-17 07:22:52.000000000 -0700
--- dbreg/dbreg_rec.c 2003-11-08 10:59:19.000000000 -0800
***************
*** 174,192 ****
           * Typically, closes should match an open which means
           * that if this is a close, there should be a valid
           * entry in the dbentry table when we get here,
!          * however there is an exception. If this is an
           * OPENFILES pass, then we may have started from
           * a log file other than the first, and the
           * corresponding open appears in an earlier file.
!          * We can ignore that case, but all others are errors.
           */
          dbe = &dblp->dbentry[argp->fileid];
          if (dbe->dbp == NULL && !dbe->deleted) {
              /* No valid entry here. */
!             if ((argp->opcode != LOG_CLOSE &&
!                 argp->opcode != LOG_RCLOSE) ||
!                 (op != DB_TXN_OPENFILES &&
!                 op !=DB_TXN_POPENFILES)) {
                  __db_err(dbenv,
                      "Improper file close at %lu/%lu",
                      (u_long)lsnp->file,
--- 174,193 ----
           * Typically, closes should match an open which means
           * that if this is a close, there should be a valid
           * entry in the dbentry table when we get here,
!          * however there are exceptions. 1. If this is an
           * OPENFILES pass, then we may have started from
           * a log file other than the first, and the
           * corresponding open appears in an earlier file.
!          * 2. If we are undoing an open on an abort or
!          * recovery, it's possible that we failed after
!          * the log record, but before we actually entered
!          * a handle here.
           */
          dbe = &dblp->dbentry[argp->fileid];
          if (dbe->dbp == NULL && !dbe->deleted) {
              /* No valid entry here. */
!             if (DB_REDO(op) ||
!                 argp->opcode == LOG_CHECKPOINT) {
                  __db_err(dbenv,
                      "Improper file close at %lu/%lu",
                      (u_long)lsnp->file,
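The dbreg_rec.c hunk narrows when a close without a matching table entry is reported as an error. A condensed model of the before-and-after predicates (stand-in types; DB_REDO() is a real Berkeley DB macro testing whether a pass redoes records, modeled here as a flag, and the two OPENFILES pass types are collapsed into one):

enum opcode { LOG_OPEN, LOG_CLOSE, LOG_RCLOSE, LOG_CHECKPOINT };

/* Old rule: any record that is not a close seen during an OPENFILES
 * pass is an error. */
static int
old_is_error(enum opcode opcode, int is_openfiles_pass)
{
    return ((opcode != LOG_CLOSE && opcode != LOG_RCLOSE) ||
        !is_openfiles_pass);
}

/* New rule: only complain when the record is being redone or is a
 * checkpoint; undoing an open that never installed a handle (an abort,
 * or a crash between writing the log record and updating the table)
 * is now legal. */
static int
new_is_error(enum opcode opcode, int is_redo_pass)
{
    return (is_redo_pass || opcode == LOG_CHECKPOINT);
}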
*** env/env_recover.c.orig.1 2002-08-22 14:52:51.000000000 -0700
--- env/env_recover.c 2003-11-15 08:20:59.000000000 -0800
***************
*** 232,243 ****
       * we'll still need to do a vtruncate based on information we haven't
       * yet collected.
       */
!     if (ret == DB_NOTFOUND) {
          ret = 0;
!         if (max_lsn == NULL)
!             goto done;
!     }
!     if (ret != 0)
          goto err;

      hi_txn = txnid;
--- 232,240 ----
       * we'll still need to do a vtruncate based on information we haven't
       * yet collected.
       */
!     if (ret == DB_NOTFOUND)
          ret = 0;
!     else if (ret != 0)
          goto err;

      hi_txn = txnid;
***************
*** 331,337 ****

      /* Find a low txnid. */
      ret = 0;
!     do {
          /* txnid is after rectype, which is a u_int32. */
          memcpy(&txnid,
              (u_int8_t *)data.data + sizeof(u_int32_t), sizeof(txnid));
--- 328,334 ----

      /* Find a low txnid. */
      ret = 0;
!     if (hi_txn != 0) do {
          /* txnid is after rectype, which is a u_int32. */
          memcpy(&txnid,
              (u_int8_t *)data.data + sizeof(u_int32_t), sizeof(txnid));
***************
*** 344,354 ****
       * There are no transactions and we're not recovering to an LSN (see
       * above), so there is nothing to do.
       */
!     if (ret == DB_NOTFOUND) {
          ret = 0;
-         if (max_lsn == NULL)
-             goto done;
-     }

      /* Reset to the first lsn. */
      if (ret != 0 || (ret = logc->get(logc, &first_lsn, &data, DB_SET)) != 0)
--- 341,348 ----
       * There are no transactions and we're not recovering to an LSN (see
       * above), so there is nothing to do.
       */
!     if (ret == DB_NOTFOUND)
          ret = 0;

      /* Reset to the first lsn. */
      if (ret != 0 || (ret = logc->get(logc, &first_lsn, &data, DB_SET)) != 0)
***************
*** 367,372 ****
--- 361,370 ----
          txninfo, &data, &first_lsn, &last_lsn, nfiles, 1)) != 0)
          goto err;

+     /* If there were no transactions, then we can bail out early. */
+     if (hi_txn == 0 && max_lsn == NULL)
+         goto done;
+
      /*
       * Pass #2.
       *
***************
*** 483,488 ****
--- 481,487 ----
      if ((ret = __dbreg_close_files(dbenv)) != 0)
          goto err;

+ done:
      if (max_lsn != NULL) {
          region->last_ckp = ((DB_TXNHEAD *)txninfo)->ckplsn;

***************
*** 538,544 ****
      __db_err(dbenv, "Recovery complete at %.24s", ctime(&now));
      __db_err(dbenv, "%s %lx %s [%lu][%lu]",
          "Maximum transaction ID",
!         ((DB_TXNHEAD *)txninfo)->maxid,
          "Recovery checkpoint",
          (u_long)region->last_ckp.file,
          (u_long)region->last_ckp.offset);
--- 537,544 ----
      __db_err(dbenv, "Recovery complete at %.24s", ctime(&now));
      __db_err(dbenv, "%s %lx %s [%lu][%lu]",
          "Maximum transaction ID",
!         txninfo == NULL ? TXN_MINIMUM :
!         ((DB_TXNHEAD *)txninfo)->maxid,
          "Recovery checkpoint",
          (u_long)region->last_ckp.file,
          (u_long)region->last_ckp.offset);
***************
*** 550,556 ****
          (u_long)lsn.file, (u_long)lsn.offset, pass);
      }

- done:
  err: if (lockid != DB_LOCK_INVALIDID) {
      if ((t_ret = __rep_unlockpages(dbenv, lockid)) != 0 && ret == 0)
          ret = t_ret;
--- 550,555 ----