Magellan Linux

Contents of /trunk/db/patches/patch.4.1.25.2

Revision 144
Tue May 8 20:06:05 2007 UTC by niro
File size: 18100 byte(s)
Log message: -import

*** dbinc/mp.h.orig 2004-02-02 10:24:53.000000000 -0800
--- dbinc/mp.h 2004-02-02 10:26:27.000000000 -0800
***************
*** 149,154 ****
--- 149,161 ----
* region lock).
*/
DB_MPOOL_STAT stat; /* Per-cache mpool statistics. */
+
+ /*
+ * We track page puts so that we can decide when allocation is never
+ * going to succeed. We don't lock the field, all we care about is
+ * if it changes.
+ */
+ u_int32_t put_counter; /* Count of page put calls. */
};

struct __db_mpool_hash {
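
Note: the put_counter field added above gives the allocator a cheap, lock-free progress signal: putters bump it on every page put, and a stalled allocator can compare two snapshots to decide whether waiting longer can possibly help. A minimal standalone sketch of that idea, with hypothetical names rather than the actual Berkeley DB structures:

    #include <stdint.h>

    /* Shared progress counter; every page put increments it. The field
     * is deliberately unlocked: readers only care whether it changed. */
    volatile uint32_t put_counter;

    void page_put(void)
    {
        ++put_counter;              /* exact value is irrelevant */
    }

    /* Allocator side: fail once an entire retry cycle passes with no
     * puts observed, instead of waiting forever. */
    int alloc_with_giveup(int (*try_alloc)(void), int max_cycles)
    {
        uint32_t snapshot;
        int i;

        snapshot = put_counter;
        for (i = 0; i < max_cycles; ++i) {
            if (try_alloc())
                return (0);         /* space found */
            if (put_counter == snapshot)
                return (-1);        /* no progress: give up */
            snapshot = put_counter; /* progress seen, keep trying */
        }
        return (-1);
    }

Because only "changed or not" matters, a torn or stale read at worst costs one extra retry cycle, which is why the patch can leave the field unlocked.
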
*** mp/mp_fput.c.orig 2002-08-13 06:26:41.000000000 -0700
--- mp/mp_fput.c 2004-02-02 10:22:35.000000000 -0800
***************
*** 19,24 ****
--- 19,26 ----
#include "dbinc/db_shash.h"
#include "dbinc/mp.h"

+ static void __memp_reset_lru __P((DB_ENV *, REGINFO *));
+
/*
* __memp_fput --
* Mpool file put function.
***************
*** 198,202 ****
--- 200,255 ----

MUTEX_UNLOCK(dbenv, &hp->hash_mutex);

+ /*
+ * On every buffer put we update the buffer generation number and check
+ * for wraparound.
+ */
+ if (++c_mp->lru_count == UINT32_T_MAX)
+ __memp_reset_lru(dbenv, dbmp->reginfo);
+
return (0);
}
+
+ /*
+ * __memp_reset_lru --
+ * Reset the cache LRU counter.
+ */
+ static void
+ __memp_reset_lru(dbenv, memreg)
+ DB_ENV *dbenv;
+ REGINFO *memreg;
+ {
+ BH *bhp;
+ DB_MPOOL_HASH *hp;
+ MPOOL *c_mp;
+ int bucket;
+
+ c_mp = memreg->primary;
+
+ /*
+ * Update the counter so all future allocations will start at the
+ * bottom.
+ */
+ c_mp->lru_count -= MPOOL_BASE_DECREMENT;
+
+ /* Adjust the priority of every buffer in the system. */
+ for (hp = R_ADDR(memreg, c_mp->htab),
+ bucket = 0; bucket < c_mp->htab_buckets; ++hp, ++bucket) {
+ /*
+ * Skip empty buckets.
+ *
+ * We can check for empty buckets before locking as we
+ * only care if the pointer is zero or non-zero.
+ */
+ if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
+ continue;
+
+ MUTEX_LOCK(dbenv, &hp->hash_mutex);
+ for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
+ bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
+ if (bhp->priority != UINT32_T_MAX &&
+ bhp->priority > MPOOL_BASE_DECREMENT)
+ bhp->priority -= MPOOL_BASE_DECREMENT;
+ MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
+ }
+ }
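
Note: the __memp_reset_lru function above (moved here from mp_alloc.c so it runs on the put path) renormalizes the 32-bit LRU clock when it is about to wrap, instead of zeroing it: the clock and every unpinned buffer priority drop by the same constant, so relative ages are preserved. Reduced to a toy array, and assuming a decrement value (MPOOL_BASE_DECREMENT's real value lives in the mpool headers):

    #include <stdint.h>

    #define BASE_DECREMENT (UINT32_MAX / 4) /* assumed stand-in value */

    struct buf {
        uint32_t priority;  /* LRU stamp; UINT32_MAX means "never evict" */
    };

    uint32_t lru_count;     /* the generation clock */

    /* Called when lru_count is about to wrap: shift the clock and all
     * unpinned priorities down together, so eviction ordering survives
     * and the clock regains headroom. */
    void reset_lru(struct buf *bufs, int n)
    {
        int i;

        lru_count -= BASE_DECREMENT;
        for (i = 0; i < n; ++i)
            if (bufs[i].priority != UINT32_MAX &&
                bufs[i].priority > BASE_DECREMENT)
                bufs[i].priority -= BASE_DECREMENT;
    }

The "priority > BASE_DECREMENT" guard keeps already-low priorities from underflowing and jumping to the top of the eviction order.
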
*** mp/mp_alloc.c.orig 2002-08-17 07:23:25.000000000 -0700
--- mp/mp_alloc.c 2004-02-02 10:28:15.000000000 -0800
***************
*** 25,31 ****
} HS;

static void __memp_bad_buffer __P((DB_MPOOL_HASH *));
- static void __memp_reset_lru __P((DB_ENV *, REGINFO *, MPOOL *));

/*
* __memp_alloc --
--- 25,30 ----
***************
*** 50,57 ****
MPOOL *c_mp;
MPOOLFILE *bh_mfp;
size_t freed_space;
! u_int32_t buckets, buffers, high_priority, max_na, priority;
! int aggressive, ret;
void *p;

dbenv = dbmp->dbenv;
--- 49,57 ----
MPOOL *c_mp;
MPOOLFILE *bh_mfp;
size_t freed_space;
! u_int32_t buckets, buffers, high_priority, priority, put_counter;
! u_int32_t total_buckets;
! int aggressive, giveup, ret;
void *p;

dbenv = dbmp->dbenv;
***************
*** 59,76 ****
dbht = R_ADDR(memreg, c_mp->htab);
hp_end = &dbht[c_mp->htab_buckets];

! buckets = buffers = 0;
! aggressive = 0;

c_mp->stat.st_alloc++;

/*
- * Get aggressive if we've tried to flush the number of pages as are
- * in the system without finding space.
- */
- max_na = 5 * c_mp->htab_buckets;
-
- /*
* If we're allocating a buffer, and the one we're discarding is the
* same size, we don't want to waste the time to re-integrate it into
* the shared memory free list. If the DB_MPOOLFILE argument isn't
--- 59,71 ----
dbht = R_ADDR(memreg, c_mp->htab);
hp_end = &dbht[c_mp->htab_buckets];

! buckets = buffers = put_counter = total_buckets = 0;
! aggressive = giveup = 0;
! hp_tmp = NULL;

c_mp->stat.st_alloc++;

/*
* If we're allocating a buffer, and the one we're discarding is the
* same size, we don't want to waste the time to re-integrate it into
* the shared memory free list. If the DB_MPOOLFILE argument isn't
***************
*** 81,99 ****
len = (sizeof(BH) - sizeof(u_int8_t)) + mfp->stat.st_pagesize;

R_LOCK(dbenv, memreg);
-
- /*
- * On every buffer allocation we update the buffer generation number
- * and check for wraparound.
- */
- if (++c_mp->lru_count == UINT32_T_MAX)
- __memp_reset_lru(dbenv, memreg, c_mp);
-
/*
* Anything newer than 1/10th of the buffer pool is ignored during
* allocation (unless allocation starts failing).
*/
- DB_ASSERT(c_mp->lru_count > c_mp->stat.st_pages / 10);
high_priority = c_mp->lru_count - c_mp->stat.st_pages / 10;

/*
--- 76,85 ----
***************
*** 120,129 ****
* We're not holding the region locked here, these statistics
* can't be trusted.
*/
! if (buckets != 0) {
! if (buckets > c_mp->stat.st_alloc_max_buckets)
! c_mp->stat.st_alloc_max_buckets = buckets;
! c_mp->stat.st_alloc_buckets += buckets;
}
if (buffers != 0) {
if (buffers > c_mp->stat.st_alloc_max_pages)
--- 106,116 ----
* We're not holding the region locked here, these statistics
* can't be trusted.
*/
! total_buckets += buckets;
! if (total_buckets != 0) {
! if (total_buckets > c_mp->stat.st_alloc_max_buckets)
! c_mp->stat.st_alloc_max_buckets = total_buckets;
! c_mp->stat.st_alloc_buckets += total_buckets;
}
if (buffers != 0) {
if (buffers > c_mp->stat.st_alloc_max_pages)
***************
*** 131,136 ****
--- 118,129 ----
c_mp->stat.st_alloc_pages += buffers;
}
return (0);
+ } else if (giveup || c_mp->stat.st_pages == 0) {
+ R_UNLOCK(dbenv, memreg);
+
+ __db_err(dbenv,
+ "unable to allocate space from the buffer cache");
+ return (ret);
}

/*
***************
*** 138,163 ****
* we need. Reset our free-space counter.
*/
freed_space = 0;

/*
* Walk the hash buckets and find the next two with potentially useful
* buffers. Free the buffer with the lowest priority from the buckets'
* chains.
*/
! for (hp_tmp = NULL;;) {
/* Check for wrap around. */
hp = &dbht[c_mp->last_checked++];
if (hp >= hp_end) {
c_mp->last_checked = 0;
!
! /*
! * If we've gone through all of the hash buckets, try
! * an allocation. If the cache is small, the old page
! * size is small, and the new page size is large, we
! * might have freed enough memory (but not 3 times the
! * memory).
! */
! goto alloc;
}

/*
--- 131,154 ----
* we need. Reset our free-space counter.
*/
freed_space = 0;
+ total_buckets += buckets;
+ buckets = 0;

/*
* Walk the hash buckets and find the next two with potentially useful
* buffers. Free the buffer with the lowest priority from the buckets'
* chains.
*/
! for (;;) {
! /* All pages have been freed, make one last try */
! if (c_mp->stat.st_pages == 0)
! goto alloc;
!
/* Check for wrap around. */
hp = &dbht[c_mp->last_checked++];
if (hp >= hp_end) {
c_mp->last_checked = 0;
! hp = &dbht[c_mp->last_checked++];
}

/*
***************
*** 172,210 ****
/*
* The failure mode is when there are too many buffers we can't
* write or there's not enough memory in the system. We don't
! * have a metric for deciding if allocation has no possible way
! * to succeed, so we don't ever fail, we assume memory will be
! * available if we wait long enough.
*
! * Get aggressive if we've tried to flush 5 times the number of
! * hash buckets as are in the system -- it's possible we have
! * been repeatedly trying to flush the same buffers, although
! * it's unlikely. Aggressive means:
*
* a: set a flag to attempt to flush high priority buffers as
* well as other buffers.
* b: sync the mpool to force out queue extent pages. While we
* might not have enough space for what we want and flushing
* is expensive, why not?
! * c: sleep for a second -- hopefully someone else will run and
! * free up some memory. Try to allocate memory too, in case
! * the other thread returns its memory to the region.
! * d: look at a buffer in every hash bucket rather than choose
* the more preferable of two.
*
* !!!
* This test ignores pathological cases like no buffers in the
* system -- that shouldn't be possible.
*/
! if ((++buckets % max_na) == 0) {
! aggressive = 1;
!
R_UNLOCK(dbenv, memreg);

! (void)__memp_sync_int(
! dbenv, NULL, 0, DB_SYNC_ALLOC, NULL);
!
! (void)__os_sleep(dbenv, 1, 0);

R_LOCK(dbenv, memreg);
goto alloc;
--- 163,221 ----
/*
* The failure mode is when there are too many buffers we can't
* write or there's not enough memory in the system. We don't
! * have a way to know that allocation has no way to succeed.
! * We fail if there were no pages returned to the cache after
! * we've been trying for a relatively long time.
*
! * Get aggressive if we've tried to flush the number of hash
! * buckets as are in the system and have not found any more
! * space. Aggressive means:
*
* a: set a flag to attempt to flush high priority buffers as
* well as other buffers.
* b: sync the mpool to force out queue extent pages. While we
* might not have enough space for what we want and flushing
* is expensive, why not?
! * c: look at a buffer in every hash bucket rather than choose
* the more preferable of two.
+ * d: start to think about giving up.
+ *
+ * If we get here twice, sleep for a second, hopefully someone
+ * else will run and free up some memory.
+ *
+ * Always try to allocate memory too, in case some other thread
+ * returns its memory to the region.
*
* !!!
* This test ignores pathological cases like no buffers in the
* system -- that shouldn't be possible.
*/
! if ((++buckets % c_mp->htab_buckets) == 0) {
! if (freed_space > 0)
! goto alloc;
R_UNLOCK(dbenv, memreg);

! switch (++aggressive) {
! case 1:
! break;
! case 2:
! put_counter = c_mp->put_counter;
! /* FALLTHROUGH */
! case 3:
! case 4:
! case 5:
! case 6:
! (void)__memp_sync_int(
! dbenv, NULL, 0, DB_SYNC_ALLOC, NULL);
!
! (void)__os_sleep(dbenv, 1, 0);
! break;
! default:
! aggressive = 1;
! if (put_counter == c_mp->put_counter)
! giveup = 1;
! break;
! }

R_LOCK(dbenv, memreg);
goto alloc;
***************
*** 277,283 ****
* thread may have acquired this buffer and incremented the ref
* count after we wrote it, in which case we can't have it.
*
! * If there's a write error, avoid selecting this buffer again
* by making it the bucket's least-desirable buffer.
*/
if (ret != 0 || bhp->ref != 0) {
--- 288,295 ----
* thread may have acquired this buffer and incremented the ref
* count after we wrote it, in which case we can't have it.
*
! * If there's a write error and we're having problems finding
! * something to allocate, avoid selecting this buffer again
! * by making it the bucket's least-desirable buffer.
*/
if (ret != 0 || bhp->ref != 0) {
***************
*** 301,306 ****
--- 313,320 ----

freed_space += __db_shsizeof(bhp);
__memp_bhfree(dbmp, hp, bhp, 1);
+ if (aggressive > 1)
+ aggressive = 1;

/*
* Unlock this hash bucket and re-acquire the region lock. If
***************
*** 362,415 ****
hp->hash_priority = SH_TAILQ_FIRST(&hp->hash_bucket, __bh)->priority;
}

- /*
- * __memp_reset_lru --
- * Reset the cache LRU counter.
- */
- static void
- __memp_reset_lru(dbenv, memreg, c_mp)
- DB_ENV *dbenv;
- REGINFO *memreg;
- MPOOL *c_mp;
- {
- BH *bhp;
- DB_MPOOL_HASH *hp;
- int bucket;
-
- /*
- * Update the counter so all future allocations will start at the
- * bottom.
- */
- c_mp->lru_count -= MPOOL_BASE_DECREMENT;
-
- /* Release the region lock. */
- R_UNLOCK(dbenv, memreg);
-
- /* Adjust the priority of every buffer in the system. */
- for (hp = R_ADDR(memreg, c_mp->htab),
- bucket = 0; bucket < c_mp->htab_buckets; ++hp, ++bucket) {
- /*
- * Skip empty buckets.
- *
- * We can check for empty buckets before locking as we
- * only care if the pointer is zero or non-zero.
- */
- if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
- continue;
-
- MUTEX_LOCK(dbenv, &hp->hash_mutex);
- for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
- bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
- if (bhp->priority != UINT32_T_MAX &&
- bhp->priority > MPOOL_BASE_DECREMENT)
- bhp->priority -= MPOOL_BASE_DECREMENT;
- MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
- }
-
- /* Reacquire the region lock. */
- R_LOCK(dbenv, memreg);
- }
-
#ifdef DIAGNOSTIC
/*
* __memp_check_order --
--- 376,381 ----
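
Note: the net effect of the mp_alloc.c changes is an escalation ladder in place of the old wait-forever loop. Each full sweep of the hash buckets without enough freed space bumps an aggressive level; level 2 snapshots put_counter; levels 2 through 6 sync the pool and sleep a second so other threads can return pages; and when the ladder wraps with put_counter unchanged, the allocation finally fails. A condensed sketch of that control flow, abstracted from the surrounding locking (sync_pool and sleep_one_second are stand-ins for __memp_sync_int and __os_sleep):

    #include <stdint.h>
    #include <unistd.h>

    static void sync_pool(void)        { /* flush dirty buffers */ }
    static void sleep_one_second(void) { sleep(1); }

    /* One escalation step after a fruitless sweep of all hash buckets.
     * Returns 1 to keep searching, 0 to give up on the allocation. */
    int escalate(int *aggressive, uint32_t *snapshot, uint32_t puts_now)
    {
        switch (++*aggressive) {
        case 1:                 /* widen the search to high-priority buffers */
            break;
        case 2:
            *snapshot = puts_now;   /* remember put activity at this point */
            /* FALLTHROUGH */
        case 3:
        case 4:
        case 5:
        case 6:
            sync_pool();            /* force out pages we could then evict */
            sleep_one_second();     /* let other threads free memory */
            break;
        default:
            *aggressive = 1;        /* restart the ladder */
            if (*snapshot == puts_now)
                return (0);         /* no puts the whole time: give up */
            break;
        }
        return (1);
    }

Dropping aggressive back to 1 after a successful free (the "if (aggressive > 1)" hunk above) ensures the expensive sync-and-sleep steps only repeat while the cache is making no progress at all.
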
*** dbreg/dbreg_rec.c.orig 2002-08-17 07:22:52.000000000 -0700
--- dbreg/dbreg_rec.c 2003-11-08 10:59:19.000000000 -0800
***************
*** 174,192 ****
* Typically, closes should match an open which means
* that if this is a close, there should be a valid
* entry in the dbentry table when we get here,
! * however there is an exception. If this is an
* OPENFILES pass, then we may have started from
* a log file other than the first, and the
* corresponding open appears in an earlier file.
! * We can ignore that case, but all others are errors.
*/
dbe = &dblp->dbentry[argp->fileid];
if (dbe->dbp == NULL && !dbe->deleted) {
/* No valid entry here. */
! if ((argp->opcode != LOG_CLOSE &&
! argp->opcode != LOG_RCLOSE) ||
! (op != DB_TXN_OPENFILES &&
! op !=DB_TXN_POPENFILES)) {
__db_err(dbenv,
"Improper file close at %lu/%lu",
(u_long)lsnp->file,
--- 174,193 ----
* Typically, closes should match an open which means
* that if this is a close, there should be a valid
* entry in the dbentry table when we get here,
! * however there are exceptions. 1. If this is an
* OPENFILES pass, then we may have started from
* a log file other than the first, and the
* corresponding open appears in an earlier file.
! * 2. If we are undoing an open on an abort or
! * recovery, it's possible that we failed after
! * the log record, but before we actually entered
! * a handle here.
*/
dbe = &dblp->dbentry[argp->fileid];
if (dbe->dbp == NULL && !dbe->deleted) {
/* No valid entry here. */
! if (DB_REDO(op) ||
! argp->opcode == LOG_CHECKPOINT) {
__db_err(dbenv,
"Improper file close at %lu/%lu",
(u_long)lsnp->file,
*** env/env_recover.c.orig.1 2002-08-22 14:52:51.000000000 -0700
--- env/env_recover.c 2003-11-15 08:20:59.000000000 -0800
***************
*** 232,243 ****
* we'll still need to do a vtruncate based on information we haven't
* yet collected.
*/
! if (ret == DB_NOTFOUND) {
ret = 0;
! if (max_lsn == NULL)
! goto done;
! }
! if (ret != 0)
goto err;

hi_txn = txnid;
--- 232,240 ----
* we'll still need to do a vtruncate based on information we haven't
* yet collected.
*/
! if (ret == DB_NOTFOUND)
ret = 0;
! else if (ret != 0)
goto err;

hi_txn = txnid;
***************
*** 331,337 ****

/* Find a low txnid. */
ret = 0;
! do {
/* txnid is after rectype, which is a u_int32. */
memcpy(&txnid,
(u_int8_t *)data.data + sizeof(u_int32_t), sizeof(txnid));
--- 328,334 ----

/* Find a low txnid. */
ret = 0;
! if (hi_txn != 0) do {
/* txnid is after rectype, which is a u_int32. */
memcpy(&txnid,
(u_int8_t *)data.data + sizeof(u_int32_t), sizeof(txnid));
***************
*** 344,354 ****
* There are no transactions and we're not recovering to an LSN (see
* above), so there is nothing to do.
*/
! if (ret == DB_NOTFOUND) {
ret = 0;
- if (max_lsn == NULL)
- goto done;
- }

/* Reset to the first lsn. */
if (ret != 0 || (ret = logc->get(logc, &first_lsn, &data, DB_SET)) != 0)
--- 341,348 ----
* There are no transactions and we're not recovering to an LSN (see
* above), so there is nothing to do.
*/
! if (ret == DB_NOTFOUND)
ret = 0;

/* Reset to the first lsn. */
if (ret != 0 || (ret = logc->get(logc, &first_lsn, &data, DB_SET)) != 0)
***************
*** 367,372 ****
--- 361,370 ----
txninfo, &data, &first_lsn, &last_lsn, nfiles, 1)) != 0)
goto err;

+ /* If there were no transactions, then we can bail out early. */
+ if (hi_txn == 0 && max_lsn == NULL)
+ goto done;
+
/*
* Pass #2.
*
***************
*** 483,488 ****
--- 481,487 ----
if ((ret = __dbreg_close_files(dbenv)) != 0)
goto err;

+ done:
if (max_lsn != NULL) {
region->last_ckp = ((DB_TXNHEAD *)txninfo)->ckplsn;

***************
*** 538,544 ****
__db_err(dbenv, "Recovery complete at %.24s", ctime(&now));
__db_err(dbenv, "%s %lx %s [%lu][%lu]",
"Maximum transaction ID",
! ((DB_TXNHEAD *)txninfo)->maxid,
"Recovery checkpoint",
(u_long)region->last_ckp.file,
(u_long)region->last_ckp.offset);
--- 537,544 ----
__db_err(dbenv, "Recovery complete at %.24s", ctime(&now));
__db_err(dbenv, "%s %lx %s [%lu][%lu]",
"Maximum transaction ID",
! txninfo == NULL ? TXN_MINIMUM :
! ((DB_TXNHEAD *)txninfo)->maxid,
"Recovery checkpoint",
(u_long)region->last_ckp.file,
(u_long)region->last_ckp.offset);
***************
*** 550,556 ****
(u_long)lsn.file, (u_long)lsn.offset, pass);
}

- done:
err: if (lockid != DB_LOCK_INVALIDID) {
if ((t_ret = __rep_unlockpages(dbenv, lockid)) != 0 && ret == 0)
ret = t_ret;
--- 550,555 ----