]>
sourceware.org Git - glibc.git/blob - db2/btree/bt_delete.c
2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997
5 * Sleepycat Software. All rights reserved.
8 * Copyright (c) 1990, 1993, 1994, 1995, 1996
9 * Keith Bostic. All rights reserved.
12 * Copyright (c) 1990, 1993, 1994, 1995
13 * The Regents of the University of California. All rights reserved.
15 * This code is derived from software contributed to Berkeley by
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
26 * 3. All advertising materials mentioning features or use of this software
27 * must display the following acknowledgement:
28 * This product includes software developed by the University of
29 * California, Berkeley and its contributors.
30 * 4. Neither the name of the University nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
34 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50 static const char sccsid
[] = "@(#)bt_delete.c 10.18 (Sleepycat) 8/24/97";
53 #ifndef NO_SYSTEM_INCLUDES
54 #include <sys/types.h>
64 static int __bam_dpages
__P((DB
*, BTREE
*));
68 * Delete the items referenced by a key.
70 * PUBLIC: int __bam_delete __P((DB *, DB_TXN *, DBT *, int));
73 __bam_delete(argdbp
, txn
, key
, flags
)
82 db_indx_t cnt
, i
, indx
;
83 int dpage
, exact
, ret
, stack
;
85 DEBUG_LWRITE(argdbp
, txn
, "bam_delete", key
, NULL
, flags
);
89 /* Check for invalid flags. */
91 __db_delchk(argdbp
, flags
, F_ISSET(argdbp
, DB_AM_RDONLY
))) != 0)
94 GETHANDLE(argdbp
, txn
, &dbp
, ret
);
97 /* Search the tree for the key; delete only deletes exact matches. */
98 if ((ret
= __bam_search(dbp
, key
, S_DELETE
, 1, NULL
, &exact
)) != 0)
102 indx
= t
->bt_csp
->indx
;
104 /* Delete the key/data pair, including any duplicates. */
105 for (cnt
= 1, i
= indx
;; ++cnt
)
106 if ((i
+= P_INDX
) >= NUM_ENT(h
) || h
->inp
[i
] != h
->inp
[indx
])
108 for (; cnt
> 0; --cnt
, ++t
->lstat
.bt_deleted
)
109 if (__bam_ca_delete(dbp
, h
->pgno
, indx
, NULL
) != 0) {
110 GET_BKEYDATA(h
, indx
+ O_INDX
)->deleted
= 1;
112 } else if ((ret
= __bam_ditem(dbp
, h
, indx
)) != 0 ||
113 (ret
= __bam_ditem(dbp
, h
, indx
)) != 0)
116 /* If we're using record numbers, update internal page record counts. */
117 if (F_ISSET(dbp
, DB_BT_RECNUM
) && (ret
= __bam_adjust(dbp
, t
, -1)) != 0)
120 /* If the page is now empty, delete it. */
121 dpage
= NUM_ENT(h
) == 0 && h
->pgno
!= PGNO_ROOT
;
126 ret
= dpage
? __bam_dpage(dbp
, key
) : 0;
136 * Delete the items referenced by a key.
138 * PUBLIC: int __ram_delete __P((DB *, DB_TXN *, DBT *, int));
141 __ram_delete(argdbp
, txn
, key
, flags
)
154 int exact
, ret
, stack
;
158 /* Check for invalid flags. */
160 __db_delchk(argdbp
, flags
, F_ISSET(argdbp
, DB_AM_RDONLY
))) != 0)
163 GETHANDLE(argdbp
, txn
, &dbp
, ret
);
166 /* Check the user's record number and fill in as necessary. */
167 if ((ret
= __ram_getno(argdbp
, key
, &recno
, 0)) != 0)
170 /* Search the tree for the key; delete only deletes exact matches. */
171 if ((ret
= __bam_rsearch(dbp
, &recno
, S_DELETE
, 1, &exact
)) != 0)
179 indx
= t
->bt_csp
->indx
;
182 /* If the record has already been deleted, we couldn't have found it. */
183 if (GET_BKEYDATA(h
, indx
)->deleted
) {
189 * If we're not renumbering records, replace the record with a marker
192 if (!F_ISSET(dbp
, DB_RE_RENUMBER
)) {
193 if ((ret
= __bam_ditem(dbp
, h
, indx
)) != 0)
199 memset(&hdr
, 0, sizeof(hdr
));
201 hdr
.size
= SSZA(BKEYDATA
, data
);
202 memset(&data
, 0, sizeof(data
));
203 data
.data
= (char *) "";
205 if ((ret
= __db_pitem(dbp
,
206 h
, indx
, BKEYDATA_SIZE(0), &hdr
, &data
)) != 0)
209 ++t
->lstat
.bt_deleted
;
213 /* Delete the item. */
214 if ((ret
= __bam_ditem(dbp
, h
, indx
)) != 0)
217 ++t
->lstat
.bt_deleted
;
218 if (t
->bt_recno
!= NULL
)
219 F_SET(t
->bt_recno
, RECNO_MODIFIED
);
221 /* Adjust the counts. */
222 __bam_adjust(dbp
, t
, -1);
224 /* Adjust the cursors. */
225 __ram_ca(dbp
, recno
, CA_DELETE
);
228 * If the page is now empty, delete it -- we have the whole tree
229 * locked, so there are no preparations to make. Else, release
232 if (NUM_ENT(h
) == 0 && h
->pgno
!= PGNO_ROOT
) {
234 ret
= __bam_dpages(dbp
, t
);
247 * Delete one or more entries from a page.
249 * PUBLIC: int __bam_ditem __P((DB *, PAGE *, u_int32_t));
252 __bam_ditem(dbp
, h
, indx
)
265 bi
= GET_BINTERNAL(h
, indx
);
269 nbytes
= BINTERNAL_SIZE(bi
->len
);
272 nbytes
= BKEYDATA_SIZE(bi
->len
);
275 return (__db_pgfmt(dbp
, h
->pgno
));
279 nbytes
= RINTERNAL_SIZE
;
283 * If it's a duplicate key, discard the index and don't touch
284 * the actual page item. This works because no data item can
285 * have an index that matches any other index so even if the
286 * data item is in an index "slot", it won't match any other
290 if (indx
> 0 && h
->inp
[indx
] == h
->inp
[indx
- P_INDX
])
291 return (__bam_adjindx(dbp
,
292 h
, indx
, indx
- P_INDX
, 0));
293 if (indx
< (u_int32_t
)(NUM_ENT(h
) - P_INDX
) &&
294 h
->inp
[indx
] == h
->inp
[indx
+ P_INDX
])
295 return (__bam_adjindx(dbp
,
296 h
, indx
, indx
+ O_INDX
, 0));
300 bk
= GET_BKEYDATA(h
, indx
);
304 nbytes
= BOVERFLOW_SIZE
;
306 offpage
: /* Delete duplicate/offpage chains. */
307 bo
= GET_BOVERFLOW(h
, indx
);
308 if (bo
->type
== B_DUPLICATE
) {
310 __db_ddup(dbp
, bo
->pgno
, __bam_free
)) != 0)
314 __db_doff(dbp
, bo
->pgno
, __bam_free
)) != 0)
318 nbytes
= BKEYDATA_SIZE(bk
->len
);
321 return (__db_pgfmt(dbp
, h
->pgno
));
325 return (__db_pgfmt(dbp
, h
->pgno
));
328 /* Delete the item. */
329 if ((ret
= __db_ditem(dbp
, h
, indx
, nbytes
)) != 0)
332 /* Mark the page dirty. */
333 return (memp_fset(dbp
->mpf
, h
, DB_MPOOL_DIRTY
));
338 * Adjust an index on the page.
340 * PUBLIC: int __bam_adjindx __P((DB *, PAGE *, u_int32_t, u_int32_t, int));
343 __bam_adjindx(dbp
, h
, indx
, indx_copy
, is_insert
)
346 u_int32_t indx
, indx_copy
;
352 /* Log the change. */
353 if (DB_LOGGING(dbp
) &&
354 (ret
= __bam_adj_log(dbp
->dbenv
->lg_info
, dbp
->txn
, &LSN(h
),
355 0, dbp
->log_fileid
, PGNO(h
), &LSN(h
), indx
, indx_copy
,
356 (u_int32_t
)is_insert
)) != 0)
360 copy
= h
->inp
[indx_copy
];
361 if (indx
!= NUM_ENT(h
))
362 memmove(&h
->inp
[indx
+ O_INDX
], &h
->inp
[indx
],
363 sizeof(db_indx_t
) * (NUM_ENT(h
) - indx
));
368 if (indx
!= NUM_ENT(h
))
369 memmove(&h
->inp
[indx
], &h
->inp
[indx
+ O_INDX
],
370 sizeof(db_indx_t
) * (NUM_ENT(h
) - indx
));
373 /* Mark the page dirty. */
374 ret
= memp_fset(dbp
->mpf
, h
, DB_MPOOL_DIRTY
);
376 /* Adjust the cursors. */
377 __bam_ca_di(dbp
, h
->pgno
, indx
, is_insert
? 1 : -1);
383 * Delete a page from the tree.
385 * PUBLIC: int __bam_dpage __P((DB *, const DBT *));
388 __bam_dpage(dbp
, key
)
396 int exact
, level
, ret
;
402 * The locking protocol is that we acquire locks by walking down the
403 * tree, to avoid the obvious deadlocks.
405 * Call __bam_search to reacquire the empty leaf page, but this time
406 * get both the leaf page and it's parent, locked. Walk back up the
407 * tree, until we have the top pair of pages that we want to delete.
408 * Once we have the top page that we want to delete locked, lock the
409 * underlying pages and check to make sure they're still empty. If
410 * they are, delete them.
412 for (level
= LEAFLEVEL
;; ++level
) {
413 /* Acquire a page and its parent, locked. */
415 __bam_search(dbp
, key
, S_WRPAIR
, level
, NULL
, &exact
)) != 0)
419 * If we reach the root or the page isn't going to be empty
420 * when we delete one record, quit.
422 h
= t
->bt_csp
[-1].page
;
423 if (h
->pgno
== PGNO_ROOT
|| NUM_ENT(h
) != 1)
426 /* Release the two locked pages. */
427 (void)memp_fput(dbp
->mpf
, t
->bt_csp
[-1].page
, 0);
428 (void)__BT_TLPUT(dbp
, t
->bt_csp
[-1].lock
);
429 (void)memp_fput(dbp
->mpf
, t
->bt_csp
[0].page
, 0);
430 (void)__BT_TLPUT(dbp
, t
->bt_csp
[0].lock
);
434 * Leave the stack pointer one after the last entry, we may be about
435 * to push more items on the stack.
440 * t->bt_csp[-2].page is the top page, which we're not going to delete,
441 * and t->bt_csp[-1].page is the first page we are going to delete.
443 * Walk down the chain, acquiring the rest of the pages until we've
444 * retrieved the leaf page. If we find any pages that aren't going
445 * to be emptied by the delete, someone else added something while we
446 * were walking the tree, and we discontinue the delete.
448 for (h
= t
->bt_csp
[-1].page
;;) {
458 * Get the next page, write lock it and push it onto the stack.
459 * We know it's index 0, because it can only have one element.
461 pgno
= TYPE(h
) == P_IBTREE
?
462 GET_BINTERNAL(h
, 0)->pgno
: GET_RINTERNAL(h
, 0)->pgno
;
464 if ((ret
= __bam_lget(dbp
, 0, pgno
, DB_LOCK_WRITE
, &lock
)) != 0)
466 if ((ret
= __bam_pget(dbp
, &h
, &pgno
, 0)) != 0)
468 BT_STK_PUSH(t
, h
, 0, lock
, ret
);
474 return (__bam_dpages(dbp
, t
));
477 /* Discard any locked pages and return. */
485 * Delete a set of locked pages.
500 rcnt
= 0; /* XXX: Shut the compiler up. */
505 * There is an interesting deadlock situation here. We have to relink
506 * the leaf page chain around the leaf page being deleted. Consider
507 * a cursor walking through the leaf pages, that has the previous page
508 * read-locked and is waiting on a lock for the page we're deleting.
509 * It will deadlock here. This is a problem, because if our process is
510 * selected to resolve the deadlock, we'll leave an empty leaf page
511 * that we can never again access by walking down the tree. So, before
512 * we unlink the subtree, we relink the leaf page chain.
514 if ((ret
= __db_relink(dbp
, t
->bt_csp
->page
, NULL
, 1)) != 0)
518 * We have the entire stack of deletable pages locked. Start from the
519 * top of the tree and move to the bottom, as it's better to release
520 * the inner pages as soon as possible.
522 if ((ret
= __bam_ditem(dbp
, epg
->page
, epg
->indx
)) != 0)
526 * If we deleted the next-to-last item from the root page, the tree
527 * has collapsed a level. Try and write lock the remaining root + 1
528 * page and copy it onto the root page. If we can't get the lock,
529 * that's okay, the tree just stays a level deeper than we'd like.
532 if (h
->pgno
== PGNO_ROOT
&& NUM_ENT(h
) == 1) {
533 pgno
= TYPE(epg
->page
) == P_IBTREE
?
534 GET_BINTERNAL(epg
->page
, 0)->pgno
:
535 GET_RINTERNAL(epg
->page
, 0)->pgno
;
536 if ((ret
= __bam_lget(dbp
, 0, pgno
, DB_LOCK_WRITE
, &lock
)) != 0)
538 if ((ret
= __bam_pget(dbp
, &h
, &pgno
, 0)) != 0)
541 /* Log the change. */
542 if (DB_LOGGING(dbp
)) {
543 memset(&a
, 0, sizeof(a
));
545 a
.size
= dbp
->pgsize
;
546 memset(&b
, 0, sizeof(b
));
547 b
.data
= P_ENTRY(epg
->page
, 0);
548 b
.size
= BINTERNAL_SIZE(((BINTERNAL
*)b
.data
)->len
);
549 __bam_rsplit_log(dbp
->dbenv
->lg_info
, dbp
->txn
,
550 &h
->lsn
, 0, dbp
->log_fileid
, h
->pgno
, &a
, &b
,
557 * One fixup -- if the tree has record numbers and we're not
558 * converting to a leaf page, we have to preserve the total
561 if (TYPE(h
) == P_IRECNO
||
562 (TYPE(h
) == P_IBTREE
&& F_ISSET(dbp
, DB_BT_RECNUM
)))
563 rcnt
= RE_NREC(epg
->page
);
564 memcpy(epg
->page
, h
, dbp
->pgsize
);
565 epg
->page
->pgno
= PGNO_ROOT
;
566 if (TYPE(h
) == P_IRECNO
||
567 (TYPE(h
) == P_IBTREE
&& F_ISSET(dbp
, DB_BT_RECNUM
)))
568 RE_NREC_SET(epg
->page
, rcnt
);
570 /* Free the last page in that level of the btree. */
572 (void)__bam_free(dbp
, h
);
574 /* Adjust the cursors. */
575 __bam_ca_move(dbp
, t
, h
->pgno
, PGNO_ROOT
);
577 (void)__BT_TLPUT(dbp
, lock
);
580 /* Release the top page in the subtree. */
581 (void)memp_fput(dbp
->mpf
, epg
->page
, 0);
582 (void)__BT_TLPUT(dbp
, epg
->lock
);
585 * Free the rest of the pages.
588 * Don't bother checking for errors. We've unlinked the subtree from
589 * the tree, and there's no possibility of recovery.
591 for (; ++epg
<= t
->bt_csp
; ++t
->lstat
.bt_freed
) {
592 if (NUM_ENT(epg
->page
) != 0)
593 (void)__bam_ditem(dbp
, epg
->page
, epg
->indx
);
595 (void)__bam_free(dbp
, epg
->page
);
596 (void)__BT_TLPUT(dbp
, epg
->lock
);
601 /* Discard any remaining pages and return. */
602 for (; epg
<= t
->bt_csp
; ++epg
) {
603 (void)memp_fput(dbp
->mpf
, epg
->page
, 0);
604 (void)__BT_TLPUT(dbp
, epg
->lock
);
This page took 0.095151 seconds and 5 git commands to generate.