1 | /* |
---|
2 | Unix SMB/CIFS implementation. |
---|
3 | |
---|
4 | trivial database library |
---|
5 | |
---|
6 | Copyright (C) Andrew Tridgell 1999-2004 |
---|
7 | Copyright (C) Paul `Rusty' Russell 2000 |
---|
8 | Copyright (C) Jeremy Allison 2000-2003 |
---|
9 | |
---|
10 | ** NOTE! The following LGPL license applies to the tdb |
---|
11 | ** library. This does NOT imply that all of Samba is released |
---|
12 | ** under the LGPL |
---|
13 | |
---|
14 | This library is free software; you can redistribute it and/or |
---|
15 | modify it under the terms of the GNU Lesser General Public |
---|
16 | License as published by the Free Software Foundation; either |
---|
17 | version 2 of the License, or (at your option) any later version. |
---|
18 | |
---|
19 | This library is distributed in the hope that it will be useful, |
---|
20 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
21 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
22 | Lesser General Public License for more details. |
---|
23 | |
---|
24 | You should have received a copy of the GNU Lesser General Public |
---|
25 | License along with this library; if not, write to the Free Software |
---|
26 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
---|
27 | */ |
---|
28 | |
---|
29 | |
---|
30 | #ifndef _SAMBA_BUILD_ |
---|
31 | #if HAVE_CONFIG_H |
---|
32 | #include <config.h> |
---|
33 | #endif |
---|
34 | |
---|
35 | #include <stdlib.h> |
---|
36 | #include <stdio.h> |
---|
37 | #include <stdint.h> |
---|
38 | #include <fcntl.h> |
---|
39 | #include <unistd.h> |
---|
40 | #include <string.h> |
---|
41 | #include <fcntl.h> |
---|
42 | #include <errno.h> |
---|
43 | #include <sys/mman.h> |
---|
44 | #include <sys/stat.h> |
---|
45 | #include "tdb.h" |
---|
46 | #include <stdarg.h> |
---|
47 | #include "talloc.h" |
---|
48 | #undef HAVE_MMAP |
---|
49 | #else |
---|
50 | #include "includes.h" |
---|
51 | #include "lib/tdb/include/tdb.h" |
---|
52 | #include "system/time.h" |
---|
53 | #include "system/shmem.h" |
---|
54 | #include "system/filesys.h" |
---|
55 | #endif |
---|
56 | |
---|
57 | #define TDB_MAGIC_FOOD "TDB file\n" |
---|
58 | #define TDB_VERSION (0x26011967 + 6) |
---|
59 | #define TDB_MAGIC (0x26011999U) |
---|
60 | #define TDB_FREE_MAGIC (~TDB_MAGIC) |
---|
61 | #define TDB_DEAD_MAGIC (0xFEE1DEAD) |
---|
62 | #define TDB_ALIGNMENT 4 |
---|
63 | #define MIN_REC_SIZE (2*sizeof(struct list_struct) + TDB_ALIGNMENT) |
---|
64 | #define DEFAULT_HASH_SIZE 131 |
---|
65 | #define TDB_PAGE_SIZE 0x2000 |
---|
66 | #define FREELIST_TOP (sizeof(struct tdb_header)) |
---|
67 | #define TDB_ALIGN(x,a) (((x) + (a)-1) & ~((a)-1)) |
---|
68 | #define TDB_BYTEREV(x) (((((x)&0xff)<<24)|((x)&0xFF00)<<8)|(((x)>>8)&0xFF00)|((x)>>24)) |
---|
69 | #define TDB_DEAD(r) ((r)->magic == TDB_DEAD_MAGIC) |
---|
70 | #define TDB_BAD_MAGIC(r) ((r)->magic != TDB_MAGIC && !TDB_DEAD(r)) |
---|
71 | #define TDB_HASH_TOP(hash) (FREELIST_TOP + (BUCKET(hash)+1)*sizeof(tdb_off)) |
---|
72 | #define TDB_DATA_START(hash_size) (TDB_HASH_TOP(hash_size-1)) |
---|
73 | |
---|
74 | |
---|
75 | /* NB assumes there is a local variable called "tdb" that is the |
---|
76 | * current context, also takes doubly-parenthesized print-style |
---|
77 | * argument. */ |
---|
78 | #define TDB_LOG(x) tdb->log_fn x |
---|
79 | |
---|
80 | /* lock offsets */ |
---|
81 | #define GLOBAL_LOCK 0 |
---|
82 | #define ACTIVE_LOCK 4 |
---|
83 | |
---|
84 | #ifndef MAP_FILE |
---|
85 | #define MAP_FILE 0 |
---|
86 | #endif |
---|
87 | |
---|
88 | #ifndef MAP_FAILED |
---|
89 | #define MAP_FAILED ((void *)-1) |
---|
90 | #endif |
---|
91 | |
---|
92 | #ifndef discard_const_p |
---|
93 | # if defined(__intptr_t_defined) || defined(HAVE_INTPTR_T) |
---|
94 | # define discard_const(ptr) ((void *)((intptr_t)(ptr))) |
---|
95 | # else |
---|
96 | # define discard_const(ptr) ((void *)(ptr)) |
---|
97 | # endif |
---|
98 | # define discard_const_p(type, ptr) ((type *)discard_const(ptr)) |
---|
99 | #endif |
---|
100 | |
---|
101 | /* free memory if the pointer is valid and zero the pointer */ |
---|
102 | #ifndef SAFE_FREE |
---|
103 | #define SAFE_FREE(x) do { if ((x) != NULL) {talloc_free(discard_const_p(void *, (x))); (x)=NULL;} } while(0) |
---|
104 | #endif |
---|
105 | |
---|
106 | #define BUCKET(hash) ((hash) % tdb->header.hash_size) |
---|
107 | TDB_DATA tdb_null; |
---|
108 | |
---|
109 | /* all contexts, to ensure no double-opens (fcntl locks don't nest!) */ |
---|
110 | static TDB_CONTEXT *tdbs = NULL; |
---|
111 | |
---|
112 | static int tdb_munmap(TDB_CONTEXT *tdb) |
---|
113 | { |
---|
114 | if (tdb->flags & TDB_INTERNAL) |
---|
115 | return 0; |
---|
116 | |
---|
117 | #ifdef HAVE_MMAP |
---|
118 | if (tdb->map_ptr) { |
---|
119 | int ret = munmap(tdb->map_ptr, tdb->map_size); |
---|
120 | if (ret != 0) |
---|
121 | return ret; |
---|
122 | } |
---|
123 | #endif |
---|
124 | tdb->map_ptr = NULL; |
---|
125 | return 0; |
---|
126 | } |
---|
127 | |
---|
128 | static void tdb_mmap(TDB_CONTEXT *tdb) |
---|
129 | { |
---|
130 | if (tdb->flags & TDB_INTERNAL) |
---|
131 | return; |
---|
132 | |
---|
133 | #ifdef HAVE_MMAP |
---|
134 | if (!(tdb->flags & TDB_NOMMAP)) { |
---|
135 | tdb->map_ptr = mmap(NULL, tdb->map_size, |
---|
136 | PROT_READ|(tdb->read_only? 0:PROT_WRITE), |
---|
137 | MAP_SHARED|MAP_FILE, tdb->fd, 0); |
---|
138 | |
---|
139 | /* |
---|
140 | * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!! |
---|
141 | */ |
---|
142 | |
---|
143 | if (tdb->map_ptr == MAP_FAILED) { |
---|
144 | tdb->map_ptr = NULL; |
---|
145 | TDB_LOG((tdb, 2, "tdb_mmap failed for size %d (%s)\n", |
---|
146 | tdb->map_size, strerror(errno))); |
---|
147 | } |
---|
148 | } else { |
---|
149 | tdb->map_ptr = NULL; |
---|
150 | } |
---|
151 | #else |
---|
152 | tdb->map_ptr = NULL; |
---|
153 | #endif |
---|
154 | } |
---|
155 | |
---|
/*
 * Endian conversion: we only ever deal with 4 byte quantities.
 *
 * Byte-swaps, in place, each complete 32 bit word in the first "size"
 * bytes of buf (a trailing partial word is not touched) and returns buf.
 */
static void *convert(void *buf, uint32_t size)
{
	uint32_t *words = buf;
	uint32_t nwords = size / 4;
	uint32_t idx = 0;

	while (idx < nwords) {
		words[idx] = TDB_BYTEREV(words[idx]);
		idx++;
	}
	return buf;
}
/* Non-zero when the open database needs byte swapping; needs a local "tdb". */
#define DOCONV() (tdb->flags & TDB_CONVERT)
/* Swap the lvalue x in place if required and yield its address either way. */
#define CONVERT(x) (DOCONV() ? convert(&x, sizeof(x)) : &x)
166 | |
---|
167 | /* the body of the database is made of one list_struct for the free space |
---|
168 | plus a separate data list for each hash value */ |
---|
169 | struct list_struct { |
---|
170 | tdb_off next; /* offset of the next record in the list */ |
---|
171 | tdb_len rec_len; /* total byte length of record */ |
---|
172 | tdb_len key_len; /* byte length of key */ |
---|
173 | tdb_len data_len; /* byte length of data */ |
---|
174 | uint32_t full_hash; /* the full 32 bit hash of the key */ |
---|
175 | uint32_t magic; /* try to catch errors */ |
---|
176 | /* the following union is implied: |
---|
177 | union { |
---|
178 | char record[rec_len]; |
---|
179 | struct { |
---|
180 | char key[key_len]; |
---|
181 | char data[data_len]; |
---|
182 | } |
---|
183 | uint32_t totalsize; (tailer) |
---|
184 | } |
---|
185 | */ |
---|
186 | }; |
---|
187 | |
---|
188 | /* a byte range locking function - return 0 on success |
---|
189 | this functions locks/unlocks 1 byte at the specified offset. |
---|
190 | |
---|
191 | On error, errno is also set so that errors are passed back properly |
---|
192 | through tdb_open(). */ |
---|
193 | static int tdb_brlock(TDB_CONTEXT *tdb, tdb_off offset, |
---|
194 | int rw_type, int lck_type, int probe) |
---|
195 | { |
---|
196 | struct flock fl; |
---|
197 | int ret; |
---|
198 | |
---|
199 | if (tdb->flags & TDB_NOLOCK) |
---|
200 | return 0; |
---|
201 | if ((rw_type == F_WRLCK) && (tdb->read_only)) { |
---|
202 | errno = EACCES; |
---|
203 | return -1; |
---|
204 | } |
---|
205 | |
---|
206 | fl.l_type = rw_type; |
---|
207 | fl.l_whence = SEEK_SET; |
---|
208 | fl.l_start = offset; |
---|
209 | fl.l_len = 1; |
---|
210 | fl.l_pid = 0; |
---|
211 | |
---|
212 | do { |
---|
213 | ret = fcntl(tdb->fd,lck_type,&fl); |
---|
214 | } while (ret == -1 && errno == EINTR); |
---|
215 | |
---|
216 | if (ret == -1) { |
---|
217 | if (!probe && lck_type != F_SETLK) { |
---|
218 | /* Ensure error code is set for log fun to examine. */ |
---|
219 | tdb->ecode = TDB_ERR_LOCK; |
---|
220 | TDB_LOG((tdb, 5,"tdb_brlock failed (fd=%d) at offset %d rw_type=%d lck_type=%d\n", |
---|
221 | tdb->fd, offset, rw_type, lck_type)); |
---|
222 | } |
---|
223 | /* Generic lock error. errno set by fcntl. |
---|
224 | * EAGAIN is an expected return from non-blocking |
---|
225 | * locks. */ |
---|
226 | if (errno != EAGAIN) { |
---|
227 | TDB_LOG((tdb, 5, "tdb_brlock failed (fd=%d) at offset %d rw_type=%d lck_type=%d: %s\n", |
---|
228 | tdb->fd, offset, rw_type, lck_type, |
---|
229 | strerror(errno))); |
---|
230 | } |
---|
231 | return TDB_ERRCODE(TDB_ERR_LOCK, -1); |
---|
232 | } |
---|
233 | return 0; |
---|
234 | } |
---|
235 | |
---|
236 | /* lock a list in the database. list -1 is the alloc list */ |
---|
237 | static int tdb_lock(TDB_CONTEXT *tdb, int list, int ltype) |
---|
238 | { |
---|
239 | if (list < -1 || list >= (int)tdb->header.hash_size) { |
---|
240 | TDB_LOG((tdb, 0,"tdb_lock: invalid list %d for ltype=%d\n", |
---|
241 | list, ltype)); |
---|
242 | return -1; |
---|
243 | } |
---|
244 | if (tdb->flags & TDB_NOLOCK) |
---|
245 | return 0; |
---|
246 | |
---|
247 | /* Since fcntl locks don't nest, we do a lock for the first one, |
---|
248 | and simply bump the count for future ones */ |
---|
249 | if (tdb->locked[list+1].count == 0) { |
---|
250 | if (tdb_brlock(tdb,FREELIST_TOP+4*list,ltype,F_SETLKW, 0)) { |
---|
251 | TDB_LOG((tdb, 0,"tdb_lock failed on list %d ltype=%d (%s)\n", |
---|
252 | list, ltype, strerror(errno))); |
---|
253 | return -1; |
---|
254 | } |
---|
255 | tdb->locked[list+1].ltype = ltype; |
---|
256 | } |
---|
257 | tdb->locked[list+1].count++; |
---|
258 | return 0; |
---|
259 | } |
---|
260 | |
---|
261 | /* unlock the database: returns void because it's too late for errors. */ |
---|
262 | /* changed to return int it may be interesting to know there |
---|
263 | has been an error --simo */ |
---|
264 | static int tdb_unlock(TDB_CONTEXT *tdb, int list, |
---|
265 | int ltype __attribute__((unused))) |
---|
266 | { |
---|
267 | int ret = -1; |
---|
268 | |
---|
269 | if (tdb->flags & TDB_NOLOCK) |
---|
270 | return 0; |
---|
271 | |
---|
272 | /* Sanity checks */ |
---|
273 | if (list < -1 || list >= (int)tdb->header.hash_size) { |
---|
274 | TDB_LOG((tdb, 0, "tdb_unlock: list %d invalid (%d)\n", list, tdb->header.hash_size)); |
---|
275 | return ret; |
---|
276 | } |
---|
277 | |
---|
278 | if (tdb->locked[list+1].count==0) { |
---|
279 | TDB_LOG((tdb, 0, "tdb_unlock: count is 0\n")); |
---|
280 | return ret; |
---|
281 | } |
---|
282 | |
---|
283 | if (tdb->locked[list+1].count == 1) { |
---|
284 | /* Down to last nested lock: unlock underneath */ |
---|
285 | ret = tdb_brlock(tdb, FREELIST_TOP+4*list, F_UNLCK, F_SETLKW, 0); |
---|
286 | } else { |
---|
287 | ret = 0; |
---|
288 | } |
---|
289 | tdb->locked[list+1].count--; |
---|
290 | |
---|
291 | if (ret) |
---|
292 | TDB_LOG((tdb, 0,"tdb_unlock: An error occurred unlocking!\n")); |
---|
293 | return ret; |
---|
294 | } |
---|
295 | |
---|
296 | /* This is based on the hash algorithm from gdbm */ |
---|
297 | static uint32_t default_tdb_hash(TDB_DATA *key) |
---|
298 | { |
---|
299 | uint32_t value; /* Used to compute the hash value. */ |
---|
300 | uint32_t i; /* Used to cycle through random values. */ |
---|
301 | |
---|
302 | /* Set the initial value from the key size. */ |
---|
303 | for (value = 0x238F13AF * key->dsize, i=0; i < key->dsize; i++) |
---|
304 | value = (value + (key->dptr[i] << (i*5 % 24))); |
---|
305 | |
---|
306 | return (1103515243 * value + 12345); |
---|
307 | } |
---|
308 | |
---|
309 | /* check for an out of bounds access - if it is out of bounds then |
---|
310 | see if the database has been expanded by someone else and expand |
---|
311 | if necessary |
---|
312 | note that "len" is the minimum length needed for the db |
---|
313 | */ |
---|
314 | static int tdb_oob(TDB_CONTEXT *tdb, tdb_off len, int probe) |
---|
315 | { |
---|
316 | struct stat st; |
---|
317 | if (len <= tdb->map_size) |
---|
318 | return 0; |
---|
319 | if (tdb->flags & TDB_INTERNAL) { |
---|
320 | if (!probe) { |
---|
321 | /* Ensure ecode is set for log fn. */ |
---|
322 | tdb->ecode = TDB_ERR_IO; |
---|
323 | TDB_LOG((tdb, 0,"tdb_oob len %d beyond internal malloc size %d\n", |
---|
324 | (int)len, (int)tdb->map_size)); |
---|
325 | } |
---|
326 | return TDB_ERRCODE(TDB_ERR_IO, -1); |
---|
327 | } |
---|
328 | |
---|
329 | if (fstat(tdb->fd, &st) == -1) |
---|
330 | return TDB_ERRCODE(TDB_ERR_IO, -1); |
---|
331 | |
---|
332 | if (st.st_size < (off_t)len) { |
---|
333 | if (!probe) { |
---|
334 | /* Ensure ecode is set for log fn. */ |
---|
335 | tdb->ecode = TDB_ERR_IO; |
---|
336 | TDB_LOG((tdb, 0,"tdb_oob len %d beyond eof at %d\n", |
---|
337 | (int)len, (int)st.st_size)); |
---|
338 | } |
---|
339 | return TDB_ERRCODE(TDB_ERR_IO, -1); |
---|
340 | } |
---|
341 | |
---|
342 | /* Unmap, update size, remap */ |
---|
343 | if (tdb_munmap(tdb) == -1) |
---|
344 | return TDB_ERRCODE(TDB_ERR_IO, -1); |
---|
345 | tdb->map_size = st.st_size; |
---|
346 | tdb_mmap(tdb); |
---|
347 | return 0; |
---|
348 | } |
---|
349 | |
---|
350 | /* write a lump of data at a specified offset */ |
---|
351 | static int tdb_write(TDB_CONTEXT *tdb, tdb_off off, void *buf, tdb_len len) |
---|
352 | { |
---|
353 | if (tdb_oob(tdb, off + len, 0) != 0) |
---|
354 | return -1; |
---|
355 | |
---|
356 | if (tdb->map_ptr) |
---|
357 | memcpy(off + (char *)tdb->map_ptr, buf, len); |
---|
358 | #ifdef HAVE_PWRITE |
---|
359 | else if (pwrite(tdb->fd, buf, len, off) != (ssize_t)len) { |
---|
360 | #else |
---|
361 | else if (lseek(tdb->fd, off, SEEK_SET) != (off_t)off |
---|
362 | || write(tdb->fd, buf, len) != (off_t)len) { |
---|
363 | #endif |
---|
364 | /* Ensure ecode is set for log fn. */ |
---|
365 | tdb->ecode = TDB_ERR_IO; |
---|
366 | TDB_LOG((tdb, 0,"tdb_write failed at %d len=%d (%s)\n", |
---|
367 | off, len, strerror(errno))); |
---|
368 | return TDB_ERRCODE(TDB_ERR_IO, -1); |
---|
369 | } |
---|
370 | return 0; |
---|
371 | } |
---|
372 | |
---|
373 | /* read a lump of data at a specified offset, maybe convert */ |
---|
374 | static int tdb_read(TDB_CONTEXT *tdb,tdb_off off,void *buf,tdb_len len,int cv) |
---|
375 | { |
---|
376 | if (tdb_oob(tdb, off + len, 0) != 0) |
---|
377 | return -1; |
---|
378 | |
---|
379 | if (tdb->map_ptr) |
---|
380 | memcpy(buf, off + (char *)tdb->map_ptr, len); |
---|
381 | #ifdef HAVE_PREAD |
---|
382 | else if (pread(tdb->fd, buf, len, off) != (off_t)len) { |
---|
383 | #else |
---|
384 | else if (lseek(tdb->fd, off, SEEK_SET) != (off_t)off |
---|
385 | || read(tdb->fd, buf, len) != (off_t)len) { |
---|
386 | #endif |
---|
387 | /* Ensure ecode is set for log fn. */ |
---|
388 | tdb->ecode = TDB_ERR_IO; |
---|
389 | TDB_LOG((tdb, 0,"tdb_read failed at %d len=%d (%s)\n", |
---|
390 | off, len, strerror(errno))); |
---|
391 | return TDB_ERRCODE(TDB_ERR_IO, -1); |
---|
392 | } |
---|
393 | if (cv) |
---|
394 | convert(buf, len); |
---|
395 | return 0; |
---|
396 | } |
---|
397 | |
---|
398 | /* don't allocate memory: used in tdb_delete path. */ |
---|
399 | static int tdb_key_eq(TDB_CONTEXT *tdb, tdb_off off, TDB_DATA key) |
---|
400 | { |
---|
401 | char buf[64]; |
---|
402 | uint32_t len; |
---|
403 | |
---|
404 | if (tdb_oob(tdb, off + key.dsize, 0) != 0) |
---|
405 | return -1; |
---|
406 | |
---|
407 | if (tdb->map_ptr) |
---|
408 | return !memcmp(off + (char*)tdb->map_ptr, key.dptr, key.dsize); |
---|
409 | |
---|
410 | while (key.dsize) { |
---|
411 | len = key.dsize; |
---|
412 | if (len > sizeof(buf)) |
---|
413 | len = sizeof(buf); |
---|
414 | if (tdb_read(tdb, off, buf, len, 0) != 0) |
---|
415 | return -1; |
---|
416 | if (memcmp(buf, key.dptr, len) != 0) |
---|
417 | return 0; |
---|
418 | key.dptr += len; |
---|
419 | key.dsize -= len; |
---|
420 | off += len; |
---|
421 | } |
---|
422 | return 1; |
---|
423 | } |
---|
424 | |
---|
425 | /* read a lump of data, allocating the space for it */ |
---|
426 | static char *tdb_alloc_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_len len) |
---|
427 | { |
---|
428 | char *buf; |
---|
429 | |
---|
430 | if (!(buf = talloc_size(tdb, len))) { |
---|
431 | /* Ensure ecode is set for log fn. */ |
---|
432 | tdb->ecode = TDB_ERR_OOM; |
---|
433 | TDB_LOG((tdb, 0,"tdb_alloc_read malloc failed len=%d (%s)\n", |
---|
434 | len, strerror(errno))); |
---|
435 | return TDB_ERRCODE(TDB_ERR_OOM, buf); |
---|
436 | } |
---|
437 | if (tdb_read(tdb, offset, buf, len, 0) == -1) { |
---|
438 | SAFE_FREE(buf); |
---|
439 | return NULL; |
---|
440 | } |
---|
441 | return buf; |
---|
442 | } |
---|
443 | |
---|
444 | /* read/write a tdb_off */ |
---|
445 | static int ofs_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d) |
---|
446 | { |
---|
447 | return tdb_read(tdb, offset, (char*)d, sizeof(*d), DOCONV()); |
---|
448 | } |
---|
449 | static int ofs_write(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d) |
---|
450 | { |
---|
451 | tdb_off off = *d; |
---|
452 | return tdb_write(tdb, offset, CONVERT(off), sizeof(*d)); |
---|
453 | } |
---|
454 | |
---|
455 | /* read/write a record */ |
---|
456 | static int rec_read(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec) |
---|
457 | { |
---|
458 | if (tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1) |
---|
459 | return -1; |
---|
460 | if (TDB_BAD_MAGIC(rec)) { |
---|
461 | /* Ensure ecode is set for log fn. */ |
---|
462 | tdb->ecode = TDB_ERR_CORRUPT; |
---|
463 | TDB_LOG((tdb, 0,"rec_read bad magic 0x%x at offset=%d\n", rec->magic, offset)); |
---|
464 | return TDB_ERRCODE(TDB_ERR_CORRUPT, -1); |
---|
465 | } |
---|
466 | return tdb_oob(tdb, rec->next+sizeof(*rec), 0); |
---|
467 | } |
---|
468 | static int rec_write(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec) |
---|
469 | { |
---|
470 | struct list_struct r = *rec; |
---|
471 | return tdb_write(tdb, offset, CONVERT(r), sizeof(r)); |
---|
472 | } |
---|
473 | |
---|
474 | /* read a freelist record and check for simple errors */ |
---|
475 | static int rec_free_read(TDB_CONTEXT *tdb, tdb_off off, struct list_struct *rec) |
---|
476 | { |
---|
477 | if (tdb_read(tdb, off, rec, sizeof(*rec),DOCONV()) == -1) |
---|
478 | return -1; |
---|
479 | |
---|
480 | if (rec->magic == TDB_MAGIC) { |
---|
481 | /* this happens when a app is showdown while deleting a record - we should |
---|
482 | not completely fail when this happens */ |
---|
483 | TDB_LOG((tdb, 0,"rec_free_read non-free magic 0x%x at offset=%d - fixing\n", |
---|
484 | rec->magic, off)); |
---|
485 | rec->magic = TDB_FREE_MAGIC; |
---|
486 | if (tdb_write(tdb, off, rec, sizeof(*rec)) == -1) |
---|
487 | return -1; |
---|
488 | } |
---|
489 | |
---|
490 | if (rec->magic != TDB_FREE_MAGIC) { |
---|
491 | /* Ensure ecode is set for log fn. */ |
---|
492 | tdb->ecode = TDB_ERR_CORRUPT; |
---|
493 | TDB_LOG((tdb, 0,"rec_free_read bad magic 0x%x at offset=%d\n", |
---|
494 | rec->magic, off)); |
---|
495 | return TDB_ERRCODE(TDB_ERR_CORRUPT, -1); |
---|
496 | } |
---|
497 | if (tdb_oob(tdb, rec->next+sizeof(*rec), 0) != 0) |
---|
498 | return -1; |
---|
499 | return 0; |
---|
500 | } |
---|
501 | |
---|
502 | /* update a record tailer (must hold allocation lock) */ |
---|
503 | static int update_tailer(TDB_CONTEXT *tdb, tdb_off offset, |
---|
504 | const struct list_struct *rec) |
---|
505 | { |
---|
506 | tdb_off totalsize; |
---|
507 | |
---|
508 | /* Offset of tailer from record header */ |
---|
509 | totalsize = sizeof(*rec) + rec->rec_len; |
---|
510 | return ofs_write(tdb, offset + totalsize - sizeof(tdb_off), |
---|
511 | &totalsize); |
---|
512 | } |
---|
513 | |
---|
514 | static tdb_off tdb_dump_record(TDB_CONTEXT *tdb, tdb_off offset) |
---|
515 | { |
---|
516 | struct list_struct rec; |
---|
517 | tdb_off tailer_ofs, tailer; |
---|
518 | |
---|
519 | if (tdb_read(tdb, offset, (char *)&rec, sizeof(rec), DOCONV()) == -1) { |
---|
520 | printf("ERROR: failed to read record at %u\n", offset); |
---|
521 | return 0; |
---|
522 | } |
---|
523 | |
---|
524 | printf(" rec: offset=0x%08x next=0x%08x rec_len=%d key_len=%d data_len=%d full_hash=0x%x magic=0x%x\n", |
---|
525 | offset, rec.next, rec.rec_len, rec.key_len, rec.data_len, rec.full_hash, rec.magic); |
---|
526 | |
---|
527 | tailer_ofs = offset + sizeof(rec) + rec.rec_len - sizeof(tdb_off); |
---|
528 | if (ofs_read(tdb, tailer_ofs, &tailer) == -1) { |
---|
529 | printf("ERROR: failed to read tailer at %u\n", tailer_ofs); |
---|
530 | return rec.next; |
---|
531 | } |
---|
532 | |
---|
533 | if (tailer != rec.rec_len + sizeof(rec)) { |
---|
534 | printf("ERROR: tailer does not match record! tailer=%u totalsize=%u\n", |
---|
535 | (unsigned int)tailer, (unsigned int)(rec.rec_len + sizeof(rec))); |
---|
536 | } |
---|
537 | return rec.next; |
---|
538 | } |
---|
539 | |
---|
540 | static int tdb_dump_chain(TDB_CONTEXT *tdb, int i) |
---|
541 | { |
---|
542 | tdb_off rec_ptr, top; |
---|
543 | |
---|
544 | top = TDB_HASH_TOP(i); |
---|
545 | |
---|
546 | if (tdb_lock(tdb, i, F_WRLCK) != 0) |
---|
547 | return -1; |
---|
548 | |
---|
549 | if (ofs_read(tdb, top, &rec_ptr) == -1) |
---|
550 | return tdb_unlock(tdb, i, F_WRLCK); |
---|
551 | |
---|
552 | if (rec_ptr) |
---|
553 | printf("hash=%d\n", i); |
---|
554 | |
---|
555 | while (rec_ptr) { |
---|
556 | rec_ptr = tdb_dump_record(tdb, rec_ptr); |
---|
557 | } |
---|
558 | |
---|
559 | return tdb_unlock(tdb, i, F_WRLCK); |
---|
560 | } |
---|
561 | |
---|
562 | void tdb_dump_all(TDB_CONTEXT *tdb) |
---|
563 | { |
---|
564 | unsigned int i; |
---|
565 | for (i=0;i<tdb->header.hash_size;i++) { |
---|
566 | tdb_dump_chain(tdb, i); |
---|
567 | } |
---|
568 | printf("freelist:\n"); |
---|
569 | tdb_dump_chain(tdb, -1); |
---|
570 | } |
---|
571 | |
---|
572 | int tdb_printfreelist(TDB_CONTEXT *tdb) |
---|
573 | { |
---|
574 | int ret; |
---|
575 | long total_free = 0; |
---|
576 | tdb_off offset, rec_ptr; |
---|
577 | struct list_struct rec; |
---|
578 | |
---|
579 | if ((ret = tdb_lock(tdb, -1, F_WRLCK)) != 0) |
---|
580 | return ret; |
---|
581 | |
---|
582 | offset = FREELIST_TOP; |
---|
583 | |
---|
584 | /* read in the freelist top */ |
---|
585 | if (ofs_read(tdb, offset, &rec_ptr) == -1) { |
---|
586 | tdb_unlock(tdb, -1, F_WRLCK); |
---|
587 | return 0; |
---|
588 | } |
---|
589 | |
---|
590 | printf("freelist top=[0x%08x]\n", rec_ptr ); |
---|
591 | while (rec_ptr) { |
---|
592 | if (tdb_read(tdb, rec_ptr, (char *)&rec, sizeof(rec), DOCONV()) == -1) { |
---|
593 | tdb_unlock(tdb, -1, F_WRLCK); |
---|
594 | return -1; |
---|
595 | } |
---|
596 | |
---|
597 | if (rec.magic != TDB_FREE_MAGIC) { |
---|
598 | printf("bad magic 0x%08x in free list\n", rec.magic); |
---|
599 | tdb_unlock(tdb, -1, F_WRLCK); |
---|
600 | return -1; |
---|
601 | } |
---|
602 | |
---|
603 | printf("entry offset=[0x%08x], rec.rec_len = [0x%08x (%d)] (end = 0x%08x)\n", |
---|
604 | rec_ptr, rec.rec_len, rec.rec_len, rec_ptr + rec.rec_len); |
---|
605 | total_free += rec.rec_len; |
---|
606 | |
---|
607 | /* move to the next record */ |
---|
608 | rec_ptr = rec.next; |
---|
609 | } |
---|
610 | printf("total rec_len = [0x%08x (%d)]\n", (int)total_free, |
---|
611 | (int)total_free); |
---|
612 | |
---|
613 | return tdb_unlock(tdb, -1, F_WRLCK); |
---|
614 | } |
---|
615 | |
---|
616 | /* Remove an element from the freelist. Must have alloc lock. */ |
---|
617 | static int remove_from_freelist(TDB_CONTEXT *tdb, tdb_off off, tdb_off next) |
---|
618 | { |
---|
619 | tdb_off last_ptr, i; |
---|
620 | |
---|
621 | /* read in the freelist top */ |
---|
622 | last_ptr = FREELIST_TOP; |
---|
623 | while (ofs_read(tdb, last_ptr, &i) != -1 && i != 0) { |
---|
624 | if (i == off) { |
---|
625 | /* We've found it! */ |
---|
626 | return ofs_write(tdb, last_ptr, &next); |
---|
627 | } |
---|
628 | /* Follow chain (next offset is at start of record) */ |
---|
629 | last_ptr = i; |
---|
630 | } |
---|
631 | TDB_LOG((tdb, 0,"remove_from_freelist: not on list at off=%d\n", off)); |
---|
632 | return TDB_ERRCODE(TDB_ERR_CORRUPT, -1); |
---|
633 | } |
---|
634 | |
---|
635 | /* Add an element into the freelist. Merge adjacent records if |
---|
636 | neccessary. */ |
---|
637 | static int tdb_free(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec) |
---|
638 | { |
---|
639 | tdb_off right, left; |
---|
640 | |
---|
641 | /* Allocation and tailer lock */ |
---|
642 | if (tdb_lock(tdb, -1, F_WRLCK) != 0) |
---|
643 | return -1; |
---|
644 | |
---|
645 | /* set an initial tailer, so if we fail we don't leave a bogus record */ |
---|
646 | if (update_tailer(tdb, offset, rec) != 0) { |
---|
647 | TDB_LOG((tdb, 0, "tdb_free: upfate_tailer failed!\n")); |
---|
648 | goto fail; |
---|
649 | } |
---|
650 | |
---|
651 | /* Look right first (I'm an Australian, dammit) */ |
---|
652 | right = offset + sizeof(*rec) + rec->rec_len; |
---|
653 | if (right + sizeof(*rec) <= tdb->map_size) { |
---|
654 | struct list_struct r; |
---|
655 | |
---|
656 | if (tdb_read(tdb, right, &r, sizeof(r), DOCONV()) == -1) { |
---|
657 | TDB_LOG((tdb, 0, "tdb_free: right read failed at %u\n", right)); |
---|
658 | goto left; |
---|
659 | } |
---|
660 | |
---|
661 | /* If it's free, expand to include it. */ |
---|
662 | if (r.magic == TDB_FREE_MAGIC) { |
---|
663 | if (remove_from_freelist(tdb, right, r.next) == -1) { |
---|
664 | TDB_LOG((tdb, 0, "tdb_free: right free failed at %u\n", right)); |
---|
665 | goto left; |
---|
666 | } |
---|
667 | rec->rec_len += sizeof(r) + r.rec_len; |
---|
668 | } |
---|
669 | } |
---|
670 | |
---|
671 | left: |
---|
672 | /* Look left */ |
---|
673 | left = offset - sizeof(tdb_off); |
---|
674 | if (left > TDB_DATA_START(tdb->header.hash_size)) { |
---|
675 | struct list_struct l; |
---|
676 | tdb_off leftsize; |
---|
677 | |
---|
678 | /* Read in tailer and jump back to header */ |
---|
679 | if (ofs_read(tdb, left, &leftsize) == -1) { |
---|
680 | TDB_LOG((tdb, 0, "tdb_free: left offset read failed at %u\n", left)); |
---|
681 | goto update; |
---|
682 | } |
---|
683 | left = offset - leftsize; |
---|
684 | |
---|
685 | /* Now read in record */ |
---|
686 | if (tdb_read(tdb, left, &l, sizeof(l), DOCONV()) == -1) { |
---|
687 | TDB_LOG((tdb, 0, "tdb_free: left read failed at %u (%u)\n", left, leftsize)); |
---|
688 | goto update; |
---|
689 | } |
---|
690 | |
---|
691 | /* If it's free, expand to include it. */ |
---|
692 | if (l.magic == TDB_FREE_MAGIC) { |
---|
693 | if (remove_from_freelist(tdb, left, l.next) == -1) { |
---|
694 | TDB_LOG((tdb, 0, "tdb_free: left free failed at %u\n", left)); |
---|
695 | goto update; |
---|
696 | } else { |
---|
697 | offset = left; |
---|
698 | rec->rec_len += leftsize; |
---|
699 | } |
---|
700 | } |
---|
701 | } |
---|
702 | |
---|
703 | update: |
---|
704 | if (update_tailer(tdb, offset, rec) == -1) { |
---|
705 | TDB_LOG((tdb, 0, "tdb_free: update_tailer failed at %u\n", offset)); |
---|
706 | goto fail; |
---|
707 | } |
---|
708 | |
---|
709 | /* Now, prepend to free list */ |
---|
710 | rec->magic = TDB_FREE_MAGIC; |
---|
711 | |
---|
712 | if (ofs_read(tdb, FREELIST_TOP, &rec->next) == -1 || |
---|
713 | rec_write(tdb, offset, rec) == -1 || |
---|
714 | ofs_write(tdb, FREELIST_TOP, &offset) == -1) { |
---|
715 | TDB_LOG((tdb, 0, "tdb_free record write failed at offset=%d\n", offset)); |
---|
716 | goto fail; |
---|
717 | } |
---|
718 | |
---|
719 | /* And we're done. */ |
---|
720 | tdb_unlock(tdb, -1, F_WRLCK); |
---|
721 | return 0; |
---|
722 | |
---|
723 | fail: |
---|
724 | tdb_unlock(tdb, -1, F_WRLCK); |
---|
725 | return -1; |
---|
726 | } |
---|
727 | |
---|
728 | |
---|
729 | /* expand a file. we prefer to use ftruncate, as that is what posix |
---|
730 | says to use for mmap expansion */ |
---|
731 | static int expand_file(TDB_CONTEXT *tdb, tdb_off size, tdb_off addition) |
---|
732 | { |
---|
733 | char buf[1024]; |
---|
734 | #if HAVE_FTRUNCATE_EXTEND |
---|
735 | if (ftruncate(tdb->fd, size+addition) != 0) { |
---|
736 | TDB_LOG((tdb, 0, "expand_file ftruncate to %d failed (%s)\n", |
---|
737 | size+addition, strerror(errno))); |
---|
738 | return -1; |
---|
739 | } |
---|
740 | #else |
---|
741 | char b = 0; |
---|
742 | |
---|
743 | #ifdef HAVE_PWRITE |
---|
744 | if (pwrite(tdb->fd, &b, 1, (size+addition) - 1) != 1) { |
---|
745 | #else |
---|
746 | if (lseek(tdb->fd, (size+addition) - 1, SEEK_SET) != (off_t)(size+addition) - 1 || |
---|
747 | write(tdb->fd, &b, 1) != 1) { |
---|
748 | #endif |
---|
749 | TDB_LOG((tdb, 0, "expand_file to %d failed (%s)\n", |
---|
750 | size+addition, strerror(errno))); |
---|
751 | return -1; |
---|
752 | } |
---|
753 | #endif |
---|
754 | |
---|
755 | /* now fill the file with something. This ensures that the file isn't sparse, which would be |
---|
756 | very bad if we ran out of disk. This must be done with write, not via mmap */ |
---|
757 | memset(buf, 0x42, sizeof(buf)); |
---|
758 | while (addition) { |
---|
759 | int n = addition>sizeof(buf)?sizeof(buf):addition; |
---|
760 | #ifdef HAVE_PWRITE |
---|
761 | int ret = pwrite(tdb->fd, buf, n, size); |
---|
762 | #else |
---|
763 | int ret; |
---|
764 | if (lseek(tdb->fd, size, SEEK_SET) != (off_t)size) |
---|
765 | return -1; |
---|
766 | ret = write(tdb->fd, buf, n); |
---|
767 | #endif |
---|
768 | if (ret != n) { |
---|
769 | TDB_LOG((tdb, 0, "expand_file write of %d failed (%s)\n", |
---|
770 | n, strerror(errno))); |
---|
771 | return -1; |
---|
772 | } |
---|
773 | addition -= n; |
---|
774 | size += n; |
---|
775 | } |
---|
776 | return 0; |
---|
777 | } |
---|
778 | |
---|
779 | |
---|
780 | /* expand the database at least size bytes by expanding the underlying |
---|
781 | file and doing the mmap again if necessary */ |
---|
782 | static int tdb_expand(TDB_CONTEXT *tdb, tdb_off size) |
---|
783 | { |
---|
784 | struct list_struct rec; |
---|
785 | tdb_off offset; |
---|
786 | |
---|
787 | if (tdb_lock(tdb, -1, F_WRLCK) == -1) { |
---|
788 | TDB_LOG((tdb, 0, "lock failed in tdb_expand\n")); |
---|
789 | return -1; |
---|
790 | } |
---|
791 | |
---|
792 | /* must know about any previous expansions by another process */ |
---|
793 | tdb_oob(tdb, tdb->map_size + 1, 1); |
---|
794 | |
---|
795 | /* always make room for at least 10 more records, and round |
---|
796 | the database up to a multiple of TDB_PAGE_SIZE */ |
---|
797 | size = TDB_ALIGN(tdb->map_size + size*10, TDB_PAGE_SIZE) - tdb->map_size; |
---|
798 | |
---|
799 | if (!(tdb->flags & TDB_INTERNAL)) |
---|
800 | tdb_munmap(tdb); |
---|
801 | |
---|
802 | /* |
---|
803 | * We must ensure the file is unmapped before doing this |
---|
804 | * to ensure consistency with systems like OpenBSD where |
---|
805 | * writes and mmaps are not consistent. |
---|
806 | */ |
---|
807 | |
---|
808 | /* expand the file itself */ |
---|
809 | if (!(tdb->flags & TDB_INTERNAL)) { |
---|
810 | if (expand_file(tdb, tdb->map_size, size) != 0) |
---|
811 | goto fail; |
---|
812 | } |
---|
813 | |
---|
814 | tdb->map_size += size; |
---|
815 | |
---|
816 | if (tdb->flags & TDB_INTERNAL) { |
---|
817 | char *new_map_ptr = talloc_realloc_size(tdb, tdb->map_ptr, |
---|
818 | tdb->map_size); |
---|
819 | if (!new_map_ptr) { |
---|
820 | tdb->map_size -= size; |
---|
821 | goto fail; |
---|
822 | } |
---|
823 | tdb->map_ptr = new_map_ptr; |
---|
824 | } else { |
---|
825 | /* |
---|
826 | * We must ensure the file is remapped before adding the space |
---|
827 | * to ensure consistency with systems like OpenBSD where |
---|
828 | * writes and mmaps are not consistent. |
---|
829 | */ |
---|
830 | |
---|
831 | /* We're ok if the mmap fails as we'll fallback to read/write */ |
---|
832 | tdb_mmap(tdb); |
---|
833 | } |
---|
834 | |
---|
835 | /* form a new freelist record */ |
---|
836 | memset(&rec,'\0',sizeof(rec)); |
---|
837 | rec.rec_len = size - sizeof(rec); |
---|
838 | |
---|
839 | /* link it into the free list */ |
---|
840 | offset = tdb->map_size - size; |
---|
841 | if (tdb_free(tdb, offset, &rec) == -1) |
---|
842 | goto fail; |
---|
843 | |
---|
844 | tdb_unlock(tdb, -1, F_WRLCK); |
---|
845 | return 0; |
---|
846 | fail: |
---|
847 | tdb_unlock(tdb, -1, F_WRLCK); |
---|
848 | return -1; |
---|
849 | } |
---|
850 | |
---|
851 | |
---|
852 | /* |
---|
853 | the core of tdb_allocate - called when we have decided which |
---|
854 | free list entry to use |
---|
855 | */ |
---|
856 | static tdb_off tdb_allocate_ofs(TDB_CONTEXT *tdb, tdb_len length, tdb_off rec_ptr, |
---|
857 | struct list_struct *rec, tdb_off last_ptr) |
---|
858 | { |
---|
859 | struct list_struct newrec; |
---|
860 | tdb_off newrec_ptr; |
---|
861 | |
---|
862 | memset(&newrec, '\0', sizeof(newrec)); |
---|
863 | |
---|
864 | /* found it - now possibly split it up */ |
---|
865 | if (rec->rec_len > length + MIN_REC_SIZE) { |
---|
866 | /* Length of left piece */ |
---|
867 | length = TDB_ALIGN(length, TDB_ALIGNMENT); |
---|
868 | |
---|
869 | /* Right piece to go on free list */ |
---|
870 | newrec.rec_len = rec->rec_len - (sizeof(*rec) + length); |
---|
871 | newrec_ptr = rec_ptr + sizeof(*rec) + length; |
---|
872 | |
---|
873 | /* And left record is shortened */ |
---|
874 | rec->rec_len = length; |
---|
875 | } else { |
---|
876 | newrec_ptr = 0; |
---|
877 | } |
---|
878 | |
---|
879 | /* Remove allocated record from the free list */ |
---|
880 | if (ofs_write(tdb, last_ptr, &rec->next) == -1) { |
---|
881 | return 0; |
---|
882 | } |
---|
883 | |
---|
884 | /* Update header: do this before we drop alloc |
---|
885 | lock, otherwise tdb_free() might try to |
---|
886 | merge with us, thinking we're free. |
---|
887 | (Thanks Jeremy Allison). */ |
---|
888 | rec->magic = TDB_MAGIC; |
---|
889 | if (rec_write(tdb, rec_ptr, rec) == -1) { |
---|
890 | return 0; |
---|
891 | } |
---|
892 | |
---|
893 | /* Did we create new block? */ |
---|
894 | if (newrec_ptr) { |
---|
895 | /* Update allocated record tailer (we |
---|
896 | shortened it). */ |
---|
897 | if (update_tailer(tdb, rec_ptr, rec) == -1) { |
---|
898 | return 0; |
---|
899 | } |
---|
900 | |
---|
901 | /* Free new record */ |
---|
902 | if (tdb_free(tdb, newrec_ptr, &newrec) == -1) { |
---|
903 | return 0; |
---|
904 | } |
---|
905 | } |
---|
906 | |
---|
907 | /* all done - return the new record offset */ |
---|
908 | return rec_ptr; |
---|
909 | } |
---|
910 | |
---|
911 | /* allocate some space from the free list. The offset returned points |
---|
912 | to a unconnected list_struct within the database with room for at |
---|
913 | least length bytes of total data |
---|
914 | |
---|
915 | 0 is returned if the space could not be allocated |
---|
916 | */ |
---|
917 | static tdb_off tdb_allocate(TDB_CONTEXT *tdb, tdb_len length, |
---|
918 | struct list_struct *rec) |
---|
919 | { |
---|
920 | tdb_off rec_ptr, last_ptr, newrec_ptr; |
---|
921 | struct { |
---|
922 | tdb_off rec_ptr, last_ptr; |
---|
923 | tdb_len rec_len; |
---|
924 | } bestfit = { 0, 0, 0 }; |
---|
925 | |
---|
926 | if (tdb_lock(tdb, -1, F_WRLCK) == -1) |
---|
927 | return 0; |
---|
928 | |
---|
929 | /* Extra bytes required for tailer */ |
---|
930 | length += sizeof(tdb_off); |
---|
931 | |
---|
932 | again: |
---|
933 | last_ptr = FREELIST_TOP; |
---|
934 | |
---|
935 | /* read in the freelist top */ |
---|
936 | if (ofs_read(tdb, FREELIST_TOP, &rec_ptr) == -1) |
---|
937 | goto fail; |
---|
938 | |
---|
939 | bestfit.rec_ptr = 0; |
---|
940 | |
---|
941 | /* |
---|
942 | this is a best fit allocation strategy. Originally we used |
---|
943 | a first fit strategy, but it suffered from massive fragmentation |
---|
944 | issues when faced with a slowly increasing record size. |
---|
945 | */ |
---|
946 | while (rec_ptr) { |
---|
947 | if (rec_free_read(tdb, rec_ptr, rec) == -1) { |
---|
948 | goto fail; |
---|
949 | } |
---|
950 | |
---|
951 | if (rec->rec_len >= length) { |
---|
952 | if (bestfit.rec_ptr == 0 || |
---|
953 | rec->rec_len < bestfit.rec_len) { |
---|
954 | bestfit.rec_len = rec->rec_len; |
---|
955 | bestfit.rec_ptr = rec_ptr; |
---|
956 | bestfit.last_ptr = last_ptr; |
---|
957 | /* consider a fit to be good enough if we aren't wasting more than half the space */ |
---|
958 | if (bestfit.rec_len < 2*length) { |
---|
959 | break; |
---|
960 | } |
---|
961 | } |
---|
962 | } |
---|
963 | |
---|
964 | /* move to the next record */ |
---|
965 | last_ptr = rec_ptr; |
---|
966 | rec_ptr = rec->next; |
---|
967 | } |
---|
968 | |
---|
969 | if (bestfit.rec_ptr != 0) { |
---|
970 | if (rec_free_read(tdb, bestfit.rec_ptr, rec) == -1) { |
---|
971 | goto fail; |
---|
972 | } |
---|
973 | |
---|
974 | newrec_ptr = tdb_allocate_ofs(tdb, length, bestfit.rec_ptr, rec, bestfit.last_ptr); |
---|
975 | tdb_unlock(tdb, -1, F_WRLCK); |
---|
976 | return newrec_ptr; |
---|
977 | } |
---|
978 | |
---|
979 | /* we didn't find enough space. See if we can expand the |
---|
980 | database and if we can then try again */ |
---|
981 | if (tdb_expand(tdb, length + sizeof(*rec)) == 0) |
---|
982 | goto again; |
---|
983 | fail: |
---|
984 | tdb_unlock(tdb, -1, F_WRLCK); |
---|
985 | return 0; |
---|
986 | } |
---|
987 | |
---|
988 | /* initialise a new database with a specified hash size */ |
---|
989 | static int tdb_new_database(TDB_CONTEXT *tdb, int hash_size) |
---|
990 | { |
---|
991 | struct tdb_header *newdb; |
---|
992 | int size, ret = -1; |
---|
993 | |
---|
994 | /* We make it up in memory, then write it out if not internal */ |
---|
995 | size = sizeof(struct tdb_header) + (hash_size+1)*sizeof(tdb_off); |
---|
996 | if (!(newdb = talloc_zero_size(tdb, size))) |
---|
997 | return TDB_ERRCODE(TDB_ERR_OOM, -1); |
---|
998 | |
---|
999 | /* Fill in the header */ |
---|
1000 | newdb->version = TDB_VERSION; |
---|
1001 | newdb->hash_size = hash_size; |
---|
1002 | if (tdb->flags & TDB_INTERNAL) { |
---|
1003 | tdb->map_size = size; |
---|
1004 | tdb->map_ptr = (char *)newdb; |
---|
1005 | memcpy(&tdb->header, newdb, sizeof(tdb->header)); |
---|
1006 | /* Convert the `ondisk' version if asked. */ |
---|
1007 | CONVERT(*newdb); |
---|
1008 | return 0; |
---|
1009 | } |
---|
1010 | if (lseek(tdb->fd, 0, SEEK_SET) == -1) |
---|
1011 | goto fail; |
---|
1012 | |
---|
1013 | if (ftruncate(tdb->fd, 0) == -1) |
---|
1014 | goto fail; |
---|
1015 | |
---|
1016 | /* This creates an endian-converted header, as if read from disk */ |
---|
1017 | CONVERT(*newdb); |
---|
1018 | memcpy(&tdb->header, newdb, sizeof(tdb->header)); |
---|
1019 | /* Don't endian-convert the magic food! */ |
---|
1020 | memcpy(newdb->magic_food, TDB_MAGIC_FOOD, strlen(TDB_MAGIC_FOOD)+1); |
---|
1021 | if (write(tdb->fd, newdb, size) != size) |
---|
1022 | ret = -1; |
---|
1023 | else |
---|
1024 | ret = 0; |
---|
1025 | |
---|
1026 | fail: |
---|
1027 | SAFE_FREE(newdb); |
---|
1028 | return ret; |
---|
1029 | } |
---|
1030 | |
---|
1031 | /* Returns 0 on fail. On success, return offset of record, and fills |
---|
1032 | in rec */ |
---|
1033 | static tdb_off tdb_find(TDB_CONTEXT *tdb, TDB_DATA key, uint32_t hash, |
---|
1034 | struct list_struct *r) |
---|
1035 | { |
---|
1036 | tdb_off rec_ptr; |
---|
1037 | |
---|
1038 | /* read in the hash top */ |
---|
1039 | if (ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) |
---|
1040 | return 0; |
---|
1041 | |
---|
1042 | /* keep looking until we find the right record */ |
---|
1043 | while (rec_ptr) { |
---|
1044 | if (rec_read(tdb, rec_ptr, r) == -1) |
---|
1045 | return 0; |
---|
1046 | |
---|
1047 | if (!TDB_DEAD(r) && hash==r->full_hash && key.dsize==r->key_len) { |
---|
1048 | /* a very likely hit - read the key */ |
---|
1049 | int cmp = tdb_key_eq(tdb, rec_ptr + sizeof(*r), key); |
---|
1050 | if (cmp < 0) |
---|
1051 | return 0; |
---|
1052 | else if (cmp > 0) |
---|
1053 | return rec_ptr; |
---|
1054 | } |
---|
1055 | rec_ptr = r->next; |
---|
1056 | } |
---|
1057 | return TDB_ERRCODE(TDB_ERR_NOEXIST, 0); |
---|
1058 | } |
---|
1059 | |
---|
1060 | /* As tdb_find, but if you succeed, keep the lock */ |
---|
1061 | static tdb_off tdb_find_lock_hash(TDB_CONTEXT *tdb, TDB_DATA key, uint32_t hash, int locktype, |
---|
1062 | struct list_struct *rec) |
---|
1063 | { |
---|
1064 | uint32_t rec_ptr; |
---|
1065 | |
---|
1066 | if (tdb_lock(tdb, BUCKET(hash), locktype) == -1) |
---|
1067 | return 0; |
---|
1068 | if (!(rec_ptr = tdb_find(tdb, key, hash, rec))) |
---|
1069 | tdb_unlock(tdb, BUCKET(hash), locktype); |
---|
1070 | return rec_ptr; |
---|
1071 | } |
---|
1072 | |
---|
1073 | enum TDB_ERROR tdb_error(TDB_CONTEXT *tdb) |
---|
1074 | { |
---|
1075 | return tdb->ecode; |
---|
1076 | } |
---|
1077 | |
---|
1078 | static struct tdb_errname { |
---|
1079 | enum TDB_ERROR ecode; const char *estring; |
---|
1080 | } emap[] = { {TDB_SUCCESS, "Success"}, |
---|
1081 | {TDB_ERR_CORRUPT, "Corrupt database"}, |
---|
1082 | {TDB_ERR_IO, "IO Error"}, |
---|
1083 | {TDB_ERR_LOCK, "Locking error"}, |
---|
1084 | {TDB_ERR_OOM, "Out of memory"}, |
---|
1085 | {TDB_ERR_EXISTS, "Record exists"}, |
---|
1086 | {TDB_ERR_NOLOCK, "Lock exists on other keys"}, |
---|
1087 | {TDB_ERR_NOEXIST, "Record does not exist"} }; |
---|
1088 | |
---|
1089 | /* Error string for the last tdb error */ |
---|
1090 | const char *tdb_errorstr(TDB_CONTEXT *tdb) |
---|
1091 | { |
---|
1092 | uint32_t i; |
---|
1093 | for (i = 0; i < sizeof(emap) / sizeof(struct tdb_errname); i++) |
---|
1094 | if (tdb->ecode == emap[i].ecode) |
---|
1095 | return emap[i].estring; |
---|
1096 | return "Invalid error code"; |
---|
1097 | } |
---|
1098 | |
---|
1099 | /* update an entry in place - this only works if the new data size |
---|
1100 | is <= the old data size and the key exists. |
---|
1101 | on failure return -1. |
---|
1102 | */ |
---|
1103 | |
---|
1104 | static int tdb_update_hash(TDB_CONTEXT *tdb, TDB_DATA key, uint32_t hash, TDB_DATA dbuf) |
---|
1105 | { |
---|
1106 | struct list_struct rec; |
---|
1107 | tdb_off rec_ptr; |
---|
1108 | |
---|
1109 | /* find entry */ |
---|
1110 | if (!(rec_ptr = tdb_find(tdb, key, hash, &rec))) |
---|
1111 | return -1; |
---|
1112 | |
---|
1113 | /* must be long enough key, data and tailer */ |
---|
1114 | if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb_off)) { |
---|
1115 | tdb->ecode = TDB_SUCCESS; /* Not really an error */ |
---|
1116 | return -1; |
---|
1117 | } |
---|
1118 | |
---|
1119 | if (tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len, |
---|
1120 | dbuf.dptr, dbuf.dsize) == -1) |
---|
1121 | return -1; |
---|
1122 | |
---|
1123 | if (dbuf.dsize != rec.data_len) { |
---|
1124 | /* update size */ |
---|
1125 | rec.data_len = dbuf.dsize; |
---|
1126 | return rec_write(tdb, rec_ptr, &rec); |
---|
1127 | } |
---|
1128 | |
---|
1129 | return 0; |
---|
1130 | } |
---|
1131 | |
---|
1132 | /* find an entry in the database given a key */ |
---|
1133 | /* If an entry doesn't exist tdb_err will be set to |
---|
1134 | * TDB_ERR_NOEXIST. If a key has no data attached |
---|
1135 | * then the TDB_DATA will have zero length but |
---|
1136 | * a non-zero pointer |
---|
1137 | */ |
---|
1138 | |
---|
1139 | TDB_DATA tdb_fetch(TDB_CONTEXT *tdb, TDB_DATA key) |
---|
1140 | { |
---|
1141 | tdb_off rec_ptr; |
---|
1142 | struct list_struct rec; |
---|
1143 | TDB_DATA ret; |
---|
1144 | uint32_t hash; |
---|
1145 | |
---|
1146 | /* find which hash bucket it is in */ |
---|
1147 | hash = tdb->hash_fn(&key); |
---|
1148 | if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) |
---|
1149 | return tdb_null; |
---|
1150 | |
---|
1151 | ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len, |
---|
1152 | rec.data_len); |
---|
1153 | ret.dsize = rec.data_len; |
---|
1154 | tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK); |
---|
1155 | return ret; |
---|
1156 | } |
---|
1157 | |
---|
1158 | /* check if an entry in the database exists |
---|
1159 | |
---|
1160 | note that 1 is returned if the key is found and 0 is returned if not found |
---|
1161 | this doesn't match the conventions in the rest of this module, but is |
---|
1162 | compatible with gdbm |
---|
1163 | */ |
---|
1164 | static int tdb_exists_hash(TDB_CONTEXT *tdb, TDB_DATA key, uint32_t hash) |
---|
1165 | { |
---|
1166 | struct list_struct rec; |
---|
1167 | |
---|
1168 | if (tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0) |
---|
1169 | return 0; |
---|
1170 | tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK); |
---|
1171 | return 1; |
---|
1172 | } |
---|
1173 | |
---|
1174 | int tdb_exists(TDB_CONTEXT *tdb, TDB_DATA key) |
---|
1175 | { |
---|
1176 | uint32_t hash = tdb->hash_fn(&key); |
---|
1177 | return tdb_exists_hash(tdb, key, hash); |
---|
1178 | } |
---|
1179 | |
---|
1180 | /* record lock stops delete underneath */ |
---|
1181 | static int lock_record(TDB_CONTEXT *tdb, tdb_off off) |
---|
1182 | { |
---|
1183 | return off ? tdb_brlock(tdb, off, F_RDLCK, F_SETLKW, 0) : 0; |
---|
1184 | } |
---|
1185 | /* |
---|
1186 | Write locks override our own fcntl readlocks, so check it here. |
---|
1187 | Note this is meant to be F_SETLK, *not* F_SETLKW, as it's not |
---|
1188 | an error to fail to get the lock here. |
---|
1189 | */ |
---|
1190 | |
---|
1191 | static int write_lock_record(TDB_CONTEXT *tdb, tdb_off off) |
---|
1192 | { |
---|
1193 | struct tdb_traverse_lock *i; |
---|
1194 | for (i = &tdb->travlocks; i; i = i->next) |
---|
1195 | if (i->off == off) |
---|
1196 | return -1; |
---|
1197 | return tdb_brlock(tdb, off, F_WRLCK, F_SETLK, 1); |
---|
1198 | } |
---|
1199 | |
---|
1200 | /* |
---|
1201 | Note this is meant to be F_SETLK, *not* F_SETLKW, as it's not |
---|
1202 | an error to fail to get the lock here. |
---|
1203 | */ |
---|
1204 | |
---|
1205 | static int write_unlock_record(TDB_CONTEXT *tdb, tdb_off off) |
---|
1206 | { |
---|
1207 | return tdb_brlock(tdb, off, F_UNLCK, F_SETLK, 0); |
---|
1208 | } |
---|
1209 | /* fcntl locks don't stack: avoid unlocking someone else's */ |
---|
1210 | static int unlock_record(TDB_CONTEXT *tdb, tdb_off off) |
---|
1211 | { |
---|
1212 | struct tdb_traverse_lock *i; |
---|
1213 | uint32_t count = 0; |
---|
1214 | |
---|
1215 | if (off == 0) |
---|
1216 | return 0; |
---|
1217 | for (i = &tdb->travlocks; i; i = i->next) |
---|
1218 | if (i->off == off) |
---|
1219 | count++; |
---|
1220 | return (count == 1 ? tdb_brlock(tdb, off, F_UNLCK, F_SETLKW, 0) : 0); |
---|
1221 | } |
---|
1222 | |
---|
1223 | /* actually delete an entry in the database given the offset */ |
---|
1224 | static int do_delete(TDB_CONTEXT *tdb, tdb_off rec_ptr, struct list_struct*rec) |
---|
1225 | { |
---|
1226 | tdb_off last_ptr, i; |
---|
1227 | struct list_struct lastrec; |
---|
1228 | |
---|
1229 | if (tdb->read_only) return -1; |
---|
1230 | |
---|
1231 | if (write_lock_record(tdb, rec_ptr) == -1) { |
---|
1232 | /* Someone traversing here: mark it as dead */ |
---|
1233 | rec->magic = TDB_DEAD_MAGIC; |
---|
1234 | return rec_write(tdb, rec_ptr, rec); |
---|
1235 | } |
---|
1236 | if (write_unlock_record(tdb, rec_ptr) != 0) |
---|
1237 | return -1; |
---|
1238 | |
---|
1239 | /* find previous record in hash chain */ |
---|
1240 | if (ofs_read(tdb, TDB_HASH_TOP(rec->full_hash), &i) == -1) |
---|
1241 | return -1; |
---|
1242 | for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next) |
---|
1243 | if (rec_read(tdb, i, &lastrec) == -1) |
---|
1244 | return -1; |
---|
1245 | |
---|
1246 | /* unlink it: next ptr is at start of record. */ |
---|
1247 | if (last_ptr == 0) |
---|
1248 | last_ptr = TDB_HASH_TOP(rec->full_hash); |
---|
1249 | if (ofs_write(tdb, last_ptr, &rec->next) == -1) |
---|
1250 | return -1; |
---|
1251 | |
---|
1252 | /* recover the space */ |
---|
1253 | if (tdb_free(tdb, rec_ptr, rec) == -1) |
---|
1254 | return -1; |
---|
1255 | return 0; |
---|
1256 | } |
---|
1257 | |
---|
1258 | /* Uses traverse lock: 0 = finish, -1 = error, other = record offset */ |
---|
1259 | static int tdb_next_lock(TDB_CONTEXT *tdb, struct tdb_traverse_lock *tlock, |
---|
1260 | struct list_struct *rec) |
---|
1261 | { |
---|
1262 | int want_next = (tlock->off != 0); |
---|
1263 | |
---|
1264 | /* Lock each chain from the start one. */ |
---|
1265 | for (; tlock->hash < tdb->header.hash_size; tlock->hash++) { |
---|
1266 | |
---|
1267 | /* this is an optimisation for the common case where |
---|
1268 | the hash chain is empty, which is particularly |
---|
1269 | common for the use of tdb with ldb, where large |
---|
1270 | hashes are used. In that case we spend most of our |
---|
1271 | time in tdb_brlock(), locking empty hash chains. |
---|
1272 | |
---|
1273 | To avoid this, we do an unlocked pre-check to see |
---|
1274 | if the hash chain is empty before starting to look |
---|
1275 | inside it. If it is empty then we can avoid that |
---|
1276 | hash chain. If it isn't empty then we can't believe |
---|
1277 | the value we get back, as we read it without a |
---|
1278 | lock, so instead we get the lock and re-fetch the |
---|
1279 | value below. |
---|
1280 | |
---|
1281 | Notice that not doing this optimisation on the |
---|
1282 | first hash chain is critical. We must guarantee |
---|
1283 | that we have done at least one fcntl lock at the |
---|
1284 | start of a search to guarantee that memory is |
---|
1285 | coherent on SMP systems. If records are added by |
---|
1286 | others during the search then thats OK, and we |
---|
1287 | could possibly miss those with this trick, but we |
---|
1288 | could miss them anyway without this trick, so the |
---|
1289 | semantics don't change. |
---|
1290 | |
---|
1291 | With a non-indexed ldb search this trick gains us a |
---|
1292 | factor of around 80 in speed on a linux 2.6.x |
---|
1293 | system (testing using ldbtest). |
---|
1294 | */ |
---|
1295 | if (!tlock->off && tlock->hash != 0) { |
---|
1296 | uint32_t off; |
---|
1297 | if (tdb->map_ptr) { |
---|
1298 | for (;tlock->hash < tdb->header.hash_size;tlock->hash++) { |
---|
1299 | if (0 != *(uint32_t *)(TDB_HASH_TOP(tlock->hash) + (unsigned char *)tdb->map_ptr)) { |
---|
1300 | break; |
---|
1301 | } |
---|
1302 | } |
---|
1303 | if (tlock->hash == tdb->header.hash_size) { |
---|
1304 | continue; |
---|
1305 | } |
---|
1306 | } else { |
---|
1307 | if (ofs_read(tdb, TDB_HASH_TOP(tlock->hash), &off) == 0 && |
---|
1308 | off == 0) { |
---|
1309 | continue; |
---|
1310 | } |
---|
1311 | } |
---|
1312 | } |
---|
1313 | |
---|
1314 | if (tdb_lock(tdb, tlock->hash, F_WRLCK) == -1) |
---|
1315 | return -1; |
---|
1316 | |
---|
1317 | /* No previous record? Start at top of chain. */ |
---|
1318 | if (!tlock->off) { |
---|
1319 | if (ofs_read(tdb, TDB_HASH_TOP(tlock->hash), |
---|
1320 | &tlock->off) == -1) |
---|
1321 | goto fail; |
---|
1322 | } else { |
---|
1323 | /* Otherwise unlock the previous record. */ |
---|
1324 | if (unlock_record(tdb, tlock->off) != 0) |
---|
1325 | goto fail; |
---|
1326 | } |
---|
1327 | |
---|
1328 | if (want_next) { |
---|
1329 | /* We have offset of old record: grab next */ |
---|
1330 | if (rec_read(tdb, tlock->off, rec) == -1) |
---|
1331 | goto fail; |
---|
1332 | tlock->off = rec->next; |
---|
1333 | } |
---|
1334 | |
---|
1335 | /* Iterate through chain */ |
---|
1336 | while( tlock->off) { |
---|
1337 | tdb_off current; |
---|
1338 | if (rec_read(tdb, tlock->off, rec) == -1) |
---|
1339 | goto fail; |
---|
1340 | |
---|
1341 | /* Detect infinite loops. From "Shlomi Yaakobovich" <Shlomi@exanet.com>. */ |
---|
1342 | if (tlock->off == rec->next) { |
---|
1343 | TDB_LOG((tdb, 0, "tdb_next_lock: loop detected.\n")); |
---|
1344 | goto fail; |
---|
1345 | } |
---|
1346 | |
---|
1347 | if (!TDB_DEAD(rec)) { |
---|
1348 | /* Woohoo: we found one! */ |
---|
1349 | if (lock_record(tdb, tlock->off) != 0) |
---|
1350 | goto fail; |
---|
1351 | return tlock->off; |
---|
1352 | } |
---|
1353 | |
---|
1354 | /* Try to clean dead ones from old traverses */ |
---|
1355 | current = tlock->off; |
---|
1356 | tlock->off = rec->next; |
---|
1357 | if (!tdb->read_only && |
---|
1358 | do_delete(tdb, current, rec) != 0) |
---|
1359 | goto fail; |
---|
1360 | } |
---|
1361 | tdb_unlock(tdb, tlock->hash, F_WRLCK); |
---|
1362 | want_next = 0; |
---|
1363 | } |
---|
1364 | /* We finished iteration without finding anything */ |
---|
1365 | return TDB_ERRCODE(TDB_SUCCESS, 0); |
---|
1366 | |
---|
1367 | fail: |
---|
1368 | tlock->off = 0; |
---|
1369 | if (tdb_unlock(tdb, tlock->hash, F_WRLCK) != 0) |
---|
1370 | TDB_LOG((tdb, 0, "tdb_next_lock: On error unlock failed!\n")); |
---|
1371 | return -1; |
---|
1372 | } |
---|
1373 | |
---|
1374 | /* traverse the entire database - calling fn(tdb, key, data) on each element. |
---|
1375 | return -1 on error or the record count traversed |
---|
1376 | if fn is NULL then it is not called |
---|
1377 | a non-zero return value from fn() indicates that the traversal should stop |
---|
1378 | */ |
---|
1379 | int tdb_traverse(TDB_CONTEXT *tdb, tdb_traverse_func fn, void *private) |
---|
1380 | { |
---|
1381 | TDB_DATA key, dbuf; |
---|
1382 | struct list_struct rec; |
---|
1383 | struct tdb_traverse_lock tl = { NULL, 0, 0 }; |
---|
1384 | int ret, count = 0; |
---|
1385 | |
---|
1386 | /* This was in the initializaton, above, but the IRIX compiler |
---|
1387 | * did not like it. crh |
---|
1388 | */ |
---|
1389 | tl.next = tdb->travlocks.next; |
---|
1390 | |
---|
1391 | /* fcntl locks don't stack: beware traverse inside traverse */ |
---|
1392 | tdb->travlocks.next = &tl; |
---|
1393 | |
---|
1394 | /* tdb_next_lock places locks on the record returned, and its chain */ |
---|
1395 | while ((ret = tdb_next_lock(tdb, &tl, &rec)) > 0) { |
---|
1396 | count++; |
---|
1397 | /* now read the full record */ |
---|
1398 | key.dptr = tdb_alloc_read(tdb, tl.off + sizeof(rec), |
---|
1399 | rec.key_len + rec.data_len); |
---|
1400 | if (!key.dptr) { |
---|
1401 | ret = -1; |
---|
1402 | if (tdb_unlock(tdb, tl.hash, F_WRLCK) != 0) |
---|
1403 | goto out; |
---|
1404 | if (unlock_record(tdb, tl.off) != 0) |
---|
1405 | TDB_LOG((tdb, 0, "tdb_traverse: key.dptr == NULL and unlock_record failed!\n")); |
---|
1406 | goto out; |
---|
1407 | } |
---|
1408 | key.dsize = rec.key_len; |
---|
1409 | dbuf.dptr = key.dptr + rec.key_len; |
---|
1410 | dbuf.dsize = rec.data_len; |
---|
1411 | |
---|
1412 | /* Drop chain lock, call out */ |
---|
1413 | if (tdb_unlock(tdb, tl.hash, F_WRLCK) != 0) { |
---|
1414 | ret = -1; |
---|
1415 | goto out; |
---|
1416 | } |
---|
1417 | if (fn && fn(tdb, key, dbuf, private)) { |
---|
1418 | /* They want us to terminate traversal */ |
---|
1419 | ret = count; |
---|
1420 | if (unlock_record(tdb, tl.off) != 0) { |
---|
1421 | TDB_LOG((tdb, 0, "tdb_traverse: unlock_record failed!\n"));; |
---|
1422 | ret = -1; |
---|
1423 | } |
---|
1424 | tdb->travlocks.next = tl.next; |
---|
1425 | SAFE_FREE(key.dptr); |
---|
1426 | return count; |
---|
1427 | } |
---|
1428 | SAFE_FREE(key.dptr); |
---|
1429 | } |
---|
1430 | out: |
---|
1431 | tdb->travlocks.next = tl.next; |
---|
1432 | if (ret < 0) |
---|
1433 | return -1; |
---|
1434 | else |
---|
1435 | return count; |
---|
1436 | } |
---|
1437 | |
---|
1438 | /* find the first entry in the database and return its key */ |
---|
1439 | TDB_DATA tdb_firstkey(TDB_CONTEXT *tdb) |
---|
1440 | { |
---|
1441 | TDB_DATA key; |
---|
1442 | struct list_struct rec; |
---|
1443 | |
---|
1444 | /* release any old lock */ |
---|
1445 | if (unlock_record(tdb, tdb->travlocks.off) != 0) |
---|
1446 | return tdb_null; |
---|
1447 | tdb->travlocks.off = tdb->travlocks.hash = 0; |
---|
1448 | |
---|
1449 | if (tdb_next_lock(tdb, &tdb->travlocks, &rec) <= 0) |
---|
1450 | return tdb_null; |
---|
1451 | /* now read the key */ |
---|
1452 | key.dsize = rec.key_len; |
---|
1453 | key.dptr =tdb_alloc_read(tdb,tdb->travlocks.off+sizeof(rec),key.dsize); |
---|
1454 | if (tdb_unlock(tdb, BUCKET(tdb->travlocks.hash), F_WRLCK) != 0) |
---|
1455 | TDB_LOG((tdb, 0, "tdb_firstkey: error occurred while tdb_unlocking!\n")); |
---|
1456 | return key; |
---|
1457 | } |
---|
1458 | |
---|
1459 | /* find the next entry in the database, returning its key */ |
---|
1460 | TDB_DATA tdb_nextkey(TDB_CONTEXT *tdb, TDB_DATA oldkey) |
---|
1461 | { |
---|
1462 | uint32_t oldhash; |
---|
1463 | TDB_DATA key = tdb_null; |
---|
1464 | struct list_struct rec; |
---|
1465 | char *k = NULL; |
---|
1466 | |
---|
1467 | /* Is locked key the old key? If so, traverse will be reliable. */ |
---|
1468 | if (tdb->travlocks.off) { |
---|
1469 | if (tdb_lock(tdb,tdb->travlocks.hash,F_WRLCK)) |
---|
1470 | return tdb_null; |
---|
1471 | if (rec_read(tdb, tdb->travlocks.off, &rec) == -1 |
---|
1472 | || !(k = tdb_alloc_read(tdb,tdb->travlocks.off+sizeof(rec), |
---|
1473 | rec.key_len)) |
---|
1474 | || memcmp(k, oldkey.dptr, oldkey.dsize) != 0) { |
---|
1475 | /* No, it wasn't: unlock it and start from scratch */ |
---|
1476 | if (unlock_record(tdb, tdb->travlocks.off) != 0) |
---|
1477 | return tdb_null; |
---|
1478 | if (tdb_unlock(tdb, tdb->travlocks.hash, F_WRLCK) != 0) |
---|
1479 | return tdb_null; |
---|
1480 | tdb->travlocks.off = 0; |
---|
1481 | } |
---|
1482 | |
---|
1483 | SAFE_FREE(k); |
---|
1484 | } |
---|
1485 | |
---|
1486 | if (!tdb->travlocks.off) { |
---|
1487 | /* No previous element: do normal find, and lock record */ |
---|
1488 | tdb->travlocks.off = tdb_find_lock_hash(tdb, oldkey, tdb->hash_fn(&oldkey), F_WRLCK, &rec); |
---|
1489 | if (!tdb->travlocks.off) |
---|
1490 | return tdb_null; |
---|
1491 | tdb->travlocks.hash = BUCKET(rec.full_hash); |
---|
1492 | if (lock_record(tdb, tdb->travlocks.off) != 0) { |
---|
1493 | TDB_LOG((tdb, 0, "tdb_nextkey: lock_record failed (%s)!\n", strerror(errno))); |
---|
1494 | return tdb_null; |
---|
1495 | } |
---|
1496 | } |
---|
1497 | oldhash = tdb->travlocks.hash; |
---|
1498 | |
---|
1499 | /* Grab next record: locks chain and returned record, |
---|
1500 | unlocks old record */ |
---|
1501 | if (tdb_next_lock(tdb, &tdb->travlocks, &rec) > 0) { |
---|
1502 | key.dsize = rec.key_len; |
---|
1503 | key.dptr = tdb_alloc_read(tdb, tdb->travlocks.off+sizeof(rec), |
---|
1504 | key.dsize); |
---|
1505 | /* Unlock the chain of this new record */ |
---|
1506 | if (tdb_unlock(tdb, tdb->travlocks.hash, F_WRLCK) != 0) |
---|
1507 | TDB_LOG((tdb, 0, "tdb_nextkey: WARNING tdb_unlock failed!\n")); |
---|
1508 | } |
---|
1509 | /* Unlock the chain of old record */ |
---|
1510 | if (tdb_unlock(tdb, BUCKET(oldhash), F_WRLCK) != 0) |
---|
1511 | TDB_LOG((tdb, 0, "tdb_nextkey: WARNING tdb_unlock failed!\n")); |
---|
1512 | return key; |
---|
1513 | } |
---|
1514 | |
---|
1515 | /* delete an entry in the database given a key */ |
---|
1516 | static int tdb_delete_hash(TDB_CONTEXT *tdb, TDB_DATA key, uint32_t hash) |
---|
1517 | { |
---|
1518 | tdb_off rec_ptr; |
---|
1519 | struct list_struct rec; |
---|
1520 | int ret; |
---|
1521 | |
---|
1522 | if (!(rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK, &rec))) |
---|
1523 | return -1; |
---|
1524 | ret = do_delete(tdb, rec_ptr, &rec); |
---|
1525 | if (tdb_unlock(tdb, BUCKET(rec.full_hash), F_WRLCK) != 0) |
---|
1526 | TDB_LOG((tdb, 0, "tdb_delete: WARNING tdb_unlock failed!\n")); |
---|
1527 | return ret; |
---|
1528 | } |
---|
1529 | |
---|
1530 | int tdb_delete(TDB_CONTEXT *tdb, TDB_DATA key) |
---|
1531 | { |
---|
1532 | uint32_t hash = tdb->hash_fn(&key); |
---|
1533 | return tdb_delete_hash(tdb, key, hash); |
---|
1534 | } |
---|
1535 | |
---|
1536 | /* store an element in the database, replacing any existing element |
---|
1537 | with the same key |
---|
1538 | |
---|
1539 | return 0 on success, -1 on failure |
---|
1540 | */ |
---|
1541 | int tdb_store(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf, int flag) |
---|
1542 | { |
---|
1543 | struct list_struct rec; |
---|
1544 | uint32_t hash; |
---|
1545 | tdb_off rec_ptr; |
---|
1546 | char *p = NULL; |
---|
1547 | int ret = 0; |
---|
1548 | |
---|
1549 | /* find which hash bucket it is in */ |
---|
1550 | hash = tdb->hash_fn(&key); |
---|
1551 | if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1) |
---|
1552 | return -1; |
---|
1553 | |
---|
1554 | /* check for it existing, on insert. */ |
---|
1555 | if (flag == TDB_INSERT) { |
---|
1556 | if (tdb_exists_hash(tdb, key, hash)) { |
---|
1557 | tdb->ecode = TDB_ERR_EXISTS; |
---|
1558 | goto fail; |
---|
1559 | } |
---|
1560 | } else { |
---|
1561 | /* first try in-place update, on modify or replace. */ |
---|
1562 | if (tdb_update_hash(tdb, key, hash, dbuf) == 0) |
---|
1563 | goto out; |
---|
1564 | if (tdb->ecode == TDB_ERR_NOEXIST && |
---|
1565 | flag == TDB_MODIFY) { |
---|
1566 | /* if the record doesn't exist and we are in TDB_MODIFY mode then |
---|
1567 | we should fail the store */ |
---|
1568 | goto fail; |
---|
1569 | } |
---|
1570 | } |
---|
1571 | /* reset the error code potentially set by the tdb_update() */ |
---|
1572 | tdb->ecode = TDB_SUCCESS; |
---|
1573 | |
---|
1574 | /* delete any existing record - if it doesn't exist we don't |
---|
1575 | care. Doing this first reduces fragmentation, and avoids |
---|
1576 | coalescing with `allocated' block before it's updated. */ |
---|
1577 | if (flag != TDB_INSERT) |
---|
1578 | tdb_delete_hash(tdb, key, hash); |
---|
1579 | |
---|
1580 | /* Copy key+value *before* allocating free space in case malloc |
---|
1581 | fails and we are left with a dead spot in the tdb. */ |
---|
1582 | |
---|
1583 | if (!(p = (char *)talloc_size(tdb, key.dsize + dbuf.dsize))) { |
---|
1584 | tdb->ecode = TDB_ERR_OOM; |
---|
1585 | goto fail; |
---|
1586 | } |
---|
1587 | |
---|
1588 | memcpy(p, key.dptr, key.dsize); |
---|
1589 | if (dbuf.dsize) |
---|
1590 | memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize); |
---|
1591 | |
---|
1592 | /* we have to allocate some space */ |
---|
1593 | if (!(rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize, &rec))) |
---|
1594 | goto fail; |
---|
1595 | |
---|
1596 | /* Read hash top into next ptr */ |
---|
1597 | if (ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1) |
---|
1598 | goto fail; |
---|
1599 | |
---|
1600 | rec.key_len = key.dsize; |
---|
1601 | rec.data_len = dbuf.dsize; |
---|
1602 | rec.full_hash = hash; |
---|
1603 | rec.magic = TDB_MAGIC; |
---|
1604 | |
---|
1605 | /* write out and point the top of the hash chain at it */ |
---|
1606 | if (rec_write(tdb, rec_ptr, &rec) == -1 |
---|
1607 | || tdb_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1 |
---|
1608 | || ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) { |
---|
1609 | /* Need to tdb_unallocate() here */ |
---|
1610 | goto fail; |
---|
1611 | } |
---|
1612 | out: |
---|
1613 | SAFE_FREE(p); |
---|
1614 | tdb_unlock(tdb, BUCKET(hash), F_WRLCK); |
---|
1615 | return ret; |
---|
1616 | fail: |
---|
1617 | ret = -1; |
---|
1618 | goto out; |
---|
1619 | } |
---|
1620 | |
---|
1621 | /* Attempt to append data to an entry in place - this only works if the new data size |
---|
1622 | is <= the old data size and the key exists. |
---|
1623 | on failure return -1. Record must be locked before calling. |
---|
1624 | */ |
---|
1625 | static int tdb_append_inplace(TDB_CONTEXT *tdb, TDB_DATA key, uint32_t hash, TDB_DATA new_dbuf) |
---|
1626 | { |
---|
1627 | struct list_struct rec; |
---|
1628 | tdb_off rec_ptr; |
---|
1629 | |
---|
1630 | /* find entry */ |
---|
1631 | if (!(rec_ptr = tdb_find(tdb, key, hash, &rec))) |
---|
1632 | return -1; |
---|
1633 | |
---|
1634 | /* Append of 0 is always ok. */ |
---|
1635 | if (new_dbuf.dsize == 0) |
---|
1636 | return 0; |
---|
1637 | |
---|
1638 | /* must be long enough for key, old data + new data and tailer */ |
---|
1639 | if (rec.rec_len < key.dsize + rec.data_len + new_dbuf.dsize + sizeof(tdb_off)) { |
---|
1640 | /* No room. */ |
---|
1641 | tdb->ecode = TDB_SUCCESS; /* Not really an error */ |
---|
1642 | return -1; |
---|
1643 | } |
---|
1644 | |
---|
1645 | if (tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len + rec.data_len, |
---|
1646 | new_dbuf.dptr, new_dbuf.dsize) == -1) |
---|
1647 | return -1; |
---|
1648 | |
---|
1649 | /* update size */ |
---|
1650 | rec.data_len += new_dbuf.dsize; |
---|
1651 | return rec_write(tdb, rec_ptr, &rec); |
---|
1652 | } |
---|
1653 | |
---|
1654 | /* Append to an entry. Create if not exist. */ |
---|
1655 | |
---|
1656 | int tdb_append(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA new_dbuf) |
---|
1657 | { |
---|
1658 | struct list_struct rec; |
---|
1659 | uint32_t hash; |
---|
1660 | tdb_off rec_ptr; |
---|
1661 | char *p = NULL; |
---|
1662 | int ret = 0; |
---|
1663 | size_t new_data_size = 0; |
---|
1664 | |
---|
1665 | /* find which hash bucket it is in */ |
---|
1666 | hash = tdb->hash_fn(&key); |
---|
1667 | if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1) |
---|
1668 | return -1; |
---|
1669 | |
---|
1670 | /* first try in-place. */ |
---|
1671 | if (tdb_append_inplace(tdb, key, hash, new_dbuf) == 0) |
---|
1672 | goto out; |
---|
1673 | |
---|
1674 | /* reset the error code potentially set by the tdb_append_inplace() */ |
---|
1675 | tdb->ecode = TDB_SUCCESS; |
---|
1676 | |
---|
1677 | /* find entry */ |
---|
1678 | if (!(rec_ptr = tdb_find(tdb, key, hash, &rec))) { |
---|
1679 | if (tdb->ecode != TDB_ERR_NOEXIST) |
---|
1680 | goto fail; |
---|
1681 | |
---|
1682 | /* Not found - create. */ |
---|
1683 | |
---|
1684 | ret = tdb_store(tdb, key, new_dbuf, TDB_INSERT); |
---|
1685 | goto out; |
---|
1686 | } |
---|
1687 | |
---|
1688 | new_data_size = rec.data_len + new_dbuf.dsize; |
---|
1689 | |
---|
1690 | /* Copy key+old_value+value *before* allocating free space in case malloc |
---|
1691 | fails and we are left with a dead spot in the tdb. */ |
---|
1692 | |
---|
1693 | if (!(p = (char *)talloc_size(tdb, key.dsize + new_data_size))) { |
---|
1694 | tdb->ecode = TDB_ERR_OOM; |
---|
1695 | goto fail; |
---|
1696 | } |
---|
1697 | |
---|
1698 | /* Copy the key in place. */ |
---|
1699 | memcpy(p, key.dptr, key.dsize); |
---|
1700 | |
---|
1701 | /* Now read the old data into place. */ |
---|
1702 | if (rec.data_len && |
---|
1703 | tdb_read(tdb, rec_ptr + sizeof(rec) + rec.key_len, p + key.dsize, rec.data_len, 0) == -1) |
---|
1704 | goto fail; |
---|
1705 | |
---|
1706 | /* Finally append the new data. */ |
---|
1707 | if (new_dbuf.dsize) |
---|
1708 | memcpy(p+key.dsize+rec.data_len, new_dbuf.dptr, new_dbuf.dsize); |
---|
1709 | |
---|
1710 | /* delete any existing record - if it doesn't exist we don't |
---|
1711 | care. Doing this first reduces fragmentation, and avoids |
---|
1712 | coalescing with `allocated' block before it's updated. */ |
---|
1713 | |
---|
1714 | tdb_delete_hash(tdb, key, hash); |
---|
1715 | |
---|
1716 | if (!(rec_ptr = tdb_allocate(tdb, key.dsize + new_data_size, &rec))) |
---|
1717 | goto fail; |
---|
1718 | |
---|
1719 | /* Read hash top into next ptr */ |
---|
1720 | if (ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1) |
---|
1721 | goto fail; |
---|
1722 | |
---|
1723 | rec.key_len = key.dsize; |
---|
1724 | rec.data_len = new_data_size; |
---|
1725 | rec.full_hash = hash; |
---|
1726 | rec.magic = TDB_MAGIC; |
---|
1727 | |
---|
1728 | /* write out and point the top of the hash chain at it */ |
---|
1729 | if (rec_write(tdb, rec_ptr, &rec) == -1 |
---|
1730 | || tdb_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+new_data_size)==-1 |
---|
1731 | || ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) { |
---|
1732 | /* Need to tdb_unallocate() here */ |
---|
1733 | goto fail; |
---|
1734 | } |
---|
1735 | |
---|
1736 | out: |
---|
1737 | SAFE_FREE(p); |
---|
1738 | tdb_unlock(tdb, BUCKET(hash), F_WRLCK); |
---|
1739 | return ret; |
---|
1740 | |
---|
1741 | fail: |
---|
1742 | ret = -1; |
---|
1743 | goto out; |
---|
1744 | } |
---|
1745 | |
---|
1746 | static int tdb_already_open(dev_t device, |
---|
1747 | ino_t ino) |
---|
1748 | { |
---|
1749 | TDB_CONTEXT *i; |
---|
1750 | |
---|
1751 | for (i = tdbs; i; i = i->next) { |
---|
1752 | if (i->device == device && i->inode == ino) { |
---|
1753 | return 1; |
---|
1754 | } |
---|
1755 | } |
---|
1756 | |
---|
1757 | return 0; |
---|
1758 | } |
---|
1759 | |
---|
1760 | /* open the database, creating it if necessary |
---|
1761 | |
---|
1762 | The open_flags and mode are passed straight to the open call on the |
---|
1763 | database file. A flags value of O_WRONLY is invalid. The hash size |
---|
1764 | is advisory, use zero for a default value. |
---|
1765 | |
---|
1766 | Return is NULL on error, in which case errno is also set. Don't |
---|
1767 | try to call tdb_error or tdb_errname, just do strerror(errno). |
---|
1768 | |
---|
1769 | @param name may be NULL for internal databases. */ |
---|
1770 | TDB_CONTEXT *tdb_open(const char *name, int hash_size, int tdb_flags, |
---|
1771 | int open_flags, mode_t mode) |
---|
1772 | { |
---|
1773 | return tdb_open_ex(name, hash_size, tdb_flags, open_flags, mode, NULL, NULL); |
---|
1774 | } |
---|
1775 | |
---|
1776 | /* a default logging function */ |
---|
1777 | static void null_log_fn(TDB_CONTEXT *tdb __attribute__((unused)), |
---|
1778 | int level __attribute__((unused)), |
---|
1779 | const char *fmt __attribute__((unused)), ...) |
---|
1780 | { |
---|
1781 | } |
---|
1782 | |
---|
1783 | |
---|
1784 | TDB_CONTEXT *tdb_open_ex(const char *name, int hash_size, int tdb_flags, |
---|
1785 | int open_flags, mode_t mode, |
---|
1786 | tdb_log_func log_fn, |
---|
1787 | tdb_hash_func hash_fn) |
---|
1788 | { |
---|
1789 | TDB_CONTEXT *tdb; |
---|
1790 | struct stat st; |
---|
1791 | int rev = 0, locked = 0; |
---|
1792 | uint8_t *vp; |
---|
1793 | uint32_t vertest; |
---|
1794 | |
---|
1795 | if (!(tdb = talloc_zero(name, TDB_CONTEXT))) { |
---|
1796 | /* Can't log this */ |
---|
1797 | errno = ENOMEM; |
---|
1798 | goto fail; |
---|
1799 | } |
---|
1800 | tdb->fd = -1; |
---|
1801 | tdb->name = NULL; |
---|
1802 | tdb->map_ptr = NULL; |
---|
1803 | tdb->flags = tdb_flags; |
---|
1804 | tdb->open_flags = open_flags; |
---|
1805 | tdb->log_fn = log_fn?log_fn:null_log_fn; |
---|
1806 | tdb->hash_fn = hash_fn ? hash_fn : default_tdb_hash; |
---|
1807 | |
---|
1808 | if ((open_flags & O_ACCMODE) == O_WRONLY) { |
---|
1809 | TDB_LOG((tdb, 0, "tdb_open_ex: can't open tdb %s write-only\n", |
---|
1810 | name)); |
---|
1811 | errno = EINVAL; |
---|
1812 | goto fail; |
---|
1813 | } |
---|
1814 | |
---|
1815 | if (hash_size == 0) |
---|
1816 | hash_size = DEFAULT_HASH_SIZE; |
---|
1817 | if ((open_flags & O_ACCMODE) == O_RDONLY) { |
---|
1818 | tdb->read_only = 1; |
---|
1819 | /* read only databases don't do locking or clear if first */ |
---|
1820 | tdb->flags |= TDB_NOLOCK; |
---|
1821 | tdb->flags &= ~TDB_CLEAR_IF_FIRST; |
---|
1822 | } |
---|
1823 | |
---|
1824 | /* internal databases don't mmap or lock, and start off cleared */ |
---|
1825 | if (tdb->flags & TDB_INTERNAL) { |
---|
1826 | tdb->flags |= (TDB_NOLOCK | TDB_NOMMAP); |
---|
1827 | tdb->flags &= ~TDB_CLEAR_IF_FIRST; |
---|
1828 | if (tdb_new_database(tdb, hash_size) != 0) { |
---|
1829 | TDB_LOG((tdb, 0, "tdb_open_ex: tdb_new_database failed!")); |
---|
1830 | goto fail; |
---|
1831 | } |
---|
1832 | goto internal; |
---|
1833 | } |
---|
1834 | |
---|
1835 | if ((tdb->fd = open(name, open_flags, mode)) == -1) { |
---|
1836 | TDB_LOG((tdb, 5, "tdb_open_ex: could not open file %s: %s\n", |
---|
1837 | name, strerror(errno))); |
---|
1838 | goto fail; /* errno set by open(2) */ |
---|
1839 | } |
---|
1840 | |
---|
1841 | /* ensure there is only one process initialising at once */ |
---|
1842 | if (tdb_brlock(tdb, GLOBAL_LOCK, F_WRLCK, F_SETLKW, 0) == -1) { |
---|
1843 | TDB_LOG((tdb, 0, "tdb_open_ex: failed to get global lock on %s: %s\n", |
---|
1844 | name, strerror(errno))); |
---|
1845 | goto fail; /* errno set by tdb_brlock */ |
---|
1846 | } |
---|
1847 | |
---|
1848 | /* we need to zero database if we are the only one with it open */ |
---|
1849 | if ((tdb_flags & TDB_CLEAR_IF_FIRST) && |
---|
1850 | (locked = (tdb_brlock(tdb, ACTIVE_LOCK, F_WRLCK, F_SETLK, 0) == 0))) { |
---|
1851 | open_flags |= O_CREAT; |
---|
1852 | if (ftruncate(tdb->fd, 0) == -1) { |
---|
1853 | TDB_LOG((tdb, 0, "tdb_open_ex: " |
---|
1854 | "failed to truncate %s: %s\n", |
---|
1855 | name, strerror(errno))); |
---|
1856 | goto fail; /* errno set by ftruncate */ |
---|
1857 | } |
---|
1858 | } |
---|
1859 | |
---|
1860 | if (read(tdb->fd, &tdb->header, sizeof(tdb->header)) != sizeof(tdb->header) |
---|
1861 | || strcmp(tdb->header.magic_food, TDB_MAGIC_FOOD) != 0 |
---|
1862 | || (tdb->header.version != TDB_VERSION |
---|
1863 | && !(rev = (tdb->header.version==TDB_BYTEREV(TDB_VERSION))))) { |
---|
1864 | /* its not a valid database - possibly initialise it */ |
---|
1865 | if (!(open_flags & O_CREAT) || tdb_new_database(tdb, hash_size) == -1) { |
---|
1866 | errno = EIO; /* ie bad format or something */ |
---|
1867 | goto fail; |
---|
1868 | } |
---|
1869 | rev = (tdb->flags & TDB_CONVERT); |
---|
1870 | } |
---|
1871 | vp = (uint8_t *)&tdb->header.version; |
---|
1872 | vertest = (((uint32_t)vp[0]) << 24) | (((uint32_t)vp[1]) << 16) | |
---|
1873 | (((uint32_t)vp[2]) << 8) | (uint32_t)vp[3]; |
---|
1874 | tdb->flags |= (vertest==TDB_VERSION) ? TDB_BIGENDIAN : 0; |
---|
1875 | if (!rev) |
---|
1876 | tdb->flags &= ~TDB_CONVERT; |
---|
1877 | else { |
---|
1878 | tdb->flags |= TDB_CONVERT; |
---|
1879 | convert(&tdb->header, sizeof(tdb->header)); |
---|
1880 | } |
---|
1881 | if (fstat(tdb->fd, &st) == -1) |
---|
1882 | goto fail; |
---|
1883 | |
---|
1884 | /* Is it already in the open list? If so, fail. */ |
---|
1885 | if (tdb_already_open(st.st_dev, st.st_ino)) { |
---|
1886 | TDB_LOG((tdb, 2, "tdb_open_ex: " |
---|
1887 | "%s (%d,%d) is already open in this process\n", |
---|
1888 | name, (int)st.st_dev, (int)st.st_ino)); |
---|
1889 | errno = EBUSY; |
---|
1890 | goto fail; |
---|
1891 | } |
---|
1892 | |
---|
1893 | if (!(tdb->name = (char *)talloc_strdup(tdb, name))) { |
---|
1894 | errno = ENOMEM; |
---|
1895 | goto fail; |
---|
1896 | } |
---|
1897 | |
---|
1898 | tdb->map_size = st.st_size; |
---|
1899 | tdb->device = st.st_dev; |
---|
1900 | tdb->inode = st.st_ino; |
---|
1901 | tdb->locked = talloc_zero_array(tdb, struct tdb_lock_type, |
---|
1902 | tdb->header.hash_size+1); |
---|
1903 | if (!tdb->locked) { |
---|
1904 | TDB_LOG((tdb, 2, "tdb_open_ex: " |
---|
1905 | "failed to allocate lock structure for %s\n", |
---|
1906 | name)); |
---|
1907 | errno = ENOMEM; |
---|
1908 | goto fail; |
---|
1909 | } |
---|
1910 | tdb_mmap(tdb); |
---|
1911 | if (locked) { |
---|
1912 | if (tdb_brlock(tdb, ACTIVE_LOCK, F_UNLCK, F_SETLK, 0) == -1) { |
---|
1913 | TDB_LOG((tdb, 0, "tdb_open_ex: " |
---|
1914 | "failed to take ACTIVE_LOCK on %s: %s\n", |
---|
1915 | name, strerror(errno))); |
---|
1916 | goto fail; |
---|
1917 | } |
---|
1918 | |
---|
1919 | } |
---|
1920 | |
---|
1921 | /* We always need to do this if the CLEAR_IF_FIRST flag is set, even if |
---|
1922 | we didn't get the initial exclusive lock as we need to let all other |
---|
1923 | users know we're using it. */ |
---|
1924 | |
---|
1925 | if (tdb_flags & TDB_CLEAR_IF_FIRST) { |
---|
1926 | /* leave this lock in place to indicate it's in use */ |
---|
1927 | if (tdb_brlock(tdb, ACTIVE_LOCK, F_RDLCK, F_SETLKW, 0) == -1) |
---|
1928 | goto fail; |
---|
1929 | } |
---|
1930 | |
---|
1931 | |
---|
1932 | internal: |
---|
1933 | /* Internal (memory-only) databases skip all the code above to |
---|
1934 | * do with disk files, and resume here by releasing their |
---|
1935 | * global lock and hooking into the active list. */ |
---|
1936 | if (tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0) == -1) |
---|
1937 | goto fail; |
---|
1938 | tdb->next = tdbs; |
---|
1939 | tdbs = tdb; |
---|
1940 | return tdb; |
---|
1941 | |
---|
1942 | fail: |
---|
1943 | { int save_errno = errno; |
---|
1944 | |
---|
1945 | if (!tdb) |
---|
1946 | return NULL; |
---|
1947 | |
---|
1948 | if (tdb->map_ptr) { |
---|
1949 | if (tdb->flags & TDB_INTERNAL) |
---|
1950 | SAFE_FREE(tdb->map_ptr); |
---|
1951 | else |
---|
1952 | tdb_munmap(tdb); |
---|
1953 | } |
---|
1954 | SAFE_FREE(tdb->name); |
---|
1955 | if (tdb->fd != -1) |
---|
1956 | if (close(tdb->fd) != 0) |
---|
1957 | TDB_LOG((tdb, 5, "tdb_open_ex: failed to close tdb->fd on error!\n")); |
---|
1958 | SAFE_FREE(tdb->locked); |
---|
1959 | SAFE_FREE(tdb); |
---|
1960 | errno = save_errno; |
---|
1961 | return NULL; |
---|
1962 | } |
---|
1963 | } |
---|
1964 | |
---|
1965 | /** |
---|
1966 | * Close a database. |
---|
1967 | * |
---|
1968 | * @returns -1 for error; 0 for success. |
---|
1969 | **/ |
---|
1970 | int tdb_close(TDB_CONTEXT *tdb) |
---|
1971 | { |
---|
1972 | TDB_CONTEXT **i; |
---|
1973 | int ret = 0; |
---|
1974 | |
---|
1975 | if (tdb->map_ptr) { |
---|
1976 | if (tdb->flags & TDB_INTERNAL) |
---|
1977 | SAFE_FREE(tdb->map_ptr); |
---|
1978 | else |
---|
1979 | tdb_munmap(tdb); |
---|
1980 | } |
---|
1981 | SAFE_FREE(tdb->name); |
---|
1982 | if (tdb->fd != -1) |
---|
1983 | ret = close(tdb->fd); |
---|
1984 | SAFE_FREE(tdb->locked); |
---|
1985 | |
---|
1986 | /* Remove from contexts list */ |
---|
1987 | for (i = &tdbs; *i; i = &(*i)->next) { |
---|
1988 | if (*i == tdb) { |
---|
1989 | *i = tdb->next; |
---|
1990 | break; |
---|
1991 | } |
---|
1992 | } |
---|
1993 | |
---|
1994 | memset(tdb, 0, sizeof(*tdb)); |
---|
1995 | SAFE_FREE(tdb); |
---|
1996 | |
---|
1997 | return ret; |
---|
1998 | } |
---|
1999 | |
---|
2000 | /* lock/unlock entire database */ |
---|
2001 | int tdb_lockall(TDB_CONTEXT *tdb) |
---|
2002 | { |
---|
2003 | uint32_t i; |
---|
2004 | |
---|
2005 | /* There are no locks on read-only dbs */ |
---|
2006 | if (tdb->read_only) |
---|
2007 | return TDB_ERRCODE(TDB_ERR_LOCK, -1); |
---|
2008 | for (i = 0; i < tdb->header.hash_size; i++) |
---|
2009 | if (tdb_lock(tdb, i, F_WRLCK)) |
---|
2010 | break; |
---|
2011 | |
---|
2012 | /* If error, release locks we have... */ |
---|
2013 | if (i < tdb->header.hash_size) { |
---|
2014 | uint32_t j; |
---|
2015 | |
---|
2016 | for ( j = 0; j < i; j++) |
---|
2017 | tdb_unlock(tdb, j, F_WRLCK); |
---|
2018 | return TDB_ERRCODE(TDB_ERR_NOLOCK, -1); |
---|
2019 | } |
---|
2020 | |
---|
2021 | return 0; |
---|
2022 | } |
---|
2023 | void tdb_unlockall(TDB_CONTEXT *tdb) |
---|
2024 | { |
---|
2025 | uint32_t i; |
---|
2026 | for (i=0; i < tdb->header.hash_size; i++) |
---|
2027 | tdb_unlock(tdb, i, F_WRLCK); |
---|
2028 | } |
---|
2029 | |
---|
2030 | /* lock/unlock one hash chain. This is meant to be used to reduce |
---|
2031 | contention - it cannot guarantee how many records will be locked */ |
---|
2032 | int tdb_chainlock(TDB_CONTEXT *tdb, TDB_DATA key) |
---|
2033 | { |
---|
2034 | return tdb_lock(tdb, BUCKET(tdb->hash_fn(&key)), F_WRLCK); |
---|
2035 | } |
---|
2036 | |
---|
2037 | int tdb_chainunlock(TDB_CONTEXT *tdb, TDB_DATA key) |
---|
2038 | { |
---|
2039 | return tdb_unlock(tdb, BUCKET(tdb->hash_fn(&key)), F_WRLCK); |
---|
2040 | } |
---|
2041 | |
---|
2042 | int tdb_chainlock_read(TDB_CONTEXT *tdb, TDB_DATA key) |
---|
2043 | { |
---|
2044 | return tdb_lock(tdb, BUCKET(tdb->hash_fn(&key)), F_RDLCK); |
---|
2045 | } |
---|
2046 | |
---|
2047 | int tdb_chainunlock_read(TDB_CONTEXT *tdb, TDB_DATA key) |
---|
2048 | { |
---|
2049 | return tdb_unlock(tdb, BUCKET(tdb->hash_fn(&key)), F_RDLCK); |
---|
2050 | } |
---|
2051 | |
---|
2052 | |
---|
2053 | /* register a loging function */ |
---|
2054 | void tdb_logging_function(TDB_CONTEXT *tdb, void (*fn)(TDB_CONTEXT *, int , const char *, ...)) |
---|
2055 | { |
---|
2056 | tdb->log_fn = fn?fn:null_log_fn; |
---|
2057 | } |
---|
2058 | |
---|
2059 | |
---|
2060 | /* reopen a tdb - this can be used after a fork to ensure that we have an independent |
---|
2061 | seek pointer from our parent and to re-establish locks */ |
---|
2062 | int tdb_reopen(TDB_CONTEXT *tdb) |
---|
2063 | { |
---|
2064 | struct stat st; |
---|
2065 | |
---|
2066 | if (tdb->flags & TDB_INTERNAL) |
---|
2067 | return 0; /* Nothing to do. */ |
---|
2068 | if (tdb_munmap(tdb) != 0) { |
---|
2069 | TDB_LOG((tdb, 0, "tdb_reopen: munmap failed (%s)\n", strerror(errno))); |
---|
2070 | goto fail; |
---|
2071 | } |
---|
2072 | if (close(tdb->fd) != 0) |
---|
2073 | TDB_LOG((tdb, 0, "tdb_reopen: WARNING closing tdb->fd failed!\n")); |
---|
2074 | tdb->fd = open(tdb->name, tdb->open_flags & ~(O_CREAT|O_TRUNC), 0); |
---|
2075 | if (tdb->fd == -1) { |
---|
2076 | TDB_LOG((tdb, 0, "tdb_reopen: open failed (%s)\n", strerror(errno))); |
---|
2077 | goto fail; |
---|
2078 | } |
---|
2079 | if (fstat(tdb->fd, &st) != 0) { |
---|
2080 | TDB_LOG((tdb, 0, "tdb_reopen: fstat failed (%s)\n", strerror(errno))); |
---|
2081 | goto fail; |
---|
2082 | } |
---|
2083 | if (st.st_ino != tdb->inode || st.st_dev != tdb->device) { |
---|
2084 | TDB_LOG((tdb, 0, "tdb_reopen: file dev/inode has changed!\n")); |
---|
2085 | goto fail; |
---|
2086 | } |
---|
2087 | tdb_mmap(tdb); |
---|
2088 | if ((tdb->flags & TDB_CLEAR_IF_FIRST) && (tdb_brlock(tdb, ACTIVE_LOCK, F_RDLCK, F_SETLKW, 0) == -1)) { |
---|
2089 | TDB_LOG((tdb, 0, "tdb_reopen: failed to obtain active lock\n")); |
---|
2090 | goto fail; |
---|
2091 | } |
---|
2092 | |
---|
2093 | return 0; |
---|
2094 | |
---|
2095 | fail: |
---|
2096 | tdb_close(tdb); |
---|
2097 | return -1; |
---|
2098 | } |
---|
2099 | |
---|
2100 | /* Not general: only works if single writer. */ |
---|
2101 | TDB_CONTEXT *tdb_copy(TDB_CONTEXT *tdb, const char *outfile) |
---|
2102 | { |
---|
2103 | int fd, saved_errno; |
---|
2104 | TDB_CONTEXT *copy; |
---|
2105 | |
---|
2106 | fd = open(outfile, O_TRUNC|O_CREAT|O_WRONLY, 0640); |
---|
2107 | if (fd < 0) |
---|
2108 | return NULL; |
---|
2109 | if (tdb->map_ptr) { |
---|
2110 | if (write(fd,tdb->map_ptr,tdb->map_size) != (int)tdb->map_size) |
---|
2111 | goto fail; |
---|
2112 | } else { |
---|
2113 | char buf[65536]; |
---|
2114 | int r; |
---|
2115 | |
---|
2116 | lseek(tdb->fd, 0, SEEK_SET); |
---|
2117 | while ((r = read(tdb->fd, buf, sizeof(buf))) > 0) { |
---|
2118 | if (write(fd, buf, r) != r) |
---|
2119 | goto fail; |
---|
2120 | } |
---|
2121 | if (r < 0) |
---|
2122 | goto fail; |
---|
2123 | } |
---|
2124 | copy = tdb_open(outfile, 0, 0, O_RDWR, 0); |
---|
2125 | if (!copy) |
---|
2126 | goto fail; |
---|
2127 | close(fd); |
---|
2128 | return copy; |
---|
2129 | |
---|
2130 | fail: |
---|
2131 | saved_errno = errno; |
---|
2132 | close(fd); |
---|
2133 | unlink(outfile); |
---|
2134 | errno = saved_errno; |
---|
2135 | return NULL; |
---|
2136 | } |
---|
2137 | |
---|
2138 | /* reopen all tdb's */ |
---|
2139 | int tdb_reopen_all(void) |
---|
2140 | { |
---|
2141 | TDB_CONTEXT *tdb; |
---|
2142 | |
---|
2143 | for (tdb=tdbs; tdb; tdb = tdb->next) { |
---|
2144 | /* Ensure no clear-if-first. */ |
---|
2145 | tdb->flags &= ~TDB_CLEAR_IF_FIRST; |
---|
2146 | if (tdb_reopen(tdb) != 0) |
---|
2147 | return -1; |
---|
2148 | } |
---|
2149 | |
---|
2150 | return 0; |
---|
2151 | } |
---|