1 /* $Cambridge: exim/src/src/lookups/cdb.c,v 1.3 2005/06/27 14:29:44 ph10 Exp $ */
3 /*************************************************
4 * Exim - an Internet mail transport agent *
5 *************************************************/
8 * Exim - CDB database lookup module
9 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
11 * Copyright (c) 1998 Nigel Metheringham, Planet Online Ltd
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License
15 * as published by the Free Software Foundation; either version 2
16 * of the License, or (at your option) any later version.
18 * --------------------------------------------------------------
19 * Modified by PH for Exim 4:
20 * Changed over to using unsigned chars
21 * Makes use of lf_check_file() for file checking
22 * --------------------------------------------------------------
24 * This program is distributed in the hope that it will be useful,
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 * GNU General Public License for more details.
29 * You should have received a copy of the GNU General Public License
30 * along with this program; if not, write to the Free Software
31 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
35 * This code implements Dan Bernstein's Constant DataBase (cdb) spec.
36 * Information, the spec and sample code for cdb can be obtained from
37 * http://www.pobox.com/~djb/cdb.html
39 * This implementation borrows some code from Dan Bernstein's
40 * implementation (which has no license restrictions applied to it).
41 * This (read-only) implementation is completely contained within
42 * cdb.[ch]; it does *not* link against an external cdb library.
45 * There are 2 variants included within this code. One uses MMAP and
46 * should give better performance especially for multiple lookups on a
47 * modern machine. The other is the default implementation which is
48 * used in the case where the MMAP fails or if MMAP was not compiled
49 * in. This implementation is the same as the original reference cdb
50 * implementation. The MMAP version is compiled in if the HAVE_MMAP
51 * preprocessor define is defined - this should be set in the system
58 #include "lf_functions.h"
62 # include <sys/mman.h>
63 /* Not all implementations declare MAP_FAILED */
65 # define MAP_FAILED ((void *) -1)
66 # endif /* MAP_FAILED */
67 #endif /* HAVE_MMAP */
70 #define CDB_HASH_SPLIT 256 /* number of pieces the hash table is split into */
71 #define CDB_HASH_MASK 255 /* mask used to AND off the split index from a hash value */
72 #define CDB_HASH_ENTRY 8 /* size in bytes of each table entry (two packed 4-byte numbers) */
73 #define CDB_HASH_TABLE (CDB_HASH_SPLIT * CDB_HASH_ENTRY) /* size in bytes of the offset table read from the start of the file */
75 /* State information for cdb databases that are open. NB: while the db
76 * is open its contents will not change (cdb dbs are normally updated
77 * atomically by renaming). However the lifetime of one of these
78 * state structures should be limited - i.e. a long running daemon
79 * that opens one may hit problems....
89 /* 32 bit unsigned type (used for cdb hash values) - this assumes that unsigned int is 32 bits wide, as it is on all modern machines */
90 typedef unsigned int uint32;
94 * Internal function to make hash value */
97 cdb_hash(uschar *buf, unsigned int len)
105 h ^= (uint32) *buf++;
112 * Internal function to read len bytes from disk, coping with oddities */
122 r = Uread(fd,buf,len);
123 while ((r == -1) && (errno == EINTR));
124 if (r == -1) return -1;
125 if (r == 0) { errno = EIO; return -1; }
134 * Internal function to parse 4 byte number (endian independent) */
137 cdb_unpack(uschar *buf)
140 num = buf[3]; num <<= 8;
141 num += buf[2]; num <<= 8;
142 num += buf[1]; num <<= 8;
148 cdb_open(uschar *filename,
152 struct cdb_state *cdbp;
156 fileno = Uopen(filename, O_RDONLY, 0);
158 int save_errno = errno;
159 *errmsg = string_open_failed(errno, "%s for cdb lookup", filename);
164 if (fstat(fileno, &statbuf) == 0) {
165 /* If this is a valid file, then it *must* be at least
166 * CDB_HASH_TABLE bytes long */
167 if (statbuf.st_size < CDB_HASH_TABLE) {
168 int save_errno = errno;
169 *errmsg = string_open_failed(errno,
170 "%s too short for cdb lookup",
176 int save_errno = errno;
177 *errmsg = string_open_failed(errno,
178 "fstat(%s) failed - cannot do cdb lookup",
184 /* Having got a file open we need the structure to put things in */
185 cdbp = store_get(sizeof(struct cdb_state));
186 /* store_get() does not return if memory was not available... */
187 /* preload the structure.... */
188 cdbp->fileno = fileno;
189 cdbp->filelen = statbuf.st_size;
190 cdbp->cdb_map = NULL;
191 cdbp->cdb_offsets = NULL;
193 /* if we are allowed to we use mmap here.... */
201 if (mapbuf != MAP_FAILED) {
202 /* We have an mmap-ed section. Now we can just use it */
203 cdbp->cdb_map = mapbuf;
204 /* The offsets can be set to the same value since they should
205 * effectively be cached as well
207 cdbp->cdb_offsets = mapbuf;
209 /* Now return the state struct */
212 /* If we got here the map failed. Basically we can ignore
213 * this since we fall back to slower methods....
214 * However let's debug log it...
216 DEBUG(D_lookup) debug_printf("cdb mmap failed - %d\n", errno);
218 #endif /* HAVE_MMAP */
220 /* In this case we have either not got MMAP allowed, or it failed */
222 /* get a buffer to stash the basic offsets in - this should speed
223 * things up a lot - especially on multiple lookups */
224 cdbp->cdb_offsets = store_get(CDB_HASH_TABLE);
226 /* now fill the buffer up... */
227 if (cdb_bread(fileno, cdbp->cdb_offsets, CDB_HASH_TABLE) == -1) {
228 /* read of hash table failed, oh dear, oh.....
229 * time to give up I think....
230 * call the close routine (deallocs the memory), and return NULL */
231 *errmsg = string_open_failed(errno,
232 "cannot read header from %s for cdb lookup",
238 /* Everything else done - return the cache structure */
244 /*************************************************
245 * Check entry point *
246 *************************************************/
249 cdb_check(void *handle,
256 struct cdb_state * cdbp = handle;
257 return lf_check_file(cdbp->fileno,
269 /*************************************************
271 *************************************************/
274 cdb_find(void *handle,
282 struct cdb_state * cdbp = handle;
296 /* Keep picky compilers happy */
299 key_hash = cdb_hash((uschar *)keystring, key_len);
301 hash_offset_entry = CDB_HASH_ENTRY * (key_hash & CDB_HASH_MASK);
302 hash_offset = cdb_unpack(cdbp->cdb_offsets + hash_offset_entry);
303 hash_offlen = cdb_unpack(cdbp->cdb_offsets + hash_offset_entry + 4);
305 /* If the offset length is zero this key cannot be in the file */
306 if (hash_offlen == 0) {
309 hash_slotnm = (key_hash >> 8) % hash_offlen;
311 /* check to ensure that the file is not corrupt
312 * if the hash_offset + (hash_offlen * CDB_HASH_ENTRY) is longer
313 * than the file, then we have problems.... */
314 if ((hash_offset + (hash_offlen * CDB_HASH_ENTRY)) > cdbp->filelen) {
315 *errmsg = string_sprintf("cdb: corrupt cdb file %s (too short)",
317 DEBUG(D_lookup) debug_printf("%s\n", *errmsg);
321 cur_offset = hash_offset + (hash_slotnm * CDB_HASH_ENTRY);
322 end_offset = hash_offset + (hash_offlen * CDB_HASH_ENTRY);
323 /* if we are allowed to we use mmap here.... */
325 /* make sure the mmap was OK */
326 if (cdbp->cdb_map != NULL) {
327 uschar * cur_pos = cur_offset + cdbp->cdb_map;
328 uschar * end_pos = end_offset + cdbp->cdb_map;
329 for (loop = 0; (loop < hash_offlen); ++loop) {
330 item_hash = cdb_unpack(cur_pos);
332 item_posn = cdb_unpack(cur_pos);
334 /* if the position is zero then we have a definite miss */
338 if (item_hash == key_hash) {
339 /* matching hash value */
340 uschar * item_ptr = cdbp->cdb_map + item_posn;
341 item_key_len = cdb_unpack(item_ptr);
343 item_dat_len = cdb_unpack(item_ptr);
345 /* check key length matches */
346 if (item_key_len == key_len) {
347 /* finally check if key matches */
348 if (Ustrncmp(keystring, item_ptr, key_len) == 0) {
349 /* we have a match....
350 * make item_ptr point to data */
351 item_ptr += item_key_len;
352 /* ... and the returned result */
353 *result = store_get(item_dat_len + 1);
354 memcpy(*result, item_ptr, item_dat_len);
355 (*result)[item_dat_len] = 0;
360 /* handle wrap round of table */
361 if (cur_pos == end_pos)
362 cur_pos = cdbp->cdb_map + hash_offset;
364 /* looks like we failed... */
367 #endif /* HAVE_MMAP */
368 for (loop = 0; (loop < hash_offlen); ++loop) {
370 if (lseek(cdbp->fileno, (off_t) cur_offset,SEEK_SET) == -1) return DEFER;
371 if (cdb_bread(cdbp->fileno, packbuf,8) == -1) return DEFER;
372 item_hash = cdb_unpack(packbuf);
373 item_posn = cdb_unpack(packbuf + 4);
374 /* if the position is zero then we have a definite miss */
378 if (item_hash == key_hash) {
379 /* matching hash value */
380 if (lseek(cdbp->fileno, (off_t) item_posn, SEEK_SET) == -1) return DEFER;
381 if (cdb_bread(cdbp->fileno, packbuf, 8) == -1) return DEFER;
382 item_key_len = cdb_unpack(packbuf);
383 /* check key length matches */
384 if (item_key_len == key_len) {
385 /* finally check if key matches */
386 uschar * item_key = store_get(key_len);
387 if (cdb_bread(cdbp->fileno, item_key, key_len) == -1) return DEFER;
388 if (Ustrncmp(keystring, item_key, key_len) == 0) {
389 /* Reclaim some store */
390 store_reset(item_key);
391 /* matches - get data length */
392 item_dat_len = cdb_unpack(packbuf + 4);
393 /* then we build a new result string */
394 *result = store_get(item_dat_len + 1);
395 if (cdb_bread(cdbp->fileno, *result, item_dat_len) == -1)
397 (*result)[item_dat_len] = 0;
400 /* Reclaim some store */
401 store_reset(item_key);
406 /* handle wrap round of table */
407 if (cur_offset == end_offset)
408 cur_offset = hash_offset;
415 /*************************************************
416 * Close entry point *
417 *************************************************/
419 /* See local README for interface description */
422 cdb_close(void *handle)
424 struct cdb_state * cdbp = handle;
428 munmap(CS cdbp->cdb_map, cdbp->filelen);
429 if (cdbp->cdb_map == cdbp->cdb_offsets)
430 cdbp->cdb_offsets = NULL;
432 #endif /* HAVE_MMAP */
434 (void)close(cdbp->fileno);
437 /* End of lookups/cdb.c */