LCOV - code coverage report
Current view: top level - third_party/heimdal/lib/base - bsearch.c (source / functions) Hit Total Coverage
Test: coverage report for support-claim-type-attributes 6b5c566e Lines: 0 365 0.0 %
Date: 2023-11-21 12:31:41 Functions: 0 12 0.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2011, Secure Endpoints Inc.
       3             :  * All rights reserved.
       4             :  *
       5             :  * Redistribution and use in source and binary forms, with or without
       6             :  * modification, are permitted provided that the following conditions
       7             :  * are met:
       8             :  *
       9             :  * - Redistributions of source code must retain the above copyright
      10             :  *   notice, this list of conditions and the following disclaimer.
      11             :  *
      12             :  * - Redistributions in binary form must reproduce the above copyright
      13             :  *   notice, this list of conditions and the following disclaimer in
      14             :  *   the documentation and/or other materials provided with the
      15             :  *   distribution.
      16             :  *
      17             :  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
      18             :  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
      19             :  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
      20             :  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
      21             :  * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
      22             :  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
      23             :  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
      24             :  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
      25             :  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
      26             :  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
      27             :  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
      28             :  * OF THE POSSIBILITY OF SUCH DAMAGE.
      29             :  *
      30             :  */
      31             : 
      32             : #include "baselocl.h"
      33             : 
      34             : #include <sys/types.h>
      35             : #include <sys/stat.h>
      36             : #ifdef HAVE_IO_H
      37             : #include <io.h>
      38             : #endif
      39             : #ifdef HAVE_UNISTD_H
      40             : #include <unistd.h>
      41             : #endif
      42             : #include <fcntl.h>
      43             : #include <ctype.h>
      44             : #include <stdio.h>
      45             : #include <stdlib.h>
      46             : #include <string.h>
      47             : #ifdef HAVE_STRINGS_H
      48             : #include <strings.h>
      49             : #endif
      50             : #include <errno.h>
      51             : #include <assert.h>
      52             : 
      53             : /*
      54             :  * This file contains functions for binary searching flat text in memory
      55             :  * and in text files where each line is a [variable length] record.
      56             :  * Each record has a key and an optional value separated from the key by
      57             :  * unquoted whitespace.  Whitespace in the key, and leading whitespace
      58             :  * for the value, can be quoted with backslashes (but CR and LF must be
      59             :  * quoted in such a way that they don't appear in the quoted result).
      60             :  *
      61             :  * Binary searching a tree are normally a dead simple algorithm.  It
      62             :  * turns out that binary searching flat text with *variable* length
      63             :  * records is... tricky.  There's no indexes to record beginning bytes,
      64             :  * thus any index selected during the search is likely to fall in the
      65             :  * middle of a record.  When deciding to search a left sub-tree one
      66             :  * might fail to find the last record in that sub-tree on account of the
      67             :  * right boundary falling in the middle of it -- the chosen solution to
      68             :  * this makes left sub-tree searches slightly less efficient than right
      69             :  * sub-tree searches.
      70             :  *
      71             :  * If binary searching flat text in memory is tricky, using block-wise
      72             :  * I/O instead is trickier!  But it's necessary in order to support
      73             :  * large files (which we either can't or wouldn't want to read or map
      74             :  * into memory).  Each block we read has to be large enough that the
      75             :  * largest record can fit in it.  And each block might start and/or end
      76             :  * in the middle of a record.  Here it is the right sub-tree searches
      77             :  * that are less efficient than left sub-tree searches.
      78             :  *
      79             :  * bsearch_common() contains the common text block binary search code.
      80             :  *
      81             :  * _bsearch_text() is the interface for searching in-core text.
      82             :  * _bsearch_file() is the interface for block-wise searching files.
      83             :  */
      84             : 
      85             : struct bsearch_file_handle {
      86             :     int fd;          /* file descriptor */
      87             :     char *cache;     /* cache bytes */
      88             :     char *page;      /* one double-size page worth of bytes */
      89             :     size_t file_sz;  /* file size */
      90             :     size_t cache_sz; /* cache size */
      91             :     size_t page_sz;  /* page size */
      92             : };
      93             : 
      94             : /* Find a new-line */
      95             : static const char *
      96           0 : find_line(const char *buf, size_t i, size_t right)
      97             : {
      98           0 :     if (i == 0)
      99           0 :         return &buf[i];
     100           0 :     for (; i < right; i++) {
     101           0 :         if (buf[i] == '\n') {
     102           0 :             if ((i + 1) < right)
     103           0 :                 return &buf[i + 1];
     104           0 :             return NULL;
     105             :         }
     106             :     }
     107           0 :     return NULL;
     108             : }
     109             : 
     110             : /*
     111             :  * Common routine for binary searching text in core.
     112             :  *
     113             :  * Perform a binary search of a char array containing a block from a
     114             :  * text file where each line is a record (LF and CRLF supported).  Each
     115             :  * record consists of a key followed by an optional value separated from
     116             :  * the key by whitespace.  Whitespace can be quoted with backslashes.
     117             :  * It's the caller's responsibility to encode/decode keys/values if
     118             :  * quoting is desired; newlines should be encoded such that a newline
     119             :  * does not appear in the result.
     120             :  *
     121             :  * All output arguments are optional.
     122             :  *
     123             :  * Returns 0 if key is found, -1 if not found, or an error code such as
     124             :  * ENOMEM in case of error.
     125             :  *
     126             :  * Inputs:
     127             :  *
     128             :  * @buf          String to search
     129             :  * @sz           Size of string to search
     130             :  * @key          Key string to search for
     131             :  * @buf_is_start True if the buffer starts with a record, false if it
     132             :  *               starts in the middle of a record or if the caller
     133             :  *               doesn't know.
     134             :  *
     135             :  * Outputs:
     136             :  *
     137             :  * @value        Location to store a copy of the value (caller must free)
     138             :  * @location     Record location if found else the location where the
     139             :  *               record should be inserted (index into @buf)
     140             :  * @cmp          Set to less than or greater than 0 to indicate that a
     141             :  *               key not found would have fit in an earlier or later
     142             :  *               part of a file.  Callers should use this to decide
     143             :  *               whether to read a block to the left or to the right and
     144             :  *               search that.
     145             :  * @loops        Location to store a count of bisections required for
     146             :  *               search (useful for confirming logarithmic performance)
     147             :  */
     148             : static int
     149           0 : bsearch_common(const char *buf, size_t sz, const char *key,
     150             :                int buf_is_start, char **value, size_t *location,
     151             :                int *cmp, size_t *loops)
     152             : {
     153           0 :     const char *linep;
     154           0 :     size_t key_start, key_len; /* key string in buf */
     155           0 :     size_t val_start, val_len; /* value string in buf */
     156           0 :     int key_cmp = -1;
     157           0 :     size_t k;
     158           0 :     size_t l;    /* left side of buffer for binary search */
     159           0 :     size_t r;    /* right side of buffer for binary search */
     160           0 :     size_t rmax; /* right side of buffer for binary search */
     161           0 :     size_t i;    /* index into buffer, typically in the middle of l and r */
     162           0 :     size_t loop_count = 0;
     163           0 :     int ret = -1;
     164             : 
     165           0 :     if (value)
     166           0 :         *value = NULL;
     167           0 :     if (cmp)
     168           0 :         *cmp = 0;
     169           0 :     if (loops)
     170           0 :         *loops = 0;
     171             : 
     172             :     /* Binary search; file should be sorted */
     173           0 :     for (l = 0, r = rmax = sz, i = sz >> 1; i >= l && i < rmax; loop_count++) {
     174           0 :         heim_assert(i < sz, "invalid aname2lname db index");
     175             : 
     176             :         /* buf[i] is likely in the middle of a line; find the next line */
     177           0 :         linep = find_line(buf, i, rmax);
     178           0 :         k = linep ? linep - buf : i;
     179           0 :         if (linep == NULL || k >= rmax) {
     180             :             /*
     181             :              * No new line found to the right; search to the left then
     182             :              * but don't change rmax (this isn't optimal, but it's
     183             :              * simple).
     184             :              */
     185           0 :             if (i == l)
     186           0 :                 break;
     187           0 :             r = i;
     188           0 :             i = l + ((r - l) >> 1);
     189           0 :             continue;
     190             :         }
     191           0 :         i = k;
     192           0 :         heim_assert(i >= l && i < rmax, "invalid aname2lname db index");
     193             : 
     194             :         /* Got a line; check it */
     195             : 
     196             :         /* Search for and split on unquoted whitespace */
     197           0 :         val_start = 0;
     198           0 :         for (key_start = i, key_len = 0, val_len = 0, k = i; k < rmax; k++) {
     199           0 :             if (buf[k] == '\\') {
     200           0 :                 k++;
     201           0 :                 continue;
     202             :             }
     203           0 :             if (buf[k] == '\r' || buf[k] == '\n') {
     204             :                 /* We now know where the key ends, and there's no value */
     205           0 :                 key_len = k - i;
     206           0 :                 break;
     207             :             }
     208           0 :             if (!isspace((unsigned char)buf[k]))
     209           0 :                 continue;
     210             : 
     211           0 :             while (k < rmax && isspace((unsigned char)buf[k])) {
     212           0 :                 key_len = k - i;
     213           0 :                 k++;
     214             :             }
     215           0 :             if (k < rmax)
     216           0 :                 val_start = k;
     217             :             /* Find end of value */
     218           0 :             for (; k < rmax && buf[k] != '\0'; k++) {
     219           0 :                 if (buf[k] == '\r' || buf[k] == '\n') {
     220           0 :                     val_len = k - val_start;
     221           0 :                     break;
     222             :                 }
     223             :             }
     224           0 :             break;
     225             :         }
     226             : 
     227             :         /*
     228             :          * The following logic is for dealing with partial buffers,
     229             :          * which we use for block-wise binary searches of large files
     230             :          */
     231           0 :         if (key_start == 0 && !buf_is_start) {
     232             :             /*
     233             :              * We're at the beginning of a block that might have started
     234             :              * in the middle of a record whose "key" might well compare
     235             :              * as greater than the key we're looking for, so we don't
     236             :              * bother comparing -- we know key_cmp must be -1 here.
     237             :              */
     238           0 :             key_cmp = -1;
     239           0 :             break;
     240             :         }
     241           0 :         if ((val_len && buf[val_start + val_len] != '\n') ||
     242           0 :             (!val_len && buf[key_start + key_len] != '\n')) {
     243             :             /*
     244             :              * We're at the end of a block that ends in the middle of a
     245             :              * record whose "key" might well compare as less than the
     246             :              * key we're looking for, so we don't bother comparing -- we
     247             :              * know key_cmp must be >= 0 but we can't tell.  Our caller
     248             :              * will end up reading a double-size block to handle this.
     249             :              */
     250           0 :             key_cmp = 1;
     251           0 :             break;
     252             :         }
     253             : 
     254           0 :         key_cmp = strncmp(key, &buf[key_start], key_len);
     255           0 :         if (key_cmp == 0 && strlen(key) != key_len)
     256           0 :             key_cmp = 1;
     257           0 :         if (key_cmp < 0) {
     258             :             /* search left */
     259           0 :             r = rmax = (linep - buf);
     260           0 :             i = l + ((r - l) >> 1);
     261           0 :             if (location)
     262           0 :                 *location = key_start;
     263           0 :         } else if (key_cmp > 0) {
     264             :             /* search right */
     265           0 :             if (l == i)
     266           0 :                 break; /* not found */
     267           0 :             l = i;
     268           0 :             i = l + ((r - l) >> 1);
     269           0 :             if (location)
     270           0 :                 *location = val_start + val_len;
     271             :         } else {
     272             :             /* match! */
     273           0 :             if (location)
     274           0 :                 *location = key_start;
     275           0 :             ret = 0;
     276           0 :             if (val_len && value) {
     277             :                 /* Avoid strndup() so we don't need libroken here yet */
     278           0 :                 if ((*value = malloc(val_len + 1))) {
     279           0 :                     (void) memcpy(*value, &buf[val_start], val_len);
     280           0 :                     (*value)[val_len] = '\0';
     281             :                 } else {
     282           0 :                     ret = errno;
     283             :                 }
     284             :             }
     285           0 :             break;
     286             :         }
     287             :     }
     288             : 
     289           0 :     if (cmp)
     290           0 :         *cmp = key_cmp;
     291           0 :     if (loops)
     292           0 :         *loops = loop_count;
     293             : 
     294           0 :     return ret;
     295             : }
     296             : 
     297             : /*
     298             :  * Binary search a char array containing sorted text records separated
     299             :  * by new-lines (or CRLF).  Each record consists of a key and an
     300             :  * optional value following the key, separated from the key by unquoted
     301             :  * whitespace.
     302             :  *
     303             :  * All output arguments are optional.
     304             :  *
     305             :  * Returns 0 if key is found, -1 if not found, or an error code such as
     306             :  * ENOMEM in case of error.
     307             :  *
     308             :  * Inputs:
     309             :  *
     310             :  * @buf      Char array pointer
     311             :  * @buf_sz   Size of buf
     312             :  * @key      Key to search for
     313             :  *
     314             :  * Outputs:
     315             :  *
     316             :  * @value    Location where to put the value, if any (caller must free)
     317             :  * @location Record location if found else the location where the record
     318             :  *           should be inserted (index into @buf)
     319             :  * @loops    Location where to put a number of loops (or comparisons)
     320             :  *           needed for the search (useful for benchmarking)
     321             :  */
     322             : int
     323           0 : _bsearch_text(const char *buf, size_t buf_sz, const char *key,
     324             :                char **value, size_t *location, size_t *loops)
     325             : {
     326           0 :     return bsearch_common(buf, buf_sz, key, 1, value, location, NULL, loops);
     327             : }
     328             : 
     329             : #define MAX_BLOCK_SIZE (1024 * 1024)
     330             : #define DEFAULT_MAX_FILE_SIZE (1024 * 1024)
     331             : /*
     332             :  * Open a file for binary searching.  The file will be read in entirely
     333             :  * if it is smaller than @max_sz, else a cache of @max_sz bytes will be
     334             :  * allocated.
     335             :  *
     336             :  * Returns 0 on success, else an error number or -1 if the file is empty.
     337             :  *
     338             :  * Inputs:
     339             :  *
     340             :  * @fname   Name of file to open
     341             :  * @max_sz  Maximum size of cache to allocate, in bytes (if zero, default)
     342             :  * @page_sz Page size (must be a power of two, larger than 256, smaller
     343             :  *          than 1MB; if zero use default)
     344             :  * 
     345             :  * Outputs:
     346             :  *
     347             :  * @bfh     Handle for use with _bsearch_file() and _bsearch_file_close()
     348             :  * @reads   Number of reads performed
     349             :  */
     350             : int
     351           0 : _bsearch_file_open(const char *fname, size_t max_sz, size_t page_sz,
     352             :                     bsearch_file_handle *bfh, size_t *reads)
     353             : {
     354           0 :     bsearch_file_handle new_bfh = NULL;
     355           0 :     struct stat st;
     356           0 :     size_t i;
     357           0 :     int fd;
     358           0 :     int ret;
     359             : 
     360           0 :     *bfh = NULL;
     361             : 
     362           0 :     if (reads)
     363           0 :         *reads = 0;
     364             : 
     365           0 :     fd = open(fname, O_RDONLY);
     366           0 :     if (fd == -1)
     367           0 :         return errno;
     368             : 
     369           0 :     if (fstat(fd, &st) == -1) {
     370           0 :         ret = errno;
     371           0 :         goto err;
     372             :     }
     373             : 
     374           0 :     if (st.st_size == 0) {
     375           0 :         ret = -1; /* no data -> no binary search */
     376           0 :         goto err;
     377             :     }
     378             : 
     379             :     /* Validate / default arguments */
     380           0 :     if (max_sz == 0)
     381           0 :         max_sz = DEFAULT_MAX_FILE_SIZE;
     382           0 :     for (i = page_sz; i; i >>= 1) {
     383             :         /* Make sure page_sz is a power of two */
     384           0 :         if ((i % 2) && (i >> 1)) {
     385           0 :             page_sz = 0;
     386           0 :             break;
     387             :         }
     388             :     }
     389           0 :     if (page_sz == 0)
     390             : #ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
     391             :         page_sz = st.st_blksize;
     392             : #else
     393           0 :         page_sz = 4096;
     394             : #endif
     395           0 :     for (i = page_sz; i; i >>= 1) {
     396             :         /* Make sure page_sz is a power of two */
     397           0 :         if ((i % 2) && (i >> 1)) {
     398             :             /* Can't happen! Filesystems always use powers of two! */
     399           0 :             page_sz = 4096;
     400           0 :             break;
     401             :         }
     402             :     }
     403           0 :     if (page_sz > MAX_BLOCK_SIZE)
     404           0 :         page_sz = MAX_BLOCK_SIZE;
     405             : 
     406           0 :     new_bfh = calloc(1, sizeof (*new_bfh));
     407           0 :     if (new_bfh == NULL) {
     408           0 :         ret = ENOMEM;
     409           0 :         goto err;
     410             :     }
     411             : 
     412           0 :     new_bfh->fd = fd;
     413           0 :     new_bfh->page_sz = page_sz;
     414           0 :     new_bfh->file_sz = st.st_size;
     415             : 
     416           0 :     if (max_sz >= st.st_size) {
     417             :         /* Whole-file method */
     418           0 :         new_bfh->cache = malloc(st.st_size + 1);
     419           0 :         if (new_bfh->cache) {
     420           0 :             new_bfh->cache[st.st_size] = '\0';
     421           0 :             new_bfh->cache_sz = st.st_size;
     422           0 :             ret = read(fd, new_bfh->cache, st.st_size);
     423           0 :             if (ret < 0) {
     424           0 :                 ret = errno;
     425           0 :                 goto err;
     426             :             }
     427           0 :             if (ret != st.st_size) {
     428           0 :                 ret = EIO; /* XXX ??? */
     429           0 :                 goto err;
     430             :             }
     431           0 :             if (reads)
     432           0 :                 *reads = 1;
     433           0 :             (void) close(fd);
     434           0 :             new_bfh->fd = -1;
     435           0 :             *bfh = new_bfh;
     436           0 :             return 0;
     437             :         }
     438             :     }
     439             : 
     440             :     /* Block-size method, or above malloc() failed */
     441           0 :     new_bfh->page = malloc(new_bfh->page_sz << 1);
     442           0 :     if (new_bfh->page == NULL) {
     443             :         /* Can't even allocate a single double-size page! */
     444           0 :         ret = ENOMEM;
     445           0 :         goto err;
     446             :     }
     447             : 
     448           0 :     new_bfh->cache_sz = max_sz < st.st_size ? max_sz : st.st_size;
     449           0 :     new_bfh->cache = malloc(new_bfh->cache_sz);
     450           0 :     *bfh = new_bfh;
     451             : 
     452             :     /*
     453             :      * malloc() may have failed because we were asking for a lot of
     454             :      * memory, but we may still be able to operate without a cache,
     455             :      * so let's not fail.
     456             :      */
     457           0 :     if (new_bfh->cache == NULL) {
     458           0 :         new_bfh->cache_sz = 0;
     459           0 :         return 0;
     460             :     }
     461             : 
     462             :     /* Initialize cache */
     463           0 :     for (i = 0; i < new_bfh->cache_sz; i += new_bfh->page_sz)
     464           0 :         new_bfh->cache[i] = '\0';
     465           0 :     return 0;
     466             : 
     467           0 : err:
     468           0 :     (void) close(fd);
     469           0 :     if (new_bfh) {
     470           0 :         free(new_bfh->page);
     471           0 :         free(new_bfh->cache);
     472           0 :         free(new_bfh);
     473             :     }
     474           0 :     return ret;
     475             : }
     476             : 
     477             : /*
     478             :  * Indicate whether the given binary search file handle will be searched
     479             :  * with block-wise method.
     480             :  */
     481             : void
     482           0 : _bsearch_file_info(bsearch_file_handle bfh,
     483             :                     size_t *page_sz, size_t *max_sz, int *blockwise)
     484             : {
     485           0 :     if (page_sz)
     486           0 :         *page_sz = bfh->page_sz;
     487           0 :     if (max_sz)
     488           0 :         *max_sz = bfh->cache_sz;
     489           0 :     if (blockwise)
     490           0 :         *blockwise = (bfh->file_sz != bfh->cache_sz);
     491           0 : }
     492             : 
     493             : /*
     494             :  * Close the given binary file search handle.
     495             :  *
     496             :  * Inputs:
     497             :  *
     498             :  * @bfh Pointer to variable containing handle to close.
     499             :  */
     500             : void
     501           0 : _bsearch_file_close(bsearch_file_handle *bfh)
     502             : {
     503           0 :     if (!*bfh)
     504           0 :         return;
     505           0 :     if ((*bfh)->fd >= 0)
     506           0 :         (void) close((*bfh)->fd);
     507           0 :     if ((*bfh)->page)
     508           0 :         free((*bfh)->page);
     509           0 :     if ((*bfh)->cache)
     510           0 :         free((*bfh)->cache);
     511           0 :     free(*bfh);
     512           0 :     *bfh = NULL;
     513             : }
     514             : 
     515             : /*
     516             :  * Private function to get a page from a cache.  The cache is a char
     517             :  * array of 2^n - 1 double-size page worth of bytes, where n is the
     518             :  * number of tree levels that the cache stores.  The cache can be
     519             :  * smaller than n implies.
     520             :  *
     521             :  * The page may or may not be valid.  If the first byte of it is NUL
     522             :  * then it's not valid, else it is.
     523             :  *
     524             :  * Returns 1 if page is in cache and valid, 0 if the cache is too small
     525             :  * or the page is invalid.  The page address is output in @buf if the
     526             :  * cache is large enough to contain it regardless of whether the page is
     527             :  * valid.
     528             :  *
     529             :  * Inputs:
     530             :  *
     531             :  * @bfh      Binary search file handle
     532             :  * @level    Level in the tree that we want a page for
     533             :  * @page_idx Page number in the given level (0..2^level - 1)
     534             :  *
     535             :  * Outputs:
     536             :  *
     537             :  * @buf      Set to address of page if the cache is large enough
     538             :  */
     539             : static int
     540           0 : get_page_from_cache(bsearch_file_handle bfh, size_t level, size_t page_idx,
     541             :                     char **buf)
     542             : {
     543           0 :     size_t idx = 0;
     544           0 :     size_t page_sz;
     545             : 
     546           0 :     page_sz = bfh->page_sz << 1; /* we use double-size pages in the cache */
     547             : 
     548           0 :     *buf = NULL;
     549             : 
     550             :     /*
     551             :      * Compute index into cache.  The cache is basically an array of
     552             :      * double-size pages.  The first (zeroth) double-size page in the
     553             :      * cache will be the middle page of the file -- the root of the
     554             :      * tree.  The next two double-size pages will be the left and right
     555             :      * pages of the second level in the tree.  The next four double-size
     556             :      * pages will be the four pages at the next level.  And so on for as
     557             :      * many pages as fit in the cache.
     558             :      *
     559             :      * The page index is the number of the page at the given level.  We
     560             :      * then compute (2^level - 1 + page index) * 2page size, check that
     561             :      * we have that in the cache, check that the page has been read (it
     562             :      * doesn't start with NUL).
     563             :      */
     564           0 :     if (level)
     565           0 :         idx = (1 << level) - 1 + page_idx;
     566           0 :     if (((idx + 1) * page_sz * 2) > bfh->cache_sz)
     567           0 :         return 0;
     568             : 
     569           0 :     *buf = &bfh->cache[idx * page_sz * 2];
     570           0 :     if (bfh->cache[idx * page_sz * 2] == '\0')
     571           0 :         return 0; /* cache[idx] == NUL -> page not loaded in cache */
     572           0 :     return 1;
     573             : }
     574             : 
     575             : /*
     576             :  * Private function to read a page of @page_sz from @fd at offset @off
     577             :  * into @buf, outputing the number of bytes read, which will be the same
     578             :  * as @page_sz unless the page being read is the last page, in which
     579             :  * case the number of remaining bytes in the file will be output.
     580             :  *
     581             :  * Returns 0 on success or an errno value otherwise (EIO if reads are
     582             :  * short).
     583             :  *
     584             :  * Inputs:
     585             :  *
     586             :  * @bfh        Binary search file handle
     587             :  * @level      Level in the binary search tree that we're at
     588             :  * @page_idx   Page "index" at the @level of the tree that we want
     589             :  * @page       Actual page number that we want
     590             :  * want_double Whether we need a page or double page read
     591             :  *
     592             :  * Outputs:
     593             :  *
     594             :  * @buf        Page read or cached
     595             :  * @bytes      Bytes read (may be less than page or double page size in
     596             :  *             the case of the last page, of course)
     597             :  */
     598             : static int
     599           0 : read_page(bsearch_file_handle bfh, size_t level, size_t page_idx, size_t page,
     600             :           int want_double, const char **buf, size_t *bytes)
     601             : {
     602           0 :     int ret;
     603           0 :     off_t off;
     604           0 :     size_t expected;
     605           0 :     size_t wanted;
     606           0 :     char *page_buf;
     607             : 
     608             :     /* Figure out where we're reading and how much */
     609           0 :     off = page * bfh->page_sz;
     610           0 :     if (off < 0)
     611           0 :         return EOVERFLOW;
     612             : 
     613           0 :     wanted = bfh->page_sz << want_double;
     614           0 :     expected = ((bfh->file_sz - off) > wanted) ? wanted : bfh->file_sz - off;
     615             : 
     616           0 :     if (get_page_from_cache(bfh, level, page_idx, &page_buf)) {
     617           0 :         *buf = page_buf;
     618           0 :         *bytes = expected;
     619           0 :         return 0; /* found in cache */
     620             :     }
     621             : 
     622             : 
     623           0 :     *bytes = 0;
     624           0 :     *buf = NULL;
     625             : 
     626             :     /* OK, we have to read a page or double-size page */
     627             : 
     628           0 :     if (page_buf)
     629           0 :         want_double = 1; /* we'll be caching; we cache double-size pages */
     630             :     else
     631           0 :         page_buf = bfh->page; /* we won't cache this page */
     632             : 
     633           0 :     wanted = bfh->page_sz << want_double;
     634           0 :     expected = ((bfh->file_sz - off) > wanted) ? wanted : bfh->file_sz - off;
     635             : 
     636             : #ifdef HAVE_PREAD
     637           0 :     ret = pread(bfh->fd, page_buf, expected, off);
     638             : #else
     639             :     if (lseek(bfh->fd, off, SEEK_SET) == (off_t)-1)
     640             :         return errno;
     641             :     ret = read(bfh->fd, page_buf, expected);
     642             : #endif
     643           0 :     if (ret < 0)
     644           0 :         return errno;
     645             :     
     646           0 :     if (ret != expected)
     647           0 :         return EIO; /* XXX ??? */
     648             : 
     649           0 :     *buf = page_buf;
     650           0 :     *bytes = expected;
     651           0 :     return 0;
     652             : }
     653             : 
     654             : /*
     655             :  * Perform a binary search of a file where each line is a record (LF and
     656             :  * CRLF supported).  Each record consists of a key followed by an
     657             :  * optional value separated from the key by whitespace.  Whitespace can
     658             :  * be quoted with backslashes.  It's the caller's responsibility to
     659             :  * encode/decode keys/values if quoting is desired; newlines should be
     660             :  * encoded such that a newline does not appear in the result.
     661             :  *
     662             :  * The search is done with block-wise I/O (i.e., the whole file is not
     663             :  * read into memory).
     664             :  *
     665             :  * All output arguments are optional.
     666             :  *
     667             :  * Returns 0 if key is found, -1 if not found, or an error code such as
     668             :  * ENOMEM in case of error.
     669             :  *
     670             :  * NOTE: We could improve this by not freeing the buffer, instead
     671             :  *       requiring that the caller provide it.  Further, we could cache
     672             :  *       the top N levels of [double-size] pages (2^N - 1 pages), which
     673             :  *       should speed up most searches by reducing the number of reads
     674             :  *       by N.
     675             :  *
     676             :  * Inputs:
     677             :  *
     678             :  * @fd           File descriptor (file to search)
     679             :  * @page_sz      Page size (if zero then the file's st_blksize will be used)
     680             :  * @key          Key string to search for
     681             :  *
     682             :  * Outputs:
     683             :  *
     684             :  * @value        Location to store a copy of the value (caller must free)
     685             :  * @location     Record location if found else the location where the
     686             :  *               record should be inserted (index into @buf)
     687             :  * @loops        Location to store a count of bisections required for
     688             :  *               search (useful for confirming logarithmic performance)
     689             :  * @reads        Location to store a count of pages read during search
     690             :  *               (useful for confirming logarithmic performance)
     691             :  */
     692             : int
     693           0 : _bsearch_file(bsearch_file_handle bfh, const char *key,
     694             :                char **value, size_t *location, size_t *loops, size_t *reads)
     695             : {
     696           0 :     int ret;
     697           0 :     const char *buf;
     698           0 :     size_t buf_sz;
     699           0 :     size_t page, l, r;
     700           0 :     size_t my_reads = 0;
     701           0 :     size_t my_loops_total = 0;
     702           0 :     size_t my_loops;
     703           0 :     size_t level;        /* level in the tree */
     704           0 :     size_t page_idx = 0; /* page number in the tree level */
     705           0 :     size_t buf_location;
     706           0 :     int cmp;
     707           0 :     int buf_ends_in_eol = 0;
     708           0 :     int buf_is_start = 0;
     709             : 
     710           0 :     if (reads)
     711           0 :         *reads = 0;
     712           0 :     if (value)
     713           0 :         *value = NULL;
     714           0 :     if (loops)
     715           0 :         *loops = 0;
     716             : 
     717             :     /* If whole file is in memory then search that and we're done */
     718           0 :     if (bfh->file_sz == bfh->cache_sz)
     719           0 :         return _bsearch_text(bfh->cache, bfh->cache_sz, key, value, location, loops);
     720             : 
     721             :     /* Else block-wise binary search */
     722             : 
     723           0 :     l = 0;
     724           0 :     r = (bfh->file_sz / bfh->page_sz) + 1;
     725           0 :     for (level = 0, page = r >> 1; page >= l && page < r ; level++) {
     726           0 :         ret = read_page(bfh, level, page_idx, page, 0, &buf, &buf_sz);
     727           0 :         if (ret != 0)
     728           0 :             return ret;
     729           0 :         my_reads++;
     730           0 :         if (buf[buf_sz - 1] == '\r' || buf[buf_sz - 1] == '\n')
     731           0 :             buf_ends_in_eol = 1;
     732             :         else
     733           0 :             buf_ends_in_eol = 0;
     734             : 
     735           0 :         buf_is_start = page == 0 ? 1 : 0;
     736           0 :         ret = bsearch_common(buf, (size_t)buf_sz, key, buf_is_start,
     737             :                              value, &buf_location, &cmp, &my_loops);
     738           0 :         if (ret > 0)
     739           0 :             return ret;
     740             :         /* Found or no we update stats */
     741           0 :         my_loops_total += my_loops;
     742           0 :         if (loops)
     743           0 :             *loops = my_loops_total;
     744           0 :         if (reads)
     745           0 :             *reads = my_reads;
     746           0 :         if (location)
     747           0 :             *location = page * bfh->page_sz + buf_location;
     748           0 :         if (ret == 0)
     749           0 :             return 0; /* found! */
     750             :         /* Not found */
     751           0 :         if (cmp < 0) {
     752             :             /* Search left */
     753           0 :             page_idx <<= 1;
     754           0 :             r = page;
     755           0 :             page = l + ((r - l) >> 1);
     756           0 :             continue;
     757             :         } else {
     758             :             /*
     759             :              * Search right, but first search the current and next
     760             :              * blocks in case that the record we're looking for either
     761             :              * straddles the boundary between this and the next record,
     762             :              * or in case the record starts exactly at the next page.
     763             :              */
     764           0 :             heim_assert(cmp > 0, "cmp > 0");
     765             : 
     766           0 :             if (!buf_ends_in_eol || page == l || page == (r - 1)) {
     767           0 :                 ret = read_page(bfh, level, page_idx, page, 1, &buf, &buf_sz);
     768           0 :                 if (ret != 0)
     769           0 :                     return ret;
     770           0 :                 my_reads++;
     771             : 
     772           0 :                 buf_is_start = page == l ? 1 : 0;
     773             : 
     774           0 :                 ret = bsearch_common(buf, (size_t)buf_sz, key, buf_is_start,
     775             :                                      value, &buf_location, &cmp, &my_loops);
     776           0 :                 if (ret > 0)
     777           0 :                     return ret;
     778           0 :                 my_loops_total += my_loops;
     779           0 :                 if (loops)
     780           0 :                     *loops = my_loops_total;
     781           0 :                 if (reads)
     782           0 :                     *reads = my_reads;
     783           0 :                 if (location)
     784           0 :                     *location = page * bfh->page_sz + buf_location;
     785           0 :                 if (ret == 0)
     786           0 :                     return 0;
     787             :             }
     788             : 
     789             :             /* Oh well, search right */
     790           0 :             if (l == page && r == (l + 1))
     791           0 :                 break;
     792           0 :             page_idx = (page_idx << 1) + 1;
     793           0 :             l = page;
     794           0 :             page = l + ((r - l) >> 1);
     795           0 :             continue;
     796             :         }
     797             :     }
     798           0 :     return -1;
     799             : }
     800             : 
     801             : 
     802             : static int
     803           0 : stdb_open(void *plug, const char *dbtype, const char *dbname,
     804             :              heim_dict_t options, void **db, heim_error_t *error)
     805             : {
     806           0 :     bsearch_file_handle bfh;
     807           0 :     char *p;
     808           0 :     int ret;
     809             : 
     810           0 :     if (error)
     811           0 :         *error = NULL;
     812           0 :     if (dbname == NULL || *dbname == '\0') {
     813           0 :         if (error)
     814           0 :             *error = heim_error_create(EINVAL,
     815           0 :                                        N_("DB name required for sorted-text DB "
     816             :                                           "plugin", ""));
     817           0 :         return EINVAL;
     818             :     }
     819           0 :     p = strrchr(dbname, '.');
     820           0 :     if (p == NULL || strcmp(p, ".txt") != 0) {
     821           0 :         if (error)
     822           0 :             *error = heim_error_create(ENOTSUP,
     823           0 :                                        N_("Text file (name ending in .txt) "
     824             :                                        "required for sorted-text DB plugin",
     825             :                                        ""));
     826           0 :         return ENOTSUP;
     827             :     }
     828             : 
     829           0 :     ret = _bsearch_file_open(dbname, 0, 0, &bfh, NULL);
     830           0 :     if (ret)
     831           0 :         return ret;
     832             : 
     833           0 :     *db = bfh;
     834           0 :     return 0;
     835             : }
     836             : 
     837             : static int
     838           0 : stdb_close(void *db, heim_error_t *error)
     839             : {
     840           0 :     bsearch_file_handle bfh = db;
     841             : 
     842           0 :     if (error)
     843           0 :         *error = NULL;
     844           0 :     _bsearch_file_close(&bfh);
     845           0 :     return 0;
     846             : }
     847             : 
     848             : static heim_data_t
     849           0 : stdb_copy_value(void *db, heim_string_t table, heim_data_t key,
     850             :                heim_error_t *error)
     851             : {
     852           0 :     bsearch_file_handle bfh = db;
     853           0 :     const char *k;
     854           0 :     char *v = NULL;
     855           0 :     heim_data_t value;
     856           0 :     int ret;
     857             : 
     858           0 :     if (error)
     859           0 :         *error = NULL;
     860             : 
     861           0 :     if (table == NULL)
     862           0 :         table = HSTR("");
     863             : 
     864           0 :     if (table != HSTR(""))
     865           0 :         return NULL;
     866             : 
     867           0 :     if (heim_get_tid(key) == HEIM_TID_STRING)
     868           0 :         k = heim_string_get_utf8((heim_string_t)key);
     869             :     else
     870           0 :         k = (const char *)heim_data_get_ptr(key);
     871           0 :     ret = _bsearch_file(bfh, k, &v, NULL, NULL, NULL);
     872           0 :     if (ret == 0 && v == NULL)
     873           0 :         ret = -1; /* Quiet lint */
     874           0 :     if (ret != 0) {
     875           0 :         if (ret > 0 && error)
     876           0 :             *error = heim_error_create(ret, "%s", strerror(ret));
     877           0 :         return NULL;
     878             :     }
     879           0 :     value = heim_data_create(v, strlen(v));
     880           0 :     free(v);
     881             :     /* XXX Handle ENOMEM */
     882           0 :     return value;
     883             : }
     884             : 
     885             : struct heim_db_type heim_sorted_text_file_dbtype = {
     886             :     1, stdb_open, NULL, stdb_close, NULL, NULL, NULL, NULL, NULL, NULL,
     887             :     stdb_copy_value, NULL, NULL, NULL
     888             : };

Generated by: LCOV version 1.14