/* Josh Pieper, (c) 2000
   Dan Risacher, (c) 2001
   Robert Munafo, (c) 2000-2001

   This file is distributed under the GPL, see file COPYING for details */

#include <stdio.h>
#include <stdlib.h>
#include <sys/param.h>
#include <sys/stat.h>

/* 0.4.28.c17 */
#include "sh_sys_types.h"

#include <signal.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <pthread.h>
#include <ctype.h>

#include "conf.h"
#include "gnut.h" /* 0.4.28.c01 */
#include "hash.h"
#include "lib.h"
#include "list.h"
#include "queue.h"
#include "share.h"


/* here we declare share's global variables... */
/* Running count of shared files: bumped once per file added by
 * share_scan_dir and reset to 0 by share_clear. */
uint32 g_share_num = 0;
/* Running sum of st_size over every file added by share_scan_dir;
 * reset to 0 by share_clear. */
float64 g_share_size = 0;
/* Head of the list of share_item entries describing the shared files. */
Gnut_List *share_root = 0;

/* Debug counter: share_search zeroes it and increments it once per list
 * entry it examines. */
int sh_dbg_1;
/* Number of hash-search result lists currently outstanding: incremented
 * by share_srch_hash, decremented again when that search returns no list
 * or when share_hash_clear_list releases a non-empty list. */
int sh_qreplies_active;
/* Highest value sh_qreplies_active has reached (tracked in
 * share_srch_hash). */
int sh_qreplies_hi;

/* 0.4.28.c01 Moved mutex to gnut.c */

/* 0.4.28.c05 Removed the old Josh "share cache", an attempt at cacheing
   the results of share_search which was always disabled and wouldn't have
   worked too well either. The code I have removed was all inside #if 0 */

/* Results of the trycmd() probes made in share_init. share_scan_dir treats
 * a nonzero value as "this external program can be used for indexing". */
int have_find, have_perl;

/* Hand the share_item pointer slot *x to yfre(), forwarding bugnum
 * unchanged. */
void fre_si(share_item **x, int bugnum)
{
  void **slot = (void **) x;

  yfre(slot, bugnum);
}

/* Hand the share_item-pointer-array slot *x to yfre(), forwarding bugnum
 * unchanged. */
void fre_sia(share_item ***x, int bugnum)
{
  void **slot = (void **) x;

  yfre(slot, bugnum);
}

/* Hand the match_bitvector slot *x to yfre(), forwarding bugnum
 * unchanged. */
void fre_mbv(match_bitvector *x, int bugnum)
{
  void **slot = (void **) x;

  yfre(slot, bugnum);
}

/* sh_tokenize splits str, in place, into its alphanumeric substrings
 * (as classified by G_ISALNUM) and stores a pointer to the start of each
 * one in tokens[].
 *
 *  str        string to split. IMPORTANT: str is modified; the first
 *             non-alphanumeric character after each returned token is
 *             overwritten with a NUL.
 *  tokens     output array with room for at least maxtokens pointers.
 *  maxtokens  maximum number of tokens to return.
 *
 * Returns the number of tokens stored (0..maxtokens). Scanning stops at the
 * end of the string, or at the start of the first word that would not fit
 * in tokens[]. Every returned token is properly NUL-terminated, including
 * the last one when the limit is reached. */
int sh_tokenize(char *str, char **tokens, int maxtokens)
{
  int i;
  int in_word;
  int nt;

  nt = 0;
  in_word = 0;
  for (i = 0; str[i]; i++) {
    if (G_ISALNUM(str[i])) {
      if (!in_word) {
        /* Beginning of a word */
        if (nt >= maxtokens) {
          /* No room for it. The previous word has already been
           * terminated, so it is safe to stop here. */
          break;
        }
        tokens[nt++] = str + i;
        in_word = 1;
      }
    } else if (in_word) {
      /* A word just ended */
      str[i] = 0;
      in_word = 0;
    }
  }
  return nt;
}

/* match_tok_2 is a faster version of keyword_match written to be used by
 * share_search.
 *
 *  tokens      array of query words (already lowercased by the caller).
 *              WARNING: two entries may be swapped by this routine.
 *  nt          number of entries in tokens.
 *  s1          string to match against (already lowercased).
 *              WARNING: s1 must be 7-bit ASCII; bit 7 of every character
 *              is cleared before returning.
 *  must_exact  if nonzero, additionally require that no character of s1
 *              that G_ISALNUM still classifies as alphanumeric after
 *              tagging is left over.
 *
 * This is always an AND search: every token has to be found in s1.
 * Characters consumed by one token are tagged (bit 7 set) so a later token
 * cannot match the very same characters. Case is not folded here.
 *
 * Returns nonzero on a match, 0 otherwise (including when nt <= 0). */
int match_tok_2(char **tokens, int nt, char *s1, int must_exact)
{
  int i, j;
  int result;
  uint8 *p;
  int ftnum;
  int isexact;

  if (nt <= 0) {
    return 0;
  }

  /* Check each token to see if it's in the string */
  result = 1; ftnum = -1;
  for(i=0; (i<nt) && result; i++) {
    p = (uint8 *) strstr(s1, tokens[i]);
    if (p) {
      /* Found at position p. To enforce AND matching we set the high bit
       * of the characters that were just matched. */
      for(j=0; tokens[i][j]; j++) {
        p[j] |= 0x80;
      }
    } else {
      /* Not found; remember which token failed */
      result = 0;
      ftnum = i;
    }
  }

  /* If the token that failed was not the first token, then we can optimize
   * the next call to this routine by putting the failed token first. */
  if (ftnum > 0) {
    char *tmp;

    tmp = tokens[0];
    tokens[0] = tokens[ftnum];
    tokens[ftnum] = tmp;
  }

  /* If must_exact, scan to see if there are any as-yet-untagged
   * alphanumeric characters */
  isexact = 1;
  if (result && must_exact) {
    uint8 c;

    /* explicit cast: s1 is char *, p is uint8 * */
    p = (uint8 *) s1;
    for(j=0; p[j] && isexact; j++) {
      c = p[j];
      if (G_ISALNUM(c)) {
        isexact = 0;
      }
    }
  }

  /* Clear the high bits we set while matching */
  for(p = (uint8 *) s1; *p; p++) {
    *p &= 0x7f;
  }

  if (gc_debug_opts & 64) {
    if (result) {
      printf("mt2 match |");
      for(j=0; j<nt; j++) {
        printf("%s|", tokens[j]);
      }
      printf(" in %s\n", s1);
    }
  }

  return (result && isexact);
}

/* Gnut_List *share_search(char *orig_query, int max, int *n_reslts)
 *
 * Linear search through share_root for entries whose lc_path contains
 * every alphanumeric word of orig_query (AND match, via match_tok_2).
 *
 *  orig_query  the query string; it is not modified (a lowercased copy is
 *              tokenized instead).
 *  max         stop once this many matches have been collected.
 *  n_reslts    out: number of entries in the returned list.
 *
 * Returns a newly built list of share_item copies (each with its own
 * duplicated path strings), or 0 when the query has no words or nothing
 * matches. Each copy is a full copy of the matching item, so every field
 * (including mtime) is valid in the result. */
Gnut_List *share_search(char *orig_query, int max, int *n_reslts)
{
  Gnut_List *nlist;
  Gnut_List *ltmp;
  share_item *si;
  share_item *copy;
  char *tok_query;
  int count;
  char *tokens[MAX_TOKENS];
  int nt;

  dqi(0x0076);

  sh_dbg_1 = 0;
  nlist = 0;
  *n_reslts = 0;

  GD_S(3, "share.search entering\n");

  /* Tokenizing destroys its input, so work on a lowercased copy */
  tok_query = ystdup(orig_query, 438);
  dqi(0x0077);
  make_lc(tok_query);
  dqi(0x0078);
  nt = sh_tokenize(tok_query, tokens, MAX_TOKENS);
  dqi(0x0079);

  if (nt == 0) {
    dqi(0x007a);
    fre_str(&tok_query, 160);
    return nlist;
  }

  /* We keep count of how many matches we got */
  count = 0;

  dqi(0x007b);

  for (ltmp=share_root; ltmp; ltmp=ltmp->next) {
    sh_dbg_1++;

    GD_S(4, "share.search ltmp="); GD_P(4, ltmp); GD_S(4, " orig_query=");
    GD_P(4, orig_query); GD_S(4, "\n");
    /* see if this share matches... */
    si = ltmp->data;

    if (match_tok_2(tokens, nt, si->lc_path, 0))
      {
        /* we need to make a copy of this node, and
         * insert into the result list... */
        dqi(0x016d);
        copy = (share_item *) ymaloc(sizeof(share_item), 289);

        /* Copy every field first (as share_insert does), then replace the
         * string pointers with private duplicates. */
        memcpy(copy, si, sizeof(share_item));
        copy->path = ystdup(si->path, 440);
        copy->lc_path = ystdup(si->lc_path, 441);
        copy->fpath = ystdup(si->fpath, 442);

        dqi(0x016e);
        nlist = gnut_list_prepend(nlist, copy);

        count++;
        if (count >= max) {
          break;
        }
        dqi(0x016f);
      }
  }
  dqi(0x007c);
  fre_str(&tok_query, 161);

  *n_reslts = count;

  GD_S(3, "share.search returning\n");
  return nlist;
}

/* Gnut_List *share_get_root()
 *
 * Accessor for the head of the global share list (share_root); the
 * original comment notes it is used by the ui. The list itself is
 * returned, not a copy. */
Gnut_List *share_get_root()
{
  Gnut_List *root = share_root;

  return root;
}

/* void share_totals(uint32 *sh_num, float64 *sh_size)
 *
 * Reports the current totals: *sh_num receives g_share_num (number of
 * shared files) and *sh_size receives g_share_size (their accumulated
 * size). The debug trace labels the size "kbytes". */
void share_totals(uint32 *sh_num, float64 *sh_size)
{
  GD_S(3,"share.totals entering\n");

  *sh_num = g_share_num;
  *sh_size = g_share_size;

  /* Trace both the globals and what was stored through the pointers */
  GD_S(3, "share.totals returning share_num=");
  GD_U(3, g_share_num);
  GD_S(3, " share_size=");
  GD_F64(3, g_share_size);
  GD_S(3, " kbytes  *num=");
  GD_U(3, *sh_num);
  GD_S(3, " *size=");
  GD_F64(3, *sh_size);
  GD_S(3, "\n");
}

/* List-walk callback used by share_copy_list.
 *
 *  data       the share_item to duplicate.
 *  user_data  address of the Gnut_List* the duplicate is prepended to.
 *
 * The duplicate is a full copy of the item whose three path strings are
 * freshly allocated copies. Always returns 0. */
int share_insert(void *data, void *user_data)
{
  share_item *src = data;
  Gnut_List **dest = user_data;
  share_item *dup;

  dup = (share_item *) ymaloc(sizeof(share_item), 290);
  memcpy(dup, src, sizeof(share_item));

  /* Give the duplicate its own strings instead of sharing src's */
  dup->path = ystdup(src->path, 443);
  dup->lc_path = ystdup(src->lc_path, 444);
  dup->fpath = ystdup(src->fpath, 445);

  *dest = gnut_list_prepend(*dest, dup);

  return 0;
}

/* Builds and returns a new list holding a deep copy (see share_insert) of
 * every share_item in l. Items are prepended as they are visited by
 * gnut_list_foreach. Returns 0 when nothing was copied. */
Gnut_List * share_copy_list(Gnut_List *l)
{
  Gnut_List *copied;

  GD_S(3, "share_copy_list entering\n");

  copied = 0;
  gnut_list_foreach(l, share_insert, &copied);

  return copied;
}

/* List-walk callback: releases the three path strings (path, lc_path,
 * fpath) owned by the share_item in data. user_data is unused. The
 * share_item itself is not released here. Always returns 0. */
int share_delete(void * data, void * user_data)
{
  share_item *item = data;

  GD_S(3, "share_delete entering\n");

  fre_str(&(item->path), 163);
  fre_str(&(item->lc_path), 164);
  fre_str(&(item->fpath), 165);

  GD_S(3, "share_delete returning\n");

  return 0;
}

/* int share_clear_list(Gnut_List *l)
 *
 * Disposes of a list of share_item copies: first the strings inside each
 * item (share_delete), then the list itself via gnut_list_fre.
 * Always returns 0. */
int share_clear_list(Gnut_List *l)
{
  GD_S(3, "share_clear_list entering\n");

  /* strings first, while the items are still reachable */
  gnut_list_foreach(l, share_delete, 0);

  gnut_list_fre(l);

  GD_S(3, "share_clear_list returning\n");

  return 0;
}
  
  
/* int share_clear()
 *
 * Empties the global share list: releases every item's strings and the
 * list itself, then resets share_root and the g_share_num / g_share_size
 * totals to zero. Always returns 0. */
int share_clear()
{
  GD_S(3, "share_clear entering\n");

  gnut_list_foreach(share_root, share_delete, 0);
  gnut_list_fre(share_root);

  /* Nothing is shared any more */
  share_root = 0;
  g_share_size = 0;
  g_share_num = 0;

  GD_S(3, "share_clear returning\n");

  return 0;
}

/* share_item *share_find(uint32 ref)
 *
 * Looks through share_root for the item whose ref equals the argument.
 *
 * Returns a newly allocated full copy of that item (with its own
 * duplicated path, lc_path and fpath strings), or 0 when no item has
 * that ref. */
share_item *share_find(uint32 ref)
{
  Gnut_List *ltmp;
  share_item *si,*si2;

  GD_S(3, "share_find entering\n");

  for (ltmp=share_root; ltmp; ltmp=gnut_list_next(ltmp)) {
    si = ltmp->data;
    if (si->ref == ref) {
      /* we found the correct one, now we'll make a copy
       * of it and return... */
      si2 = (share_item *) ymaloc(sizeof(share_item), 291);

      /* Copy every field (ref, size, mtime, ...) as share_insert does, so
       * nothing in the copy is left uninitialised; then give the copy its
       * own strings. */
      memcpy(si2, si, sizeof(share_item));
      si2->path = ystdup(si->path, 427);
      si2->lc_path = ystdup(si->lc_path, 428);
      si2->fpath = ystdup(si->fpath, 429);
      GD_S(3, "share_find returning si2="); GD_P(3, si2); GD_S(3, "\n");
      return si2;
    }
  }
  GD_S(3, "share_find returning failure\n");
  return 0;
}

/* Scanning code should work with normal find (not GNU find)

dogcow@redback.com sent this:

in src/share.c, starting around line 549...

      sprintf(leafname, "find %s -follow \\( -type f "
              "! -name \"*.gnut\" -o -type l \\) "
        "-print"); 

    (without the printf jazz)

      [all the other popen and setup goo]

  while (fgets(pathname, OUR_MAX_PATH, findfp)) {
    slashloc = rindex(pathname, '/'); 
    leafname = (slashloc ? slashloc + 1 : pathname);

    [continue on as normal, with the lnend = leafname stuff]
 */


/* Strip one trailing newline from s, if present, and return the resulting
 * length of s. */
static size_t sh_strip_eol(char *s)
{
  size_t len = strlen(s);

  if ((len > 0) && (s[len - 1] == '\n')) {
    s[--len] = 0;
  }
  return len;
}

/* int share_scan_dir(char *path, int verbose)
 *
 * Adds the files found under path to the share_root list, updating
 * g_share_num and g_share_size as it goes. The directory walk is delegated
 * to an external command run through popen(): 'find' when have_find is
 * set, otherwise a perl one-liner when have_perl is set. Either command
 * prints two lines per file: the name relative to path, then the full
 * path. Dot-files and "*.gnut" files are skipped, as are files that cannot
 * be stat()ed.
 *
 *  path     directory to index.
 *  verbose  if nonzero, print which tool is being used.
 *
 * Returns 0 on success, -1 if the command line does not fit in
 * OUR_MAX_PATH bytes or the pipe cannot be opened. share_mutex is held for
 * the whole scan and the work buffers are released on every exit path.
 *
 * NOTE(review): path is pasted unescaped into a shell command line, so a
 * path containing shell metacharacters will be interpreted by the shell.
 * Callers must only pass trusted paths. */
int share_scan_dir(char *path, int verbose)
{
  FILE *findfp;
  char *leafname;
  char *pathname;
  struct stat st;
  share_item *si;
  size_t len;
  int cmdlen;
  int sl;
  int rv;

  GD_S(3, "share_scan.dir entering\n");

  sl = conf_get_int("follow_symlinks");
  rv = -1;

  pthread_mutex_lock(&share_mutex);

  /* leafname doubles as the buffer for the command line */
  leafname = (char *) ymaloc(OUR_MAX_PATH, 293);
  pathname = (char *) ymaloc(OUR_MAX_PATH, 294);
  if (have_find) {
    if (verbose) {
      printf("Using 'find' to index %s\n", path);
    }
    if (sl) {
      cmdlen = snprintf(leafname, OUR_MAX_PATH,
              "find %s -follow \\( -type f "
              "! -name \"*.gnut\" -o -type l \\) "
              "-printf %s",    path,    "\\%P\\\\n\\%p\\\\n");
    } else {
      cmdlen = snprintf(leafname, OUR_MAX_PATH,
              "find %s \\( -type f "
              "! -name \"*.gnut\" -o -type l \\) "
              "-printf %s",    path,    "\\%P\\\\n\\%p\\\\n");
    }
  } else if (have_perl) {
    if (verbose) {
      printf("Using 'perl' to index %s\n", path);
    }
    /* NOTE(review): "-s _" tests for a non-zero size, so with
     * follow_symlinks == 0 the first elsif looks like it also skips every
     * non-empty directory; a symlink test may have been intended.
     * Left unchanged -- confirm before altering the script. */
    cmdlen = snprintf(leafname, OUR_MAX_PATH,
            "perl -we '"

            "$r = \"'%s'\"; "  /* $r is root */
            "%%visited = (); "

            "sub visit ($) "
            "{ "
            "  local ($p, $f, $d, $i); " /* path file device inode */
            "  local *DIR; "

            "  $p = shift; "
            "  opendir DIR, \"$r/$p\"; "
            "  while ($f = readdir DIR) "
            "  { "
            "    next if $f eq \"..\"; "
            "    ($d, $i) = stat \"$r/$p$f\"; "
            "    next if $visited{\"$d, $i\"}; "
            "    $visited{\"$d, $i\"} = 1; "
            "    next if $f eq \".\"; "

            "    if (-f _) { "
            "      print \"$p$f\\n$r/$p$f\\n\" "
            "      unless $f =~ /\\.gnut$/; "
            "    } "
            "    elsif ((%d == 0) && (-s _)) { } "
            "    elsif (-d _) { "
            "      visit(\"$p$f/\"); "
            "    } "
            "  } "
            "} "

            "visit(\"\");'",
            path, sl);
  } else {
    printf("NOTE: You cannot 'scan' shared files because you do not have\neither GNU 'find' or 'perl' on your system.\n");
    cmdlen = snprintf(leafname, OUR_MAX_PATH, "echo -n ''");
  }

  /* Never run a truncated command line */
  if ((cmdlen < 0) || ((size_t) cmdlen >= (size_t) OUR_MAX_PATH)) {
    GD_S(1, "share_scan.dir command line too long for buffer\n");
    goto done;
  }

  GD_S(1, "share_scan.dir about to run: \""); GD_S(1, leafname);
  GD_S(1, "\" \n");

  /* Open pipe from command */
  findfp = popen(leafname, "r");
  if (findfp == 0) {
    GD_S(1, "share_scan.dir pipe to find returned error: "); GD_I(1, errno);
    GD_S(1, "\n");
    goto done;
  }
  GD_S(1, "share_scan.dir retrieving results from find...\n");

  /* Results arrive as pairs of lines: leaf name, then full path */
  while (fgets(leafname, OUR_MAX_PATH, findfp)
         && fgets(pathname, OUR_MAX_PATH, findfp)) {
    /* remove trailing eol */
    len = sh_strip_eol(leafname);
    sh_strip_eol(pathname);

    if (leafname[0] == '.') {
      /* ignore dot-files */
      continue;
    }
    if ((len > 5) && (strcmp(leafname + len - 5, ".gnut") == 0)) {
      /* .gnut-files (actually already filtered out by the find command) */
      continue;
    }
    if (stat(pathname, &st) != 0) {
      /* Vanished or unreadable: st would be garbage, so skip the file */
      GD_S(1, "share_scan.dir cannot stat: "); GD_S(1, pathname);
      GD_S(1, "\n");
      continue;
    }

    si = (share_item *) ymaloc(sizeof(share_item), 295);

    si->size = (uint32) (st.st_size); /* %%% man 2 stat does not specify if st_size is signed or unsigned */
    si->mtime = st.st_mtime;
    g_share_size += ((double) (st.st_size));
    g_share_num++;
    si->ref = g_share_num;

    si->path = ystdup(leafname, 433); /* returned in query reply */
    si->lc_path = ystdup(pathname, 434); /* used in query match */
    si->fpath = ystdup(pathname, 435); /* real fullpath */

    /* lowercase the lc_path */
    make_lc(si->lc_path);
    make_7bit(si->lc_path);

    share_root = gnut_list_prepend(share_root, si);
  }

  pclose(findfp);
  rv = 0;

 done: ;

  fre_str(&leafname, 166);
  fre_str(&pathname, 167);

  pthread_mutex_unlock(&share_mutex);

  return rv;
}

/* my test implementation of search result caching, what I've got is a
 * hash based on the query token, and a queue based on the time of
 * search when a new token is searched for, it is first checked for in
 * the hash, if it exists, then the Gnut_List of shares is used
 * otherwise, a new search is performed, this search result is then
 * appended to the hash and queue, with the oldest query being removed
 * from the queue and the hash, much like in the routing section. It
 * would be a good idea upon a match to move the item back to the
 * front of the list, so that it wouldn't expire as quickly if it was
 * accessed frequently */
/* Hash callback for share_query_results entries: takes the CRC-32 of the
 * entry's query string (crc32_string) and folds it down to a single byte
 * by xor-ing in the value shifted right by 16 and then by 8. */
uchar share_cache_hash_func(void *d)
{
  share_query_results *entry = d;
  uint32 folded;

  folded = crc32_string(entry->query);
  folded ^= (folded >> 16);
  folded ^= (folded >> 8);

  return (folded & 0xff);
}

/* Comparison callback for share_query_results entries: orders two entries
 * by strcmp() of their query strings (0 means the queries are equal). */
int share_cache_compare_func(void *a, void *b)
{
  share_query_results *lhs = a;
  share_query_results *rhs = b;

  return strcmp(lhs->query, rhs->query);
}

/* Probes for the external tools share_scan_dir can use and records the
 * trycmd() results in have_find and have_perl. Always returns 0. */
int share_init()
{
  int find_probe;
  int perl_probe;

  find_probe = trycmd("find --version", "GNU fi");
  perl_probe = trycmd("perl -we 'print \"Perl rules!\\n\";'", "rules");

  have_find = find_probe;
  have_perl = perl_probe;

  return 0;
}

/* Takes two share lists and makes a new list, holding at most max items,
 * which is the logical AND of the two inputs: an item is included when its
 * ref appears in both l1 and l2.
 *
 * Each entry of the result is a freshly allocated copy (taken from the l2
 * item) with its own path strings. Returns 0 if nothing is common or if
 * max <= 0.
 *
 * The scan through l2 is resumed rather than restarted for every l1 item,
 * and it stops early as soon as an l2 ref is smaller than the l1 ref.
 * NOTE(review): this presumably relies on both lists being ordered by
 * descending ref -- confirm against the callers. */
Gnut_List *share_results_combine(Gnut_List *l1, Gnut_List *l2,int max)
{
  int i=0;
  Gnut_List *tmp,*tmp2,*prev=0;
  Gnut_List *nl=0;
  share_item *si1,*si2;
  share_item *nsi;

  if (max <= 0) {
    return nl;
  }

  tmp2 = l2;

  for (tmp=l1; tmp; tmp=tmp->next) {
    si1 = tmp->data;
    for (; tmp2; tmp2=tmp2->next) {
      si2 = tmp2->data;
      if (si2->ref == si1->ref) {
        /* they match, so we'll make a copy, and insert it into
         * the new list */
        nsi = (share_item *) ymaloc(sizeof(share_item), 288);
        memcpy(nsi, si2, sizeof(share_item));
        nsi->path = ystdup(si2->path, 424);
        nsi->lc_path = ystdup(si2->lc_path, 425);
        nsi->fpath = ystdup(si2->fpath, 426);
        nl = gnut_list_prepend(nl,nsi);

        /* stop as soon as the list holds max items (not max+1) */
        if ((++i) >= max) return nl;
        break;
      }
      if (si2->ref < si1->ref) {
        /* Step back one l2 node for the next l1 item. If no l2 node has
         * been passed yet, stay where we are: setting tmp2 to 0 here
         * would silently end all further matching. */
        if (prev) {
          tmp2 = prev;
        }
        break;
      }
      prev = tmp2;
    }
  }
  return nl;
}


/*
  The following description of the hashed query-match optimizations
  was supplied by Dan Risacher. He originally had his code confined
  to a separate file "share_hash.c" but upon merging his changes
  I decided to put them here.  -- RPM 20010611

    I would usually saturate my CPU when running gnut 0.4.23 on a
    450MHz Pentium III with an ADSL line sharing 2000+ files.  first,
    I thought it was a bug, but some quick profiling indicated the
    software was correct, just inefficient.

    This file, and the functions in share.c containing 'hash' is my
    attempt to re-write the search mechanism to make it somewhat
    faster.  I haven't profiled it that scientifically, but my
    un-scientific results (from using 'top') show it's about 10-20
    times faster, maybe more.

    The key data structure to my design is a hash-table of search
    terms with least-recently-used replacement semantics.  Nodes in
    the hash are connected to the rest of the structure in two
    different dimensions:

    1.  elements in each hash bucket are connected by an unsorted
        doubly-linked list (->next and ->prev_nxt)

    2.  all the elements in the entire hash are connected by a
        doubly-linked "recency list" sorted by the recency that the
        term was searched for (->more and ->less)
    
    The actual "payload" of a node in the hash is a search term and a
    pointer to a bitvector, where each bit in the vector indicates if 
    the file in the overall share list matches that term.  

    This code actually changes the semantics of a search because it
    doesn't handle repeated terms the same way the old code did.  If I
    really cared about that, I'd add it back in by detecting it and
    handling it as a special case *after* this code ran.

   note: gnut still leaks memory.  I don't think I introduced any
   leaks, though, since I do everything possible with one-time
   allocations.  If I'm motivated I'll debug it sometime.  memprof is
   good.
*/

/* Notes on DR1's hash code by RPM:

   Bitvectors are saved for the recentmost N searchkeys, where N is
   currently hard-set to SHARE_HASH_N (500; it was 1000 when the memory
   figures below were worked out). Users ought to be able to change this
   value to something else. A "searchkey" is a set of contiguous
   alphanumeric characters seen in a query string.

   DR1 was serving about 2300 files when he developed this code. His
   memory overhead for this search-match cache is therefore a little
   less than 300K bytes. For more typical Gnutella users (sharing
   maybe 100 files) the overhead would be about 20K.
 */

/*
#define HASH_DEBUG 1
*/

/* When defined, share_hash_lookup keeps hit/miss counters for the cache
 * and folds them into t_counter / t_hits every 1000 lookups. */
#define HASH_PROFILE 1

/* Number of search terms the cache can hold: this is the size of both the
 * node pool (hashnode[]) and the bucket array (hashtab[]), and the modulus
 * used by hash(). */
#define SHARE_HASH_N 500

/* 0.4.28.c01 Moved mutex to gnut.c */

/* Array of pointers to the share_items in share_root, in list order; bit k
 * of a match bitvector refers to si_vector[k]. (Re)built by
 * share_hash_init; 0 until then. */
share_item **si_vector = 0;
/* Debug shadow of si_vector: share_hash_init sets it equal to si_vector and
 * new_search_term emits a dqi() marker if the two ever differ. */
share_item **siv2 = (share_item **) 1;

/* The two ends of the recency list, whose nodes are linked through
 * ->more and ->less. */
static struct hlist* most_recent;
static struct hlist* least_recent;
/* Fixed pool of cache nodes; share_hash_insert recycles the least recently
 * used one instead of allocating. */
static struct hlist hashnode[SHARE_HASH_N];
/* Bucket heads, indexed by hash(); nodes in one bucket are chained through
 * ->next (with ->prev_nxt pointing back at the link that refers to them). */
static struct hlist* hashtab[SHARE_HASH_N];
/* size measured in int32's */
int matchvectorsize = -1;
/* One contiguous allocation holding the bitvectors of all SHARE_HASH_N
 * nodes; allocated by share_hash_init. */
static match_bitvector matchvectors = 0;


/* Bucket index for the search term s: a multiply-by-31 rolling hash over
 * the characters of s, reduced modulo SHARE_HASH_N. */
static unsigned int hash(char *s)
{
  unsigned int acc = 0;
  char *cursor = s;

  while (*cursor != '\0') {
    acc = 31 * acc + *cursor;
    cursor++;
  }

  return acc % SHARE_HASH_N;
}

#ifdef HASH_DEBUG
/* Debug helper: reports where key sits in the recency list.
 *  *lm  receives the number of nodes passed walking from least_recent
 *       along ->more before reaching key (or the end of the list).
 *  *ml  receives the same count walking from most_recent along ->less. */
void count_position (struct hlist* key, int *lm, int *ml) 
{
  struct hlist* walker;
  int steps;

  steps = 0;
  walker = least_recent;
  while (walker && walker != key) {
    steps++;
    walker = walker->more;
  }
  *lm = steps;

  steps = 0;
  walker = most_recent;
  while (walker && walker != key) {
    steps++;
    walker = walker->less;
  }
  *ml = steps;
}

int debug_recency_list(char * which, char *term)
{
  int bugs = 0;
  int count = 0 ;
  struct hlist* np;
  for (np = most_recent; np; np= np->less) {
    count++;
    if (count > 1010) {
      break;
    }
  }
  if (count != 1000) {
    if (gc_debug_opts & 128) { /* tagok */
      fprintf(stderr, "%s length most->least = %d\n", which, count);
    }
    bugs++;
  }
  count=0;
  for (np = least_recent; np; np= np->more) {
    count++;
    if (count > 1010) {
      break;
    }
  }
  if (count != 1000) {
    if (gc_debug_opts & 128) { /* tagok */
      fprintf(stderr, "%s length least->most = %d\n", which, count);
    }
    bugs++;
  }
  count=0;
  for (np = most_recent; np; np= np->more) {
    count++;
    if (count > 1010) {
      break;
    }
  }
  if (count != 1) {
    if (gc_debug_opts & 128) { /* tagok */
      fprintf(stderr, "%s length most->more = %d\n", which, count);
    }
    bugs++;
  }
  count=0;
  for (np = least_recent; np; np= np->less) {
    count++;
    if (count > 1010) {
      break;
    }
  }
  if (count != 1) {
    if (gc_debug_opts & 128) { /* tagok */
      fprintf(stderr, "%s length least->less = %d\n", which, count);
    }
    bugs++;
  }

  if (bugs) {
    if (gc_debug_opts & 128) { /* tagok */
      fprintf(stderr, "term is %s\n", term);
      fprintf(stderr, "most recent term was %s\n", most_recent->hl_term);
      fprintf(stderr, "least recent term was %s\n", least_recent->hl_term);
    }
  }

  return bugs;
}
#endif

/* Decaying totals of cache lookups (t_counter) and cache hits (t_hits):
 * every 1000 lookups share_hash_lookup scales each by 15/16 and adds the
 * latest batch (only when HASH_PROFILE is defined). Zeroed by
 * share_hash_init. */
int t_counter;
int t_hits;

void share_hash_init() 
{
  Gnut_List *glist;
  int mvsize;
  int i;

  pthread_mutex_lock(&share_hash_mutex);

  i = gnut_list_size(share_root);
  if (gc_debug_opts & 128) { /* tagok */
    printf("list %d share_num %u\n", i, g_share_num);
  }
  /*   share_num = i; */

  /* Calculate the size of the bit-vector that will be allocated for each
   * cached searchkey result. We need one bit for each shared file.
   * We can fit 32 bits in a word, so we take the number of files and
   * divide by 32, always rounding up */
  mvsize = (i>>5)+((i&((1<<5)-1))?1:0);
  if (mvsize != ((i+31)/32)) {
    if (gc_debug_opts & 128) { /* tagok */
      printf("I can't believe it! When i == %d, (i>>5)+((i&((1<<5)-1))?1:0)\nreally is different from (i+31)/32\n", i);
    }
  }

  /* si_vector is an array of pointers to the share_items. When returning
   * hits, we don't actually allocate and copy the share_items, instead we
   * just point to the existing ones already in the share list. This
   * array makes it more efficient to convert bits in the hitvectors to
   * their corresponding share_item pointers. */
  if (si_vector) {
    fre_sia(&si_vector, 521);
  }
  si_vector = (share_item**) ycaloc(i, sizeof(share_item*), 520);
  siv2 = si_vector;
  for (i=0, glist = share_root;  glist;  glist = glist->next) {
    si_vector[i++] = glist->data;
  }

  /* The hitvectors for the cache are allocated in one huge contiguous
   * block, and the hash data structure's entries each point to a
   * mvsize-word chunk of this block. Because it's so big and this style
   * of memory management is so ancient, I've decided to call this block
   * "Nessie". But I'll stop short of actually renaming the variable. */
  if (matchvectors) {
    fre_mbv(&matchvectors, 538);
  }
  matchvectors = (match_bitvector) ycaloc(mvsize * SHARE_HASH_N,
					  sizeof(uint32), 534);
  matchvectorsize = mvsize;

  for (i = 0;  i < SHARE_HASH_N;  i++) {
    if (hashnode[i].hl_term) {
      fre_str(&(hashnode[i].hl_term), 537);
    }
    hashnode[i].hl_mv = &matchvectors[i*mvsize];
    hashnode[i].prev_nxt = 0;
    hashnode[i].next = 0;
    hashnode[i].less =   (i > 0)            ? (&hashnode[i-1]) : 0;
    hashnode[i].more = (i+1 < SHARE_HASH_N) ? (&hashnode[i+1]) : 0;
  }
  least_recent = &hashnode[0];
  most_recent = &hashnode[SHARE_HASH_N-1];

  t_counter = t_hits = 0;
  sh_qreplies_active = 0;

  pthread_mutex_unlock(&share_hash_mutex);
}

/* Looks the search term s up in the cache.
 *
 *  s   the term to find.
 *  mv  out: on a hit, the cached bitvector for s; 0 on a miss.
 *
 * Returns the cached match count on a hit and 0 on a miss (callers tell
 * the two apart through *mv). A hit also moves the node to the
 * most-recent end of the recency list. The lookup runs under
 * share_hash_mutex. */
unsigned int share_hash_lookup(char* s, match_bitvector *mv)
{
#ifdef HASH_PROFILE
  static  int counter = 0;
  static  int hits, misses;
#endif 
#ifdef HASH_DEBUG
  int lm, ml;
#endif
  struct hlist *node;
  unsigned int found = 0;

  *mv = 0;

  /* begin critical section */
  pthread_mutex_lock(&share_hash_mutex);

#ifdef HASH_PROFILE
  counter++;
  if (counter > 999) {
    if (gc_debug_opts & 128) { /* tagok */
      fprintf(stderr,
              "With cache size of %d, in 1000 terms, %d hits, %d misses\n",
              SHARE_HASH_N, hits, misses);
    }
    t_counter = ((t_counter * 15L) / 16L) + counter;
    t_hits = ((t_hits * 15L) / 16L) + hits;
    counter = hits = misses = 0;
  }
  /* assume a miss; corrected below if the term turns up */
  misses++;
#endif

  /* Walk the bucket until the term is found or the chain ends */
  node = hashtab[hash(s)];
  while (node && (strcmp(s, node->hl_term) != 0)) {
    node = node->next;
  }

  if (node) {
#ifdef HASH_PROFILE
    hits++;
    misses--;
#endif

#ifdef HASH_DEBUG
    count_position(node, &lm, &ml);
#endif

    /* Unhook the node and re-attach it at the most-recent end, unless it
     * is already there */
    if (node != most_recent) {

#ifdef HASH_DEBUG
      debug_recency_list("lookup 1", s);
#endif
      if (node == least_recent) {
        least_recent = node->more;
      }

      if (node->more) {
        node->more->less = node->less;
      }
      if (node->less) {
        node->less->more = node->more;
      }
      most_recent->more = node;
      node->less = most_recent;
      node->more = 0;
      most_recent = node;

#ifdef HASH_DEBUG
      if (debug_recency_list("lookup 2", s)) {
        fprintf(stderr, "lm is %d, ml is %d\n", lm, ml);
        kill(getpid(), SIGSEGV);
      }
#endif
    }

    *mv = node->hl_mv;
    found = node->n_matches;
  }

  pthread_mutex_unlock(&share_hash_mutex);
  return found;
}

/*

  Another comment from Dan Risacher, discussing share_hash_insert  -RPM

    Originally I tried to minimize the code in the critical section, in
    a vain attempt to maximize performance on SMP machines.
    Unfortunately, it didn't work, because of some subtle thread
    interleave cases.  The code below forces all searches to be
    serialized by the mutex, so in theory it might be slower than the
    old search code on multiprocessor machines.  In practice, it's
    about 10x faster (or more), so it still does better on any machine
    likely to run gnut.  Are many people using gnut on 8+ processor
    machines?

    There might be a middle ground.  I'm only seeing 50% cache hits,
    but significantly lower CPU utilization, so a lot of the
    improvements I made were just in the overhead, such as eliminating
    many needless malloc/free pairs.  Of course, I'm sharing obscure
    music, and most searches are for pop music or porn, so users with
    more mainstream taste will see better results (because more terms
    will be cache hits).
*/

/* Stores a freshly computed result in the cache, evicting the least
 * recently used entry to make room.
 *
 *  term         the search term (copied; the caller keeps its string).
 *  matchvector  matchvectorsize words of match bits (copied into the
 *               node's own storage).
 *  n_matches    number of bits set in matchvector.
 *
 * Returns the cache's own copy of the bitvector. The list and bucket
 * updates are done under share_hash_mutex. */
match_bitvector share_hash_insert(char *term, 
                                  match_bitvector matchvector,
                                  unsigned int n_matches)
{
  struct hlist* victim;
  unsigned int bucket;
  int w;

  bucket = hash(term);

  /* begin critical section */
  pthread_mutex_lock(&share_hash_mutex);

  /* The node to recycle is whichever was used longest ago */
  victim = least_recent;

  if (victim->hl_term) {
    fre_str(&(victim->hl_term), 536);
  }

  /* Unhook it from the bucket chain it is currently in (if any) */
  if (victim->prev_nxt) {
    *(victim->prev_nxt) = victim->next;
  }
  if (victim->next) {
    victim->next->prev_nxt = victim->prev_nxt;
  }

  /* Unhook it from the old end of the recency list */
  /* victim->more should always be defined here... */
  least_recent = victim->more;
  least_recent->less = 0;

  /* Fill in the new payload */
  victim->hl_term = ystdup(term, 535);
  for (w = 0; w < matchvectorsize; w++) {
    victim->hl_mv[w] = matchvector[w];
  }

  /* Push it on the front of its new bucket */
  victim->next = hashtab[bucket];
  victim->prev_nxt = &hashtab[bucket];
  if (hashtab[bucket]) {
    hashtab[bucket]->prev_nxt = &victim->next;
  }
  hashtab[bucket] = victim;

  /* Re-attach it at the most-recent end of the recency list */
  most_recent->more = victim;
  victim->less = most_recent;
  victim->more = 0;
  most_recent = victim;
  victim->n_matches = n_matches;
#ifdef HASH_DEBUG
  debug_recency_list("insert", term);
#endif
  pthread_mutex_unlock(&share_hash_mutex);
  /* end critical section */

  return victim->hl_mv;
}

/* Computes the match bitvector for a term that is not in the cache and
 * then stores it there.
 *
 *  term     the (lowercased) search term.
 *  mv       out: the cache's copy of the resulting bitvector, as returned
 *           by share_hash_insert.
 *  scratch  caller-supplied work area of matchvectorsize words; it is
 *           cleared and filled in here.
 *
 * Bit i is set when si_vector[i]->lc_path contains term as a substring.
 * Returns the number of bits set. */
unsigned int new_search_term(char* term, match_bitvector *mv,
  match_bitvector scratch)
{
  unsigned int n_matches;
  share_item* si;
  uint32 i;

  n_matches = 0;

  dqi(0x00f0);

  if (gc_debug_opts & 128) { /* tagok */
    printf("share_num %u\n", g_share_num);
  }

  memset(scratch, 0, matchvectorsize * sizeof(uint32));
  dqi(0x00f1);
  /* The second condition keeps writes inside scratch even if g_share_num
   * and the vector size set up by share_hash_init should ever disagree. */
  for (i = 0; (i < g_share_num) && ((int) (i >> 5) < matchvectorsize); i++) {
    dqi(0x00f6);
    dqi(0x8000 + i);
    if (si_vector != siv2) {
      dqi(0x00f7);
    }
    si = si_vector[i];
    if (si) {
      if (si->lc_path) {
        if (strstr(si->lc_path, term)) {
          /* word i/32, bit i%32. The shift is done on an unsigned value:
           * shifting a signed 1 into bit 31 is undefined behaviour. */
          scratch[i>>5] |= ((uint32) 1) << (i & 31);
          n_matches++;
        }
      } else {
        /* How are the share list item names getting deleted? */
        dqi(0x00f2);
      }
    } else {
      /* How are these getting cleared? */
      dqi(0x00f5);
    }
  }
  dqi(0x00f3);
  *mv = share_hash_insert(term, scratch, n_matches);
  dqi(0x00f4);

  return n_matches;
}

/* Gnut_List *share_srch.hash(char *query, int max)
 *
 * searches through the shares in the hash table,
 * and returns a list of up to <max> shares that match the query
 *
 * This is a boolean AND match, ignoring case */
Gnut_List *share_srch_hash(char *orig_query, int max, int *n_reslts)
{
  Gnut_List *nlist;
  share_item *si;
  match_bitvector resultmv;
  match_bitvector termmv;
  match_bitvector scratch;
  unsigned int n_matches;
  char *t_query;
  char *terms[MAX_TOKENS];
  int nterms;
  int i, j, m, n, count;
  uint32 anyhits;
  Gnut_List * rv;

  rv = 0;
  nlist = 0;
  n_matches = 0;
  nterms = 0;
  i = j = m = n = count = 0;
  *n_reslts = 0;

  if (si_vector == 0) {
    return 0;
  }

  dqi(0x007d);

  /* do I really care if I change the query? */
  t_query = ystdup(orig_query, 560);
  make_lc(t_query);
  nterms = sh_tokenize(t_query, terms, MAX_TOKENS);

  if (nterms == 0) {
    dqi(0x007f);
    rv = 0;
    goto ss_exit2;
  }

  dqi(0x007e);

  resultmv = (unsigned int*) ycaloc(matchvectorsize, sizeof(uint32), 529);

  dqi(0x0081);

  /* We use alloca here because it allows multiple threads to be
   * in this routine simultaneously and without having to allocate
   * space on the heap. */
  /* %%% Are there any machines out there that don't support alloca? */
  scratch = (match_bitvector) alloca(matchvectorsize * sizeof(uint32));
  dqi(0x0080);

  pthread_mutex_lock(&share_hash_mutex);
  sh_qreplies_active++;
  if (sh_qreplies_hi < sh_qreplies_active) {
    sh_qreplies_hi = sh_qreplies_active;
  }
  if (gc_debug_opts & 128) { /* tagok */
    printf("ssh + %d %d\n", sh_qreplies_active, sh_qreplies_hi);
  }
  pthread_mutex_unlock(&share_hash_mutex);

  /* While processing the first term, we create the initial hitvector
   * resultmv. This will be AND-ed with the hitvectors from the 2nd and
   * subsequent terms. */
  n_matches = share_hash_lookup(terms[0], &termmv);
  dqi(0x0082);
  if (termmv == 0) {
    dqi(0x008e);
    n_matches = new_search_term(terms[0], &termmv, scratch);
    dqi(0x0083);
  }
  /* Check to see if this term doesn't match anything we're sharing */
  dqi(0x008f);
  if (n_matches == 0) {
    dqi(0x0084);
    rv = 0;
    goto ss_exit1;
  }
  /* Initial value of resultmv is just a copy of the hitvector for this
   * first term. */
  dqi(0x0090);
  for (j = 0;  j < matchvectorsize;  j++) {
    resultmv[j] = termmv[j];
  }
  dqi(0x0085);
  
  /* rest of terms are &='ed */
  for (i = 1;  i < nterms;  i++) {
    /* See if this term already has a hitvector in the cache */
    n_matches = share_hash_lookup(terms[i], &termmv);
    dqi(0x0086);
    if (termmv == 0) {
      /* We have to make a new hitvector */
      n_matches = new_search_term(terms[i], &termmv, scratch);
      dqi(0x0087);
    }
    /* Check to see if this keyword results in no hits */
    if (n_matches == 0) {
      dqi(0x0088);
      rv = 0;
      goto ss_exit1;
    }
    /* AND in the new hitvector */
    anyhits = 0;
    for (j = 0;  j < matchvectorsize;  j++) {
      resultmv[j] &= termmv[j];
      anyhits |= resultmv[j];
    }
    dqi(0x0089);
    /* Check again to see if there are no hits. This allows us to avoid
     * iterating for the next term and also keeps that next term out
     * of the cache, which makes the cache just slightly more relevant.
     * Why? if the query doesn't match anything we're sharing, that
     * raises the odds of the query being garbage, and garbage queries
     * are repeated less often. */
    if (anyhits == 0) {
      dqi(0x008a);
      rv = 0;
      goto ss_exit1;
    }
  }

  dqi(0x008b);

  /* We made it this far, that means there are actually some hits! So now
   * we have to cons up a list of the matches for use by gp_reply_make() */
  for (m = 0;  m < matchvectorsize;  m++) {
    while (resultmv[m]) {
      /* ffs(3) - find first bit set in a word <string.h> */
      n = ffs(resultmv[m]) - 1;
      si = si_vector[32*m+n];
      nlist = gnut_list_prepend(nlist, si);  /* freed in share_hash_clear_list */
      resultmv[m] &= ~(1<<n);
      count++;
      if (count >= max) {
	break;
      }
    }
    if (count >= max) {
      break;
    }
  }

  dqi(0x008c);

  rv = nlist;

 ss_exit1: ;

  if (rv == 0) {
    pthread_mutex_lock(&share_hash_mutex);
    sh_qreplies_active--;
    if (gc_debug_opts & 128) { /* tagok */
      printf("ssh - %d\n", sh_qreplies_active);
    }
    pthread_mutex_unlock(&share_hash_mutex);
  }

  fre_mbv(&resultmv, 533);

 ss_exit2: ;

  fre_str(&t_query, 526);

  dqi(0x008d);

  *n_reslts = count;

  return rv;
}

/* int share_hash_clear_list(Gnut_List *l)
 *
 * Releases a result list produced by share_srch_hash.
 *
 * Only the list nodes are released: the data pointers refer to the
 * share_items in si_vector, which stay alive. This is why
 * share_clear_list cannot be used for these lists. If the list was
 * non-empty, sh_qreplies_active is decremented under share_hash_mutex.
 * Always returns 0. */
int share_hash_clear_list(Gnut_List *l)
{
  Gnut_List *node;
  Gnut_List *following;
  int had_entries;

  had_entries = (l != 0);

  GD_S(3,"share_hash_clear_list entering\n");

  /* can't use gnut_list_fre, because I don't want to free the list
   * data items.  (they're the share_items in si_vector)  */
  for (node = l; node; node = following) {
    /* remember the successor before the node goes away */
    following = node->next;
    fre_gl(&node, 527);
  }

  if (had_entries) {
    pthread_mutex_lock(&share_hash_mutex);
    sh_qreplies_active--;
    if (gc_debug_opts & 128) { /* tagok */
      printf("shcl - %d\n", sh_qreplies_active);
    }
    pthread_mutex_unlock(&share_hash_mutex);
  }

  GD_S(3,"share_hash_clear_list returning\n");

  return 0;
}
