#include <stdio.h>

#include <string.h>
#include <time.h>
#include <limits.h>

//#define BM_DEBUG
/* Number of timing samples printed for each function under test. */
#define RESULTS_PER_FUNCTION 20
/* Number of calls made to the function under test per timing sample. */
#define TESTLOOPS 500000


/* Author: David Rowley, 2008-08-30.
 * Purpose: Test functions for possible strpos patch for postgresql 8.4
 * 
 * Notes:
 * The reader should be aware of the expression:
 * "It's like looking for a needle in a haystack."
 * 
 * #define BM_DEBUG if you want to view how the function is searching
 *
 * In order to support multi byte characters we implement a lossy
 * hashing system to store the last occurrence of each character.
 * The character being searched must be ANDed with BM_AND to
 * stay within the bounds of the array.
 * The size of this array is a trade-off between the memory it
 * uses along with the time to initialize it vs. the mix of chars
 * likely to be in the search string (needle). UTF8 and the like
 * are likely to benefit from larger arrays. Short ASCII strings
 * like smaller arrays. This is all to do with the time it takes
 * to prepare the array for searching.
 *
 * Benchmarking results show that the current implementation is
 * pretty hard to beat for very small strings. In an attempt to
 * gain better performance all round I've created version 8 of
 * the function. This version contains code to allow the hash 
 * array to be a variable size depending on the size of the haystack.
 * This method allows the function to perform some cost based
 * optimization based on the length of the haystack, however
 * it should be noted that this still greatly depends on the mix
 * of characters in both the needle and haystack. It is possible
 * to preprocess the haystack to check the mix of characters,
 * however doing this is very likely not worth the additional CPU
 * cycles.
 */

/* Number of elements in the lossy character table used by versions 4-7.
 * Valid values: 2, 4, 8, 16, 32, 64, 128, 256. It must be a power of two
 * so that BM_AND below is a contiguous bit mask; anything else is NOT
 * good or probably not good.
 */
#define BM_HASH_SIZE 256

/* Mask applied to a character to obtain its table element.
 * Always one less than BM_HASH_SIZE (1, 3, 7, 15, 31, 63, 127 or 255),
 * so the masked value is always a valid index into the table.
 */
#define BM_AND (BM_HASH_SIZE-1)

/* Function prototypes; please see the function bodies for comments on these.
 * Every search function shares one signature so the implementations can be
 * compared like for like: (startpos, needle, haystack, lneedle, lhaystack),
 * returning an int position.
 */
int pg_strncmp(const char *a, const char *b, size_t n);
static int text_position_next(int startpos, const char *needle, const char *haystack, int lneedle, int lhaystack);

/* The Boyer-Moore-Horspool style candidates, versions 1 through 8. */
static int bmstrpos_v1(int startpos, const char *needle, const char *haystack, int lneedle, int lhaystack);
static int bmstrpos_v2(int startpos, const char *needle, const char *haystack, int lneedle, int lhaystack);
static int bmstrpos_v3(int startpos, const char *needle, const char *haystack, int lneedle, int lhaystack);
static int bmstrpos_v4(int startpos, const char *needle, const char *haystack, int lneedle, int lhaystack);
static int bmstrpos_v5(int startpos, const char *needle, const char *haystack, int lneedle, int lhaystack);
static int bmstrpos_v6(int startpos, const char *needle, const char *haystack, int lneedle, int lhaystack);
static int bmstrpos_v7(int startpos, const char *needle, const char *haystack, int lneedle, int lhaystack);
static int bmstrpos_v8(int startpos, const char *needle, const char *haystack, int lneedle, int lhaystack);



int main(int argc, char **argv)
{
  clock_t start;
  clock_t end;
  int i;
  int lneedle,lhaystack;
  int r;
  int pos;


  if (argc < 3) {
    printf("Syntax: %s <needle> <hay stack>\n");
    return -1;
  }

  lneedle = strlen(argv[1]);
  lhaystack = strlen(argv[2]);

 
#ifdef BM_DEBUG
  /* Debug code, change this to the function you wish to debug or learn about */

  pos = bmstrpos_v2(0, argv[1], argv[2], strlen(argv[1]), strlen(argv[2]));
  if (pos > 0)
  {
    printf("needle found! The arrow points to the start of the needle\n");
    printf("%s\n", argv[2]);
    printf("%*s\n", pos, "^");
  }
#else

  /* Start benchmarking */

  printf("v1 (single char only)\n");
  for(r = 0;r<RESULTS_PER_FUNCTION;r++)
  {
    start = clock();

    for (i=0;i<TESTLOOPS;i++)
    {
      bmstrpos_v1(0, argv[1], argv[2], lneedle, lhaystack);
    }

    end = clock();
    printf("%d\n", (end - start));
  }

  printf("v2 (single char only) (missing char = needle len)\n");
  for(r = 0;r<RESULTS_PER_FUNCTION;r++)
  {
    start = clock();

    for (i=0;i<TESTLOOPS;i++)
    {
      bmstrpos_v2(0, argv[1], argv[2], lneedle, lhaystack);
    }

    end = clock();
    printf("%d\n", (end - start));
  }

  printf("v3 (single char only) (static array)\n");
  for(r = 0;r<RESULTS_PER_FUNCTION;r++)
  {
    start = clock();

    for (i=0;i<TESTLOOPS;i++)
    {
      bmstrpos_v3(0, argv[1], argv[2], lneedle, lhaystack);
    }

    end = clock();
    printf("%d\n", (end - start));
  }

  printf("v4 (supports multi byte chars) (%u array elements)\n", BM_HASH_SIZE);
  for(r = 0;r<RESULTS_PER_FUNCTION;r++)
  {
    start = clock();

    for (i=0;i<TESTLOOPS;i++)
    {
      bmstrpos_v4(0, argv[1], argv[2], lneedle, lhaystack);
    }

    end = clock();
    printf("%d\n", (end - start));
  }

  printf("v5 (supports multi byte chars) (%u array elements)\n", BM_HASH_SIZE);
  for(r = 0;r<RESULTS_PER_FUNCTION;r++)
  {
    start = clock();

    for (i=0;i<TESTLOOPS;i++)
    {
      bmstrpos_v5(0, argv[1], argv[2], lneedle, lhaystack);
    }

    end = clock();
    printf("%d\n", (end - start));
  }

  printf("v6 (supports multi byte chars) (static array) (%u array elements)\n", BM_HASH_SIZE);
  for(r = 0;r<RESULTS_PER_FUNCTION;r++)
  {
    start = clock();

    for (i=0;i<TESTLOOPS;i++)
    {
      bmstrpos_v6(0, argv[1], argv[2], lneedle, lhaystack);
    }

    end = clock();
    printf("%d\n", (end - start));
  }

  printf("v7 (mising char = needle len) (%u array elements)\n", BM_HASH_SIZE);
  for(r = 0;r<RESULTS_PER_FUNCTION;r++)
  {
    start = clock();

    for (i=0;i<TESTLOOPS;i++)
    {
      bmstrpos_v7(0, argv[1], argv[2], lneedle, lhaystack);
    }

    end = clock();
    printf("%d\n", (end - start));
  }

  printf("v8 (mising char = needle len) (variable array elements)\n");
  for(r = 0;r<RESULTS_PER_FUNCTION;r++)
  {
    start = clock();

    for (i=0;i<TESTLOOPS;i++)
    {
      bmstrpos_v8(0, argv[1], argv[2], lneedle, lhaystack);
    }

    end = clock();
    printf("%d\n", (end - start));
  }

  printf("next (mock up of current version)\n");
  for(r = 0;r<RESULTS_PER_FUNCTION;r++)
  {
    start = clock();

    for (i=0;i<TESTLOOPS;i++)
    {
      text_position_next(0, argv[1], argv[2], lneedle, lhaystack);
    }

    end = clock();
    printf("%d\n", (end - start));
  }

#endif /* BM_DEBUG */
  return 0;
}

/* Hand-written stand-in for strncmp().
 *
 * Compares at most n characters of a and b, stopping early at the first
 * difference or at a terminating NUL. Returns 0 when the compared
 * characters are all equal, otherwise the difference between the first
 * pair of characters that differ.
 */
int pg_strncmp(const char *a, const char *b, size_t n)
{
  int remaining = n;

  /* nothing to compare */
  if (remaining == 0)
    return 0;
  remaining--;

  for (;; a++, b++)
  {
    if (*a != *b)
      return (*a - *b);

    /* equal so far: stop at the end of the strings ... */
    if (*a == '\0')
      return 0;

    /* ... or once n characters have been compared */
    if (remaining == 0)
      return 0;
    remaining--;
  }
}


/* Mockup version of current pg 8.3 function, strncmp was replaced as my compiler optimised
 * some of the code so well the functions probably ran only once rather than TESTLOOPS times
 * strncmp being optimised already was giving text_position_next an unfair advantage
 * This function was taken from varlena.c -> postgresql 8.3
 *
 * Simple case only (single byte encoding): tries every start position in
 * turn and compares the needle there with pg_strncmp().
 *
 * startpos  - 1 based position in the haystack to start searching from.
 *             Values below 1 are treated as 1, otherwise the search would
 *             begin by reading the byte in front of the haystack.
 * needle    - string to search for, lneedle characters long
 * haystack  - string to search in, lhaystack characters long
 *
 * Returns the 1 based position of the first match at or after startpos,
 * or 0 when there is none.
 */
static int text_position_next(int startpos, const char *needle, const char *haystack, int lneedle, int lhaystack)
{
  /* no use in searching the haystack past the point where the needle will fit */
  int last_start = lhaystack - lneedle;
  int p;

  if (startpos < 1)
    startpos = 1;

  for (p = startpos - 1; p <= last_start; p++)
  {
    /* cheap first-character test before paying for the full comparison */
    if (haystack[p] == needle[0] && pg_strncmp(haystack + p, needle, lneedle) == 0)
      return p + 1;
  }

  return 0;
}


/* One table element per possible unsigned char value. Parenthesized so the
 * macro expands safely inside any expression.
 */
#define BM_CHAR_INDEX_LEN (UCHAR_MAX + 1)

/* Version 1
 * Boyer-Moore-Horspool style search for single byte characters. The table
 * records, for each character, the index of its last occurrence in the
 * needle (ignoring the needle's final character), or -1 when absent.
 *
 * startpos  - unused; the search always begins at the start of the haystack
 * needle    - string to search for, lneedle characters long
 * haystack  - string to search in, lhaystack characters long
 *
 * Returns the 1 based position of the start of the first match, or 0 when
 * there is no match. An empty needle, or one longer than the haystack, is
 * reported as no match.
 */
static int bmstrpos_v1(int startpos, const char *needle, const char *haystack, int lneedle, int lhaystack)
{
  int charindex[BM_CHAR_INDEX_LEN];
  int last = lneedle - 1;   /* index of the needle's final character */
  int hpos;                 /* haystack index aligned with needle[last] */
  int ai;

  (void) startpos;

  /* Nothing sensible to search for. This also keeps every index below
   * inside the two strings.
   */
  if (lneedle < 1 || lneedle > lhaystack)
    return 0;

  /* Initialize the character index. -1 marks the non existence of a
   * character; 0 would be the first char in the needle.
   */
  for (ai = 0; ai < BM_CHAR_INDEX_LEN; ai++)
    charindex[ai] = -1;

  /* Mark the last occurrence of each character, ignoring the final one */
  for (ai = 0; ai < last; ai++)
    charindex[(unsigned char) needle[ai]] = ai;

  hpos = last;

  while (hpos < lhaystack)
  {
    int n = last;   /* walks the needle backwards */
    int h = hpos;   /* walks the haystack backwards */
    int found;

    while (needle[n] == haystack[h])
    {
      /* If we've descended back to the start of the needle then we've
       * found a match; h is where the match starts in the haystack.
       */
      if (n == 0)
        return h + 1; /* +1 for one based position */

      n--;
      h--;
    }

    /* We didn't find a complete match. If the haystack character under
     * the end of the needle occurs in the needle, move the needle up to
     * align with it; otherwise move the needle up by its own length.
     */
    found = charindex[(unsigned char) haystack[hpos]];

    if (found == -1)
    {
#ifdef BM_DEBUG
      printf("No match, skipping needle length\n");
#endif
      hpos += lneedle;
    }
    else
    {
#ifdef BM_DEBUG
      printf("Match found %c at %d adding skipping %d chars\n", haystack[hpos], found, last - found);
#endif
      hpos += last - found;
    }
  }

#ifdef BM_DEBUG
  printf("Finished. No Match!\n");
#endif

  return 0;
}


/* Version 2
 * Similar to version 1 but stores the distance to skip in the charindex,
 * with the needle length as the value for characters that do not occur
 * in the needle. This allows elimination of the test to see if the char
 * exists in the charindex. The performance benefit of this comes in even
 * with very small searches.
 *
 * startpos  - unused; the search always begins at the start of the haystack
 * needle    - string to search for, lneedle characters long
 * haystack  - string to search in, lhaystack characters long
 *
 * Returns the 1 based position of the start of the first match, or 0 when
 * there is no match. An empty needle, or one longer than the haystack, is
 * reported as no match.
 */
static int bmstrpos_v2(int startpos, const char *needle, const char *haystack, int lneedle, int lhaystack)
{
  int charindex[BM_CHAR_INDEX_LEN];
  int last = lneedle - 1;   /* index of the needle's final character */
  int hpos;                 /* haystack index aligned with needle[last] */
  int ai;

  (void) startpos;

  /* Nothing sensible to search for. This also keeps every index below
   * inside the two strings.
   */
  if (lneedle < 1 || lneedle > lhaystack)
    return 0;

  /* Initialize the character index. lneedle marks the non existence of a
   * character: such a character lets us skip the whole needle length.
   */
  for (ai = 0; ai < BM_CHAR_INDEX_LEN; ai++)
    charindex[ai] = lneedle;

  /* Store the skip for the last occurrence of each character, ignoring
   * the needle's final character.
   */
  for (ai = 0; ai < last; ai++)
    charindex[(unsigned char) needle[ai]] = last - ai;

  hpos = last;

  while (hpos < lhaystack)
  {
    int n = last;   /* walks the needle backwards */
    int h = hpos;   /* walks the haystack backwards */

#ifdef BM_DEBUG
    printf("-> %*s\n", hpos + 1, needle);
    printf("-> %s\n", haystack);
#endif

    while (needle[n] == haystack[h])
    {
#ifdef BM_DEBUG
      printf("%c = %c\n", needle[n], haystack[h]);
#endif

      /* If we've descended back to the start of the needle then we've
       * found a match; h is where the match starts in the haystack.
       */
      if (n == 0)
        return h + 1; /* +1 for one based position */

      n--;
      h--;
    }

    hpos += charindex[(unsigned char) haystack[hpos]];
  }

#ifdef BM_DEBUG
  printf("Finished. No Match!\n");
#endif

  return 0;
}

/* Version 3 Experimental (Testing how fast it can work)
 * This version as is won't support multi bytes. Really this is the
 * version that I've gone all out on to see just how much speed
 * I can get out of this method. It uses a static array, which I fully
 * expect most people to consider a bad idea.
 * The static array is zeroed when the program starts; we re-zero it
 * when we're done using it, just before returning the value. This
 * allows us to cut down on the re-zero time as we only zero the
 * elements that exist in the needle.
 * Though if the needle was to be longer than the array then
 * it's likely a better idea to add code to allow it to take
 * the shortest route (all elements in needle or all in charindex
 * whichever is less).
 *
 * startpos  - unused; the search always begins at the start of the haystack
 * needle    - string to search for, lneedle characters long
 * haystack  - string to search in, lhaystack characters long
 *
 * Returns the 1 based position of the start of the first match, or 0 when
 * there is no match. An empty needle, or one longer than the haystack, is
 * reported as no match.
 */
static int bmstrpos_v3(int startpos, const char *needle, const char *haystack, int lneedle, int lhaystack)
{
  /* Holds (index of last occurrence + 1); 0 marks non existence. Must be
   * all zero on entry and is left all zero on every exit path.
   */
  static int charindex[BM_CHAR_INDEX_LEN] = { 0 };
  int last = lneedle - 1;   /* index of the needle's final character */
  int hpos;                 /* haystack index aligned with needle[last] */
  int ai;

  (void) startpos;

  /* Nothing sensible to search for; the table has not been touched yet
   * so there is nothing to reset.
   */
  if (lneedle < 1 || lneedle > lhaystack)
    return 0;

  /* Mark the last occurrence of each character, ignoring the final one */
  for (ai = 0; ai < last; ai++)
    charindex[(unsigned char) needle[ai]] = ai + 1; /* 0 marks non existence */

  hpos = last;

  while (hpos < lhaystack)
  {
    int n = last;   /* walks the needle backwards */
    int h = hpos;   /* walks the haystack backwards */
    int found;

#ifdef BM_DEBUG
    printf("-> %*s\n", hpos + 1, needle);
    printf("-> %s\n", haystack);
#endif

    while (needle[n] == haystack[h])
    {
#ifdef BM_DEBUG
      printf("%c = %c\n", needle[n], haystack[h]);
#endif
      /* If we've descended back to the start of the needle then we've
       * found a match; h is where the match starts in the haystack.
       */
      if (n == 0)
      {
        /* leave the table zeroed for the next call */
        for (ai = 0; ai < last; ai++)
          charindex[(unsigned char) needle[ai]] = 0;

        return h + 1; /* +1 for one based position */
      }

      n--;
      h--;
    }

    found = charindex[(unsigned char) haystack[hpos]];

    if (found == 0)
    {
#ifdef BM_DEBUG
      printf("No match, skipping needle length\n");
#endif
      hpos += lneedle;
    }
    else
    {
#ifdef BM_DEBUG
      printf("Match found %c at %d adding skipping %d chars\n", haystack[hpos], found, lneedle - found);
#endif
      hpos += lneedle - found;
    }
  }

  /* leave the table zeroed for the next call */
  for (ai = 0; ai < last; ai++)
    charindex[(unsigned char) needle[ai]] = 0;

#ifdef BM_DEBUG
  printf("Finished. No Match!\n");
#endif
  return 0;
}




/* Version 4
 * This is very similar to version 5 only inits the array to -1 rather
 * than 0 as in version 5. See comments on v5 for more info.
 *
 * Characters are reduced to a table element with BM_AND, so several
 * characters may share one element; the element then holds whichever of
 * them occurs last in the needle, i.e. the shortest skip.
 *
 * startpos  - unused; the search always begins at the start of the haystack
 * needle    - string to search for, lneedle characters long
 * haystack  - string to search in, lhaystack characters long
 *
 * Returns the 1 based position of the start of the first match, or 0 when
 * there is no match. An empty needle, or one longer than the haystack, is
 * reported as no match.
 */
static int bmstrpos_v4(int startpos, const char *needle, const char *haystack, int lneedle, int lhaystack)
{
  int charindex[BM_HASH_SIZE];
  int last = lneedle - 1;   /* index of the needle's final character */
  int hpos;                 /* haystack index aligned with needle[last] */
  int ai;

  (void) startpos;

#ifdef BM_DEBUG
  printf("bmstrpos_v4, needle = %s, haystack = %s\n", needle, haystack);
#endif

  /* Nothing sensible to search for. This also keeps every index below
   * inside the two strings.
   */
  if (lneedle < 1 || lneedle > lhaystack)
    return 0;

  /* Initialize the character index. -1 marks the non existence of a
   * character; 0 would be the first char in the needle.
   */
  for (ai = 0; ai < BM_HASH_SIZE; ai++)
    charindex[ai] = -1;

  /* Mark the last occurrence of each character.
   * We ignore the last character
   */
  for (ai = 0; ai < last; ai++)
    charindex[(unsigned char) needle[ai] & BM_AND] = ai;

  hpos = last;

  while (hpos < lhaystack)
  {
    int n = last;   /* walks the needle backwards */
    int h = hpos;   /* walks the haystack backwards */
    int found;

#ifdef BM_DEBUG
    printf("-> %*s\n", hpos + 1, needle);
    printf("-> %s\n", haystack);
#endif

    while (needle[n] == haystack[h])
    {
#ifdef BM_DEBUG
      printf("%c = %c\n", needle[n], haystack[h]);
#endif
      /* If we've descended back to the start of the needle then we've
       * found a match; h is where the match starts in the haystack.
       */
      if (n == 0)
        return h + 1; /* +1 for one based position */

      n--;
      h--;
    }

#ifdef BM_DEBUG
    printf("%c != %c\n", needle[n], haystack[h]);
#endif

    /* We didn't find a complete match. If the haystack character under
     * the end of the needle occurs in the needle, move the needle up to
     * align with it; otherwise move the needle up by its own length.
     */
    found = charindex[(unsigned char) haystack[hpos] & BM_AND];

    if (found >= 0)
    {
#ifdef BM_DEBUG
      printf("Match found %c at %d adding skipping %d chars\n", haystack[hpos], found, last - found);
#endif
      hpos += last - found;
    }
    else
    {
#ifdef BM_DEBUG
      printf("No match, skipping needle length\n");
#endif
      hpos += lneedle;
    }
  }

#ifdef BM_DEBUG
  printf("Finished. No Match!\n");
#endif

  return 0;
}

/* Version 5
 * I would say this is the most likely candidate out of all of the versions.
 * If the BM_HASH_SIZE is to be set to UCHAR_MAX+1 then it would just be a
 * matter of removing the ANDs.
 *
 * Characters are reduced to a table element with BM_AND, so several
 * characters may share one element; the element then holds whichever of
 * them occurs last in the needle, i.e. the shortest skip.
 *
 * startpos  - unused; the search always begins at the start of the haystack
 * needle    - string to search for, lneedle characters long
 * haystack  - string to search in, lhaystack characters long
 *
 * Returns the 1 based position of the start of the first match, or 0 when
 * there is no match. An empty needle, or one longer than the haystack, is
 * reported as no match.
 */

static int bmstrpos_v5(int startpos, const char *needle, const char *haystack, int lneedle, int lhaystack)
{
  int charindex[BM_HASH_SIZE];
  int last = lneedle - 1;   /* index of the needle's final character */
  int hpos;                 /* haystack index aligned with needle[last] */
  int ai;

  (void) startpos;

#ifdef BM_DEBUG
  printf("bmstrpos_v5, needle = %s, haystack = %s\n", needle, haystack);
#endif

  /* Nothing sensible to search for. This also keeps every index below
   * inside the two strings.
   */
  if (lneedle < 1 || lneedle > lhaystack)
    return 0;

  /* Initialize the character index. 0 marks the non existence of a
   * character; present characters store (index + 1), so 1 is the first
   * char in the needle.
   */
  for (ai = 0; ai < BM_HASH_SIZE; ai++)
    charindex[ai] = 0;

  /* Mark the last occurrence of each character.
   * We ignore the last character
   */
  for (ai = 0; ai < last; ai++)
    charindex[(unsigned char) needle[ai] & BM_AND] = ai + 1;

  hpos = last;

  while (hpos < lhaystack)
  {
    int n = last;   /* walks the needle backwards */
    int h = hpos;   /* walks the haystack backwards */
    int found;

#ifdef BM_DEBUG
    printf("-> %*s\n", hpos + 1, needle);
    printf("-> %s\n", haystack);
#endif

    while (needle[n] == haystack[h])
    {
#ifdef BM_DEBUG
      printf("%c = %c\n", needle[n], haystack[h]);
#endif
      /* If we've descended back to the start of the needle then we've
       * found a match; h is where the match starts in the haystack.
       */
      if (n == 0)
        return h + 1; /* +1 for one based position */

      n--;
      h--;
    }

#ifdef BM_DEBUG
    printf("%c != %c\n", needle[n], haystack[h]);
#endif

    /* We didn't find a complete match. If the haystack character under
     * the end of the needle occurs in the needle, move the needle up to
     * align with it; otherwise move the needle up by its own length.
     */
    found = charindex[(unsigned char) haystack[hpos] & BM_AND];

    if (found > 0)
    {
#ifdef BM_DEBUG
      printf("Match found %c at %d adding skipping %d chars\n", haystack[hpos], found, lneedle - found);
#endif
      hpos += lneedle - found;
    }
    else
    {
#ifdef BM_DEBUG
      printf("No match, skipping needle length\n");
#endif
      hpos += lneedle;
    }
  }

#ifdef BM_DEBUG
  printf("Finished. No Match!\n");
#endif

  return 0;
}

/* Version 6 (Experimental)
 * This version works similar to version 3.
 * Here we use a static array which will be
 * initialized to 0 when the backend starts up, we re-zero the array once we are done with it.
 * The rezero should really check the length of the needle to ensure it's not longer than the
 * array. If it was it would be faster to just re-zero the array with memset.
 *
 * The drawback with this version is that it will very slightly slow the backend startup time.
 * Another drawback is it's not very future proof, if the backends are ever multi-threaded this
 * function would not be safe to use.
 *
 * startpos  - unused; the search always begins at the start of the haystack
 * needle    - string to search for, lneedle characters long
 * haystack  - string to search in, lhaystack characters long
 *
 * Returns the 1 based position of the start of the first match, or 0 when
 * there is no match. An empty needle, or one longer than the haystack, is
 * reported as no match.
 */

static int bmstrpos_v6(int startpos, const char *needle, const char *haystack, int lneedle, int lhaystack)
{
  /* Holds (index of last occurrence + 1); 0 marks non existence. Must be
   * all zero on entry and is left all zero on every exit path.
   */
  static int charindex[BM_HASH_SIZE] = { 0 };
  int last = lneedle - 1;   /* index of the needle's final character */
  int hpos;                 /* haystack index aligned with needle[last] */
  int ai;

  (void) startpos;

#ifdef BM_DEBUG
  printf("bmstrpos_v6, needle = %s, haystack = %s\n", needle, haystack);
#endif

  /* Nothing sensible to search for; the table has not been touched yet
   * so there is nothing to reset.
   */
  if (lneedle < 1 || lneedle > lhaystack)
    return 0;

  /* Mark the last occurrence of each character.
   * We ignore the last character
   */
  for (ai = 0; ai < last; ai++)
    charindex[(unsigned char) needle[ai] & BM_AND] = ai + 1;

  hpos = last;

  while (hpos < lhaystack)
  {
    int n = last;   /* walks the needle backwards */
    int h = hpos;   /* walks the haystack backwards */
    int found;

#ifdef BM_DEBUG
    printf("-> %*s\n", hpos + 1, needle);
    printf("-> %s\n", haystack);
#endif

    while (needle[n] == haystack[h])
    {
#ifdef BM_DEBUG
      printf("%c = %c\n", needle[n], haystack[h]);
#endif
      /* If we've descended back to the start of the needle then we've
       * found a match; h is where the match starts in the haystack.
       */
      if (n == 0)
      {
        /* leave the table zeroed for the next call */
        for (ai = 0; ai < last; ai++)
          charindex[(unsigned char) needle[ai] & BM_AND] = 0;

        return h + 1; /* +1 for one based position */
      }

      n--;
      h--;
    }

#ifdef BM_DEBUG
    printf("%c != %c\n", needle[n], haystack[h]);
#endif

    /* We didn't find a complete match. If the haystack character under
     * the end of the needle occurs in the needle, move the needle up to
     * align with it; otherwise move the needle up by its own length.
     */
    found = charindex[(unsigned char) haystack[hpos] & BM_AND];

    if (found > 0)
    {
#ifdef BM_DEBUG
      printf("Match found %c at %d adding skipping %d chars\n", haystack[hpos], found, lneedle - found);
#endif
      hpos += lneedle - found;
    }
    else
    {
#ifdef BM_DEBUG
      printf("No match, skipping needle length\n");
#endif
      hpos += lneedle;
    }
  }

  /* leave the table zeroed for the next call */
  for (ai = 0; ai < last; ai++)
    charindex[(unsigned char) needle[ai] & BM_AND] = 0;

#ifdef BM_DEBUG
  printf("Finished. No Match!\n");
#endif

  return 0;
}

/* Version 7 (Probably more consistent times than v5)
 * Much like version 5 but does not zero the array, this
 * initializes the array to the length of the needle as done
 * in version 2. This gives more speed for longer strings as
 * there is a light overhead with some more calculations done
 * when initializing the array
 *
 * Characters are reduced to a table element with BM_AND, so several
 * characters may share one element; the element then holds whichever of
 * them occurs last in the needle, i.e. the shortest skip.
 *
 * startpos  - unused; the search always begins at the start of the haystack
 * needle    - string to search for, lneedle characters long
 * haystack  - string to search in, lhaystack characters long
 *
 * Returns the 1 based position of the start of the first match, or 0 when
 * there is no match. An empty needle, or one longer than the haystack, is
 * reported as no match.
 */
static int bmstrpos_v7(int startpos, const char *needle, const char *haystack, int lneedle, int lhaystack)
{
  int charindex[BM_HASH_SIZE];
  int last = lneedle - 1;   /* index of the needle's final character */
  int hpos;                 /* haystack index aligned with needle[last] */
  int ai;

  (void) startpos;

#ifdef BM_DEBUG
  printf("bmstrpos_v7, needle = %s, haystack = %s\n", needle, haystack);
#endif

  /* Nothing sensible to search for. This also keeps every index below
   * inside the two strings.
   */
  if (lneedle < 1 || lneedle > lhaystack)
    return 0;

  /* Initialize the array to the length of the needle. If we don't find
   * a used element in the array then we skip by needle length.
   */
  for (ai = 0; ai < BM_HASH_SIZE; ai++)
    charindex[ai] = lneedle;

  /* Store the skip for the last occurrence of each character.
   * We ignore the last character
   */
  for (ai = 0; ai < last; ai++)
    charindex[(unsigned char) needle[ai] & BM_AND] = last - ai;

  hpos = last;

  while (hpos < lhaystack)
  {
    int n = last;   /* walks the needle backwards */
    int h = hpos;   /* walks the haystack backwards */

#ifdef BM_DEBUG
    printf("-> %*s\n", hpos + 1, needle);
    printf("-> %s\n", haystack);
#endif

    while (needle[n] == haystack[h])
    {
#ifdef BM_DEBUG
      printf("%c = %c\n", needle[n], haystack[h]);
#endif
      /* If we've descended back to the start of the needle then we've
       * found a match; h is where the match starts in the haystack.
       */
      if (n == 0)
        return h + 1; /* +1 for one based position */

      n--;
      h--;
    }

#ifdef BM_DEBUG
    printf("%c != %c\n", needle[n], haystack[h]);
#endif

    /* We didn't find a complete match. The table already holds how far
     * the needle can move for the haystack character under its end: up
     * to the aligning occurrence, or by the whole needle length.
     */
    hpos += charindex[(unsigned char) haystack[hpos] & BM_AND];
  }

#ifdef BM_DEBUG
  printf("Finished. No Match!\n");
#endif

  return 0;
}


/* Version 8 (as 7 but with variable array size)
 * This has some very basic cost based optimisation included.
 * The array size is made smaller for smaller strings.
 *
 * Characters are reduced to a table element with a mask, so several
 * characters may share one element; the element then holds whichever of
 * them occurs last in the needle, i.e. the shortest skip.
 *
 * startpos  - unused; the search always begins at the start of the haystack
 * needle    - string to search for, lneedle characters long
 * haystack  - string to search in, lhaystack characters long
 *
 * Returns the 1 based position of the start of the first match, or 0 when
 * there is no match. An empty needle, or one longer than the haystack, is
 * reported as no match.
 */

static int bmstrpos_v8(int startpos, const char *needle, const char *haystack, int lneedle, int lhaystack)
{
  int charindex[256];
  int hashsize = 256;       /* becomes the index mask (element count - 1) */
  int last = lneedle - 1;   /* index of the needle's final character */
  int hpos;                 /* haystack index aligned with needle[last] */
  int ai;

  (void) startpos;

  /* Very little work has been done to see which works best.
   * It all depends on the mix of characters. These "costs"
   * probably need a lot more research.
   * Each comparison yields 0 or 1, so the element count is halved once
   * for every threshold the haystack length falls below.
   */
  hashsize >>= (lhaystack < 4096); /* 128 elements */
  hashsize >>= (lhaystack < 2048); /* 64 elements */
  hashsize >>= (lhaystack < 1024); /* 32 elements */
  hashsize >>= (lhaystack < 512); /* 16 elements */
  hashsize >>= (lhaystack < 256); /* 8 elements */
  hashsize >>= (lhaystack < 128); /* 4 elements */
  hashsize--; /* a power of two less one: usable as a bit mask */

#ifdef BM_DEBUG
  printf("bmstrpos_v8, needle = %s, haystack = %s\n", needle, haystack);
  printf("*** USING HASHSIZE = %d HAYSTACK LENGTH = %d ***\n", hashsize + 1, lhaystack);
#endif

  /* Nothing sensible to search for. This also keeps every index below
   * inside the two strings.
   */
  if (lneedle < 1 || lneedle > lhaystack)
    return 0;

  /* Initialize the used part of the array to the length of the needle.
   * If we don't find a used element in the array then we skip by needle
   * length.
   */
  for (ai = 0; ai <= hashsize; ai++)
    charindex[ai] = lneedle;

  /* Store the skip for the last occurrence of each character.
   * We ignore the last character
   */
  for (ai = 0; ai < last; ai++)
    charindex[(unsigned char) needle[ai] & hashsize] = last - ai;

  hpos = last;

  while (hpos < lhaystack)
  {
    int n = last;   /* walks the needle backwards */
    int h = hpos;   /* walks the haystack backwards */

#ifdef BM_DEBUG
    printf("-> %*s\n", hpos + 1, needle);
    printf("-> %s\n", haystack);
#endif

    while (needle[n] == haystack[h])
    {
#ifdef BM_DEBUG
      printf("%c = %c\n", needle[n], haystack[h]);
#endif
      /* If we've descended back to the start of the needle then we've
       * found a match; h is where the match starts in the haystack.
       */
      if (n == 0)
        return h + 1; /* +1 for one based position */

      n--;
      h--;
    }

#ifdef BM_DEBUG
    printf("%c != %c\n", needle[n], haystack[h]);
#endif

    /* We didn't find a complete match. The table already holds how far
     * the needle can move for the haystack character under its end: up
     * to the aligning occurrence, or by the whole needle length.
     */
    hpos += charindex[(unsigned char) haystack[hpos] & hashsize];
  }

#ifdef BM_DEBUG
  printf("Finished. No Match!\n");
#endif

  return 0;
}

/* EOF */