import { PageText } from "../api/open-api/generated/types";
import { SearchResult } from "../utils/search";

/**
 * Locates a search hit on a page in terms of whitespace-delimited words and
 * captures a short excerpt of the page text on either side of the hit.
 *
 * A "word index" is an ascending array holding the character offset at which
 * each word starts, terminated by an `Infinity` sentinel.
 */
export default class PageWordIndex {
  /** Character budget passed to `getMatchContext` when building the excerpt. */
  private static readonly CONTEXT_CHARACTERS = 80;

  /** Page number copied from the `PageText` passed to the constructor. */
  public page: number;
  /**
   * Half-open `[start, end)` range of word numbers. It always covers exactly
   * one word: the word in which the hit's character span starts.
   */
  public wordSpan: number[];
  /** Excerpt preceding the matched word (prefixed with "..." when truncated). */
  public preContext: string;
  /** Excerpt following the matched word (suffixed with "..." when truncated). */
  public postContext: string;

  /**
   * @param pageText  Page whose `text` is indexed and whose `page` is recorded.
   * @param searchHit Hit whose `span[0]` is the character offset of the match
   *                  and whose `text` is the matched string.
   */
  constructor(pageText: PageText, searchHit: SearchResult) {
    this.page = pageText.page;

    const index = this.buildIndex(pageText.text);
    const firstWord = this.charToWordIndex(searchHit.span, index);
    this.wordSpan = [firstWord, firstWord + 1];

    [this.preContext, this.postContext] = this.getMatchContext(
      PageWordIndex.CONTEXT_CHARACTERS,
      searchHit,
      this.wordSpan,
      index,
      pageText.text,
    );
  }

  /**
   * Builds the word index for a page.
   *
   * @param pageText Raw page text.
   * @returns The start offset of every maximal run of non-whitespace
   *          characters, in ascending order, followed by `Infinity`.
   */
  buildIndex(pageText: string): number[] {
    const newWordIndex: number[] = [];
    // One global regex scans the whole string; each match is one word and
    // `match.index` is the offset of its first character.
    const wordPattern = /\S+/g;
    let match = wordPattern.exec(pageText);
    while (match !== null) {
      newWordIndex.push(match.index);
      match = wordPattern.exec(pageText);
    }
    // The Infinity sentinel gives the binary search a strict upper bound.
    newWordIndex.push(Infinity);
    return newWordIndex;
  }

  /**
   * Finds the word in which a character span starts.
   *
   * @param charSpan  Character span of the hit; only `charSpan[0]` is used.
   * @param wordIndex Word index as produced by `buildIndex`.
   * @returns The largest word number whose start offset is `<= charSpan[0]`,
   *          or 0 when the span starts before the first word or the index
   *          holds no words.
   */
  charToWordIndex(charSpan: number[], wordIndex: number[]): number {
    const target = charSpan[0];
    let lo = 0;
    let hi = wordIndex.length - 1; // position of the Infinity sentinel
    // Invariant: wordIndex[hi] > target, and the answer lies in [lo, hi).
    // `mid` is strictly between lo and hi, so every step shrinks the range.
    while (hi - lo > 1) {
      const mid = lo + ((hi - lo) >> 1);
      if (wordIndex[mid] > target) {
        hi = mid;
      } else {
        lo = mid;
      }
    }
    return lo;
  }

  /**
   * Produces the text shown before and after the matched word.
   *
   * @param maxCharacters Target size of pre-context + match + post-context.
   * @param searchHit     Hit being displayed; only `text.length` is used.
   * @param span          `[firstWord, endWord]` word numbers into `index`.
   * @param index         Word index as produced by `buildIndex`.
   * @param pageText      Full text of the page.
   * @returns `[preText, postText]`. When the hit has no text, `preText` is the
   *          first `maxCharacters` characters of the page and `postText` is "".
   */
  getMatchContext(
    maxCharacters: number,
    searchHit: SearchResult,
    span: number[],
    index: number[],
    pageText: string,
  ): [string, string] {
    if (!searchHit.text.length) {
      // If there is no specific match, assume this is a page-based search result, as can happen for the model query
      // searches. Will just return up to maxCharacters starting at the beginning of the page, as 'preText'.
      return [pageText.substring(0, Math.min(maxCharacters, pageText.length)), ""];
    }

    // The index may yield the Infinity sentinel (hit in the last word) or
    // undefined (page without words); both mean "end of page".
    const clampToPage = (offset: number | undefined): number =>
      offset === undefined ? pageText.length : Math.min(offset, pageText.length);

    const [firstMatchWord, lastMatchWord] = span;
    const firstMatchChar = clampToPage(index[firstMatchWord]);
    // The post-context begins at the start of the word at `lastMatchWord`,
    // i.e. the word following the matched one.
    const lastMatchChar = clampToPage(index[lastMatchWord]);

    // Up to half of the budget is spent on text preceding the match.
    const start = Math.max(0, firstMatchChar - Math.ceil(maxCharacters / 2));
    const leading = pageText.substring(start, firstMatchChar);
    const preText = start !== 0 ? `...${leading}` : leading;

    // Never let the remaining budget go negative: `substring` swaps its
    // arguments when end < start, which would return text from before the match.
    const charactersLeft = Math.max(0, maxCharacters - searchHit.text.length - preText.length);

    let postText: string;
    if (lastMatchChar + charactersLeft < pageText.length) {
      // The page continues past the budget: reserve 3 characters for the ellipsis.
      const end = lastMatchChar + Math.max(0, charactersLeft - 3);
      postText = `${pageText.substring(lastMatchChar, end)}...`;
    } else {
      // The rest of the page fits, so return it whole with no ellipsis.
      postText = pageText.substring(lastMatchChar);
    }
    return [preText, postText];
  }
}
