/**
 * Scrapes vacancy listings from the careers site configured in the
 * constructor and hands the parsed vacancies to a caller-supplied callback.
 *
 * Relies on the global `fetch` and `DOMParser`.
 */
class VacancyDocParser {
  /**
   * @param {Function} setVacancies - Callback invoked by `getVacancies` with
   *   the array of parsed vacancy objects.
   */
  constructor(setVacancies) {
    // Listing page; paginated through the `?page=N` query string.
    this._baseUrl = 'https://tring.peopleforce.io/careers/';
    // Base against which relative vacancy hrefs are resolved.
    this._defaultUrl = 'https://tring.peopleforce.io';
    this.setVacancies = setVacancies;
  }

  /**
   * Fetches `url` and parses the response body as an HTML document.
   *
   * @param {string} url - Address to fetch.
   * @returns {Promise<Document>} Document produced by `DOMParser` from the
   *   response text.
   */
  getDoc = async url => {
    const res = await fetch(url);
    const stringifiedHTML = await res.text();

    return new DOMParser().parseFromString(stringifiedHTML, 'text/html');
  };

  /**
   * Collects the absolute URLs of all vacancies, walking the listing pages
   * starting at `page`.
   *
   * Paging stops at the first page that either has no `a.stretched-link`
   * anchors inside `#results` or contributes no link that has not been seen
   * before.
   *
   * @param {number} [page=1] - Listing page to start from.
   * @param {Set<string>} [visitedLinks] - Links already collected; it is
   *   updated in place with every link this call returns.
   * @returns {Promise<string[]>} Unique vacancy URLs in discovery order.
   */
  getVacancyLinks = async (page = 1, visitedLinks = new Set()) => {
    const links = [];

    // Pages are fetched one after another on purpose: whether a next page is
    // requested at all depends on what the current page yielded.
    for (let currentPage = page; ; currentPage += 1) {
      const linkQuery = currentPage === 1 ? '' : `?page=${currentPage}`;
      const doc = await this.getDoc(`${this._baseUrl}${linkQuery}`);

      const anchors = doc.getElementById('results')?.querySelectorAll('a.stretched-link');
      if (!anchors?.length) break;

      let addedOnPage = 0;
      for (const anchor of anchors) {
        // Read the raw attribute; the `href` property would already be
        // resolved against the parsed document's own base.
        const anchorLink = anchor.getAttribute('href');
        if (!anchorLink) continue;

        let fullLink;
        try {
          // Handles absolute, root-relative, path-relative and
          // protocol-relative hrefs uniformly.
          fullLink = new URL(anchorLink, this._defaultUrl).href;
        } catch {
          // An href that cannot be parsed cannot be fetched later either.
          continue;
        }

        if (visitedLinks.has(fullLink)) continue;
        visitedLinks.add(fullLink);
        links.push(fullLink);
        addedOnPage += 1;
      }

      // A page that adds nothing new means the listing is exhausted.
      if (addedOnPage === 0) break;
    }

    return links;
  };

  /**
   * Extracts the text of every `<p>` element found in an HTML string.
   *
   * @param {string} html - HTML markup to parse.
   * @returns {string} The `innerText` of each paragraph, each followed by a
   *   single space (so a non-empty result ends with a space).
   */
  getVacancyText = html => {
    const doc = new DOMParser().parseFromString(html, 'text/html');

    return Array.from(doc.querySelectorAll('p'), paragraph => `${paragraph.innerText} `).join('');
  };

  /**
   * Concatenates the `outerHTML` of every element sibling that follows
   * `currentElem` (the element itself is not included). Used to gather the
   * vacancy description that comes after the title.
   *
   * @param {Element|null|undefined} currentElem - Element whose following
   *   siblings are collected.
   * @returns {string} Joined markup, or `''` when `currentElem` is missing or
   *   has no following element siblings.
   */
  getAllNextSiblings = currentElem => {
    let html = '';

    for (
      let sibling = currentElem?.nextElementSibling;
      sibling;
      sibling = sibling.nextElementSibling
    ) {
      html += sibling.outerHTML;
    }

    return html;
  };

  /**
   * Fetches a single vacancy page and extracts its fields.
   *
   * Any field whose source element is absent from the page is returned as an
   * empty string instead of throwing, so one malformed page does not abort a
   * whole batch.
   *
   * @param {string} url - Vacancy page address.
   * @param {*} id - Identifier copied verbatim into the result.
   * @returns {Promise<{id: *, title: string, content: string, location: string, description: string}>}
   *   `title` is the first `h2`'s text, `content` the inner HTML of
   *   `.col-lg-8`, `location` the text of the last `dd` directly inside a
   *   `dl`, and `description` the markup following the element right after
   *   the title.
   */
  getVacancy = async (url, id) => {
    const doc = await this.getDoc(url);
    const titleEl = doc.querySelector('h2');

    return {
      id,
      title: titleEl?.innerText ?? '',
      content: doc.querySelector('.col-lg-8')?.innerHTML ?? '',
      location: doc.querySelector('dl > dd:last-of-type')?.innerText ?? '',
      // The element directly after the title is skipped (the original author
      // notes it is usually an <hr>); everything after it is the description.
      description: this.getAllNextSiblings(titleEl?.nextElementSibling),
    };
  };

  /**
   * Collects every vacancy link, loads all vacancy pages in parallel and
   * passes the results to `setVacancies`. Ids are assigned as the 1-based
   * position of each link.
   *
   * @returns {Promise<void>} Rejects if collecting links or loading any page
   *   fails; `setVacancies` is not called in that case.
   */
  getVacancies = async () => {
    const links = await this.getVacancyLinks();
    const vacancies = await Promise.all(
      links.map((link, index) => this.getVacancy(link, index + 1)),
    );
    this.setVacancies(vacancies);
  };
}

export default VacancyDocParser;
