import * as pdfjs from "pdfjs-dist";
// Tell pdf.js where to fetch its worker script: a cdnjs-hosted build whose
// version segment is taken from the imported pdfjs-dist module, so the worker
// and the library stay on the same version. The URL is protocol-relative
// (it starts with "//").
pdfjs.GlobalWorkerOptions.workerSrc = `//cdnjs.cloudflare.com/ajax/libs/pdf.js/${pdfjs.version}/pdf.worker.js`;

/**
 * Returns a single capture group from the first match of `regex` in `string`.
 *
 * @param {string} string - Text to search.
 * @param {RegExp} regex - Pattern to apply.
 * @param {number} [offset=1] - Index into the match array (0 is the whole
 *   match, 1 the first capture group, and so on).
 * @returns {string|null|undefined} The requested group; `null` when the
 *   pattern does not match; `undefined` when it matches but the group is
 *   absent.
 */
const getMatchGroup = (string, regex, offset = 1) => {
  // exec() on a global or sticky regex resumes from regex.lastIndex, so a
  // reused pattern would otherwise alternate between a match and null.
  // Resetting is a no-op for patterns without the g/y flags.
  regex.lastIndex = 0;
  const match = regex.exec(string);
  return match && match[offset];
};

/**
 * Extracts the text of every page of a PDF using pdf.js.
 *
 * Each text item reported by pdf.js becomes its own line; pages are
 * concatenated in page order, also separated by a newline.
 *
 * @param {ArrayBuffer|ArrayLike<number>} dataBuffer - Raw PDF bytes (anything
 *   accepted by the Uint8Array constructor).
 * @returns {Promise<string>} Resolves to the newline-joined text of all pages.
 */
const extractPDFJS = (dataBuffer) => {
  const bytes = new Uint8Array(dataBuffer);
  return pdfjs.getDocument(bytes).promise.then(async (doc) => {
    // pdf.js page numbers are 1-based.
    const pageNumbers = Array.from({ length: doc.numPages }, (_, i) => i + 1);
    // Pages are requested concurrently; Promise.all keeps the results in the
    // order of pageNumbers, so no shared lookup table is needed.
    const pageTexts = await Promise.all(
      pageNumbers.map(async (pageNumber) => {
        const page = await doc.getPage(pageNumber);
        const { items } = await page.getTextContent();
        return items.map(({ str }) => str).join("\n");
      })
    );
    return pageTexts.join("\n");
  });
};

// Patterns used by parsePDFJSLines to recognise lines of the extracted report
// text. The parser also stores these RegExp objects in its `state` variable
// and compares them by identity (`state === PDFJS_REGEX.X`), so each entry
// doubles as a state token: "the previous line was label X, the next line
// carries its value".
const PDFJS_REGEX = {
  // Report header: a section marker line, followed by a "<month> de <year>" line.
  ReportDateSection: /Responsabilidades de crédito referentes a /,
  ReportDate: /(.*) de (.*)/,
  // Summary counters; the parser only looks for these after StopProcessing
  // has matched. The value is taken from the line following the label.
  ReportInstitutions: /Nº\sde\sinstituições\sque\scomunicaram\sinformação:/,
  ReportDefaultInstitutions: /Nº de instituições que comunicaram incumprimento:/,
  ReportCredits: /Nº total de produtos financeiros comunicados:/,
  // Value shapes. Amount accepts any run of digits, whitespace and commas
  // (including an empty run) followed by " €".
  Amount: /[\d\s,]* €/,
  // NOTE(review): DateValue is not referenced by any code visible in this file.
  DateValue: /\d{4}-\d{2}-\d{2}/,
  // Per-credit field labels; the value is read from the following line.
  StartDate: /Início/,
  EndDate: /Fim/,
  // Creditor captures the institution name from the same line as the label.
  Creditor: /Informação comunicada pela instituição: (.*)/,
  // NOTE(review): CreditorDate is not referenced by any active code visible
  // in this file.
  CreditorDate: /Responsabilidades de crédito relativas a (.*) de (.*) de (.*)/,
  Product: /Produto financeiro/,
  // Principal starts a new credit entry; Default is the state entered right
  // after the principal amount has been read.
  Principal: /Total em dívida/,
  Default: /do qual, em incumprimento/,
  Period: /Periodicidade/,
  Payment: /Abatido ao ativo/,
  Negotiation: /Tipo de negociação/,
  // Guarantee section: starts at a "Número" line, contains four-digit code
  // lines and amount lines, and ends at the first line containing "-".
  // Outside that section a four-digit line is treated as a guarantee code
  // whose description is on the next line.
  GuaranteeSectionStart: /Número/,
  GuaranteeType: /^\d{4}$/,
  GuaranteeSectionEnd: /-/,
  // Company identification; the value is read from the following line.
  CompanyName: /Nome: /,
  CompanyVatId: /Nº de Identificação: /,
  // Marks the point where credit parsing stops and summary parsing begins.
  StopProcessing: /Legenda/,
};

/**
 * Parses the extracted report text, one line at a time, into structured data.
 *
 * The parser is a small state machine. A label line (matched by one of the
 * PDFJS_REGEX patterns) sets `state`; the following line is then stored as
 * that label's value. Branch order matters: label checks and value checks are
 * interleaved, and the first matching branch wins. Lines that match nothing
 * are ignored.
 *
 * Parsing has two phases. Until a line matches PDFJS_REGEX.StopProcessing,
 * company data, the report date, credits, guarantees and guarantee-code
 * descriptions are collected. After it, only the three summary counters are
 * read.
 *
 * Value lines that arrive before any credit (or guarantee code) has been
 * opened are skipped rather than dereferencing a missing object.
 *
 * All values are stored as the raw line text; nothing is converted to
 * numbers or dates. Per-creditor reference dates (PDFJS_REGEX.CreditorDate)
 * are not parsed.
 *
 * @param {string[]} lines - Report text split into lines.
 * @returns {{credits: object[], guaranteeTypes: object, report: object}}
 *   `credits`: one entry per principal amount found, with `creditor`,
 *   `principal`, `guarantees` (array of `{ type, amount }`) and, when
 *   present, `creditorDate`, `default`, `product`, `startDate`, `endDate`,
 *   `period`, `payment`, `negotiation`.
 *   `guaranteeTypes`: maps a four-digit code line to the line that follows it.
 *   `report`: optional `company` (`name`, `vatId`), `date` (`month`, `year`),
 *   `institutions`, `defaultInstitutions`, `credits`.
 */
const parsePDFJSLines = (lines) => {
  const report = {};
  const credits = [];
  const guaranteeTypes = {};
  let state; // PDFJS_REGEX entry whose value is expected next; undefined when idle
  let creditor;
  // Starts as an empty object and is cleared when a creditor line is seen, so
  // only credits found before the first creditor line carry `creditorDate`.
  let creditorDate = {};
  let currentCredit;
  let currentGuarantee;
  let stop = false;

  // Stores a field on the credit being built, if there is one. A value line
  // that shows up before any credit was opened is dropped instead of throwing.
  const setCreditField = (field, value) => {
    if (currentCredit) currentCredit[field] = value;
  };

  lines.forEach((line) => {
    if (!stop) {
      if (line.match(PDFJS_REGEX.CompanyName)) {
        state = PDFJS_REGEX.CompanyName;
      } else if (state === PDFJS_REGEX.CompanyName) {
        report.company = { ...report.company, name: line };
        state = undefined;
      } else if (line.match(PDFJS_REGEX.CompanyVatId)) {
        state = PDFJS_REGEX.CompanyVatId;
      } else if (state === PDFJS_REGEX.CompanyVatId) {
        report.company = { ...report.company, vatId: line };
        state = undefined;
      } else if (getMatchGroup(line, PDFJS_REGEX.Creditor)) {
        // The creditor name is on the label line itself; `state` is left alone.
        creditor = getMatchGroup(line, PDFJS_REGEX.Creditor);
        creditorDate = undefined;
      } else if (line.match(PDFJS_REGEX.Principal)) {
        state = PDFJS_REGEX.Principal;
      } else if (
        state === PDFJS_REGEX.Principal &&
        line.match(PDFJS_REGEX.Amount)
      ) {
        // A principal amount opens a new credit; the next amount line is its
        // defaulted portion.
        currentCredit = { creditor, principal: line, guarantees: [] };
        if (creditorDate) currentCredit.creditorDate = creditorDate;
        credits.push(currentCredit);
        state = PDFJS_REGEX.Default;
      } else if (
        state === PDFJS_REGEX.Default &&
        line.match(PDFJS_REGEX.Amount)
      ) {
        setCreditField("default", line);
        state = undefined;
      } else if (line.match(PDFJS_REGEX.Product)) {
        state = PDFJS_REGEX.Product;
      } else if (state === PDFJS_REGEX.Product) {
        setCreditField("product", line);
        state = undefined;
      } else if (line.match(PDFJS_REGEX.StartDate)) {
        state = PDFJS_REGEX.StartDate;
      } else if (state === PDFJS_REGEX.StartDate) {
        setCreditField("startDate", line);
        state = undefined;
      } else if (line.match(PDFJS_REGEX.EndDate)) {
        state = PDFJS_REGEX.EndDate;
      } else if (state === PDFJS_REGEX.EndDate) {
        setCreditField("endDate", line);
        state = undefined;
      } else if (line.match(PDFJS_REGEX.Period)) {
        state = PDFJS_REGEX.Period;
      } else if (state === PDFJS_REGEX.Period) {
        // Only the four known periodicities are kept; anything else is
        // recorded as an empty string.
        setCreditField(
          "period",
          line.match(/Mensal|Trimestral|Semestral|Anual/) ? line : ""
        );
        state = undefined;
      } else if (line.match(PDFJS_REGEX.Payment)) {
        state = PDFJS_REGEX.Payment;
      } else if (state === PDFJS_REGEX.Payment) {
        setCreditField("payment", line);
        state = undefined;
      } else if (line.match(PDFJS_REGEX.Negotiation)) {
        state = PDFJS_REGEX.Negotiation;
      } else if (state === PDFJS_REGEX.Negotiation) {
        setCreditField("negotiation", line);
        state = undefined;
      } else if (line.match(PDFJS_REGEX.ReportDateSection)) {
        state = PDFJS_REGEX.ReportDateSection;
      } else if (
        state === PDFJS_REGEX.ReportDateSection &&
        line.match(PDFJS_REGEX.ReportDate)
      ) {
        report.date = {
          month: getMatchGroup(line, PDFJS_REGEX.ReportDate, 1),
          year: getMatchGroup(line, PDFJS_REGEX.ReportDate, 2),
        };
        state = undefined;
      } else if (line.match(PDFJS_REGEX.GuaranteeSectionStart)) {
        state = PDFJS_REGEX.GuaranteeSectionStart;
      } else if (
        state === PDFJS_REGEX.GuaranteeSectionStart &&
        line.match(PDFJS_REGEX.GuaranteeType)
      ) {
        // Inside the guarantee section a four-digit line is a guarantee code;
        // its amount is expected on a later line.
        currentGuarantee = { type: line };
      } else if (
        state === PDFJS_REGEX.GuaranteeSectionStart &&
        line.match(PDFJS_REGEX.Amount)
      ) {
        // Attach the amount only when both a pending code and an open credit
        // exist; an orphan amount is skipped.
        if (currentGuarantee && currentCredit) {
          currentGuarantee.amount = line;
          currentCredit.guarantees.push(currentGuarantee);
        }
        currentGuarantee = undefined;
      } else if (
        state === PDFJS_REGEX.GuaranteeSectionStart &&
        line.match(PDFJS_REGEX.GuaranteeSectionEnd)
      ) {
        state = undefined;
      } else if (
        state !== PDFJS_REGEX.GuaranteeSectionStart &&
        line.match(PDFJS_REGEX.GuaranteeType)
      ) {
        // Outside the guarantee section a four-digit line is a code whose
        // description is on the next line.
        currentGuarantee = { type: line };
        state = PDFJS_REGEX.GuaranteeType;
      } else if (state === PDFJS_REGEX.GuaranteeType) {
        guaranteeTypes[currentGuarantee.type] = line;
        currentGuarantee = undefined;
        state = undefined;
      } else if (line.match(PDFJS_REGEX.StopProcessing)) {
        stop = true;
      }
    } else {
      // Second phase: only the summary counters are read, each from the line
      // following its label.
      if (line.match(PDFJS_REGEX.ReportInstitutions)) {
        state = PDFJS_REGEX.ReportInstitutions;
      } else if (state === PDFJS_REGEX.ReportInstitutions) {
        report.institutions = line;
        state = undefined;
      } else if (line.match(PDFJS_REGEX.ReportDefaultInstitutions)) {
        state = PDFJS_REGEX.ReportDefaultInstitutions;
      } else if (state === PDFJS_REGEX.ReportDefaultInstitutions) {
        report.defaultInstitutions = line;
        state = undefined;
      } else if (line.match(PDFJS_REGEX.ReportCredits)) {
        state = PDFJS_REGEX.ReportCredits;
      } else if (state === PDFJS_REGEX.ReportCredits) {
        report.credits = line;
        state = undefined;
      }
    }
  });
  return { credits, guaranteeTypes, report };
};

/**
 * Extracts the text of a PDF and parses it into structured report data.
 *
 * @param {ArrayBuffer|ArrayLike<number>} dataBuffer - Raw PDF bytes.
 * @returns {Promise<{lines: string[], credits: object[], guaranteeTypes: object, report: object}>}
 *   Resolves to the extracted lines together with the result of
 *   parsePDFJSLines for those lines.
 */
export const parsePDFJS = (dataBuffer) =>
  extractPDFJS(dataBuffer).then((text) => {
    const lines = text.split(/\n/gi);
    const parsed = parsePDFJSLines(lines);
    return {
      lines,
      credits: parsed.credits,
      guaranteeTypes: parsed.guaranteeTypes,
      report: parsed.report,
    };
  });
