import Tesseract from "tesseract.js";
import { PDFDocument } from "pdf-lib";
import { extractTextFromImage as callAmazonTextract } from "./textract";
import {
  getDoc,
  doc,
  updateDoc,
  collection,
  addDoc,
  query,
  orderBy,
  limit,
  getDocs,
} from "firebase/firestore";
import {
  extractDate,
  extractTotals,
  findClientName,
  processAndUploadFile,
} from "./invoiceUtils";
import { processInvoiceText } from "./ai";
import * as pdfjsLib from "pdfjs-dist";
import pLimit from "p-limit";

// Tell pdf.js where to fetch its worker script: the cdnjs build whose version
// matches the installed pdfjs-dist package. The URL is protocol-relative, so it
// is resolved against the scheme of the page that loads this module.
pdfjsLib.GlobalWorkerOptions.workerSrc = `//cdnjs.cloudflare.com/ajax/libs/pdf.js/${pdfjsLib.version}/pdf.worker.js`;

// Shared p-limit gate used when processing PDF pages. It is named `limit2`
// because `limit` is already taken by the firebase/firestore import.
const limit2 = pLimit(10); // Limit the number of concurrent operations to 10

/**
 * Prepares an image for Tesseract OCR: small images are upscaled 2x, every
 * pixel is thresholded to pure black or white, and a contrast filter pass is
 * applied before the result is re-encoded as PNG.
 *
 * @param imageFile - Source image. When it is a `File`, its name is reused for
 *   the returned file; otherwise the name falls back to "image.png".
 * @returns A PNG `File` holding the processed image. If no 2D canvas context
 *   is available, an empty placeholder file named "image.png" is returned.
 * @throws Error if the image cannot be decoded or the canvas cannot be
 *   encoded to PNG.
 */
export const preprocessImageForTesseract = async (
  imageFile: Blob | MediaSource
): Promise<File> => {
  const canvas = document.createElement("canvas");
  const ctx = canvas.getContext("2d");

  if (ctx === null) {
    // No 2D context: keep the historical best-effort placeholder result.
    return new File([new Blob()], "image.png", { type: "" });
  }

  const img = new Image();
  const imageUrl = URL.createObjectURL(imageFile);

  try {
    // Reject on decode failure; otherwise a broken image would leave this
    // promise pending forever.
    await new Promise<void>((resolve, reject) => {
      img.onload = () => resolve();
      img.onerror = () =>
        reject(new Error("Failed to load image for OCR preprocessing."));
      img.src = imageUrl;
    });

    // Size the canvas exactly once, BEFORE touching pixels: assigning
    // canvas.width/height clears the bitmap and resets the context state, so
    // resizing after thresholding would throw the binarised pixels away.
    const scaleFactor = img.width < 1000 || img.height < 1000 ? 2 : 1;
    canvas.width = img.width * scaleFactor;
    canvas.height = img.height * scaleFactor;
    ctx.drawImage(img, 0, 0, canvas.width, canvas.height);

    // Binarise: average the RGB channels and snap to 0 or 255 (alpha is kept).
    const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
    const data = imageData.data;
    for (let i = 0; i < data.length; i += 4) {
      const avg = (data[i] + data[i + 1] + data[i + 2]) / 3;
      const binaryValue = avg > 128 ? 255 : 0;
      data[i] = data[i + 1] = data[i + 2] = binaryValue;
    }
    ctx.putImageData(imageData, 0, 0);

    // Redraw the canvas onto itself through a contrast filter.
    ctx.filter = "contrast(200%)";
    ctx.drawImage(canvas, 0, 0);

    const processedBlob = await new Promise<Blob | null>((resolve) =>
      canvas.toBlob((blob) => resolve(blob), "image/png")
    );
    if (processedBlob === null) {
      // toBlob yields null when encoding fails (e.g. a zero-sized canvas).
      throw new Error("Failed to encode the preprocessed image as PNG.");
    }

    const fileName = imageFile instanceof File ? imageFile.name : "image.png";
    return new File([processedBlob], fileName, { type: "image/png" });
  } finally {
    // Always release the object URL, on success and on failure alike.
    URL.revokeObjectURL(imageUrl);
  }
};

/**
 * Renders a single page of a PDF to a PNG data URL at 2x scale.
 *
 * @param pdfBytes - Raw bytes of the PDF. The buffer is copied before being
 *   handed to pdf.js, so the caller's array stays usable for further calls.
 * @param pageNumber - 1-based page index.
 * @returns A `data:image/png;base64,...` URL of the rendered page.
 * @throws Error if the page does not exist or no 2D canvas context can be
 *   obtained; pdf.js loading and rendering errors propagate unchanged.
 */
export const pdfPageToImage = async (
  pdfBytes: Uint8Array,
  pageNumber: number
): Promise<string> => {
  // Hand pdf.js its own copy: depending on the pdf.js version the supplied
  // buffer may be transferred to the worker, which would detach the caller's
  // array and break the next page render that reuses it.
  const loadingTask = pdfjsLib.getDocument({ data: pdfBytes.slice() });

  try {
    const pdf = await loadingTask.promise;

    // Ensure the requested page exists
    if (
      !Number.isInteger(pageNumber) ||
      pageNumber > pdf.numPages ||
      pageNumber < 1
    ) {
      throw new Error(
        `Invalid page request: Page ${pageNumber} does not exist.`
      );
    }

    const page = await pdf.getPage(pageNumber);
    const viewport = page.getViewport({ scale: 2 });

    const canvas = document.createElement("canvas");
    const context = canvas.getContext("2d");
    if (context === null) {
      // Without a context nothing can be drawn; returning the blank canvas
      // would feed an empty image into OCR and storage.
      throw new Error("Unable to obtain a 2D canvas context for PDF rendering.");
    }
    canvas.height = viewport.height;
    canvas.width = viewport.width;

    await page.render({ canvasContext: context, viewport }).promise;

    return canvas.toDataURL("image/png");
  } finally {
    // Release the document and the worker created for this loading task;
    // otherwise every rendered page leaves one behind.
    await loadingTask.destroy();
  }
};

/**
 * Uploads one scanned image and writes its invoice record to Firestore.
 *
 * The invoice number and date are resolved once (falling back to
 * `fallbackInvoiceNumber` / "Unknown" when extraction produced none) and the
 * same values are used for the upload, the stored fields and the derived
 * `name` / `fileName`.
 */
const uploadAndStoreInvoice = async (
  imageFile: File,
  extracted: any,
  fallbackInvoiceNumber: number,
  target: {
    userId: string;
    type: "sales" | "purchase" | "expense";
    invoicesRef: ReturnType<typeof collection>;
    /** Id of the document to overwrite; empty string means "add a new one". */
    existingId: string;
  }
): Promise<void> => {
  // `||` (not `??`) keeps the original fallback rule: empty strings and 0
  // count as "no value extracted".
  const invoiceNumber = String(
    extracted?.invoiceNumber || fallbackInvoiceNumber
  );
  const date = String(extracted?.date || "Unknown");

  const fileUrl = await processAndUploadFile(
    imageFile,
    target.userId,
    invoiceNumber,
    date,
    target.type
  );

  const safeInvoiceNumber = invoiceNumber.replace(/\//g, "_");
  const invoiceData = {
    ...extracted,
    invoiceNumber,
    date,
    fileUrl,
    dateScanned: formatDateAsDDMMYYYY(new Date()), // Format the current date
    name: `Invoice_${safeInvoiceNumber}`,
    fileName: `Invoice_${safeInvoiceNumber}_${date.replace(/\//g, "_")}`,
  };

  if (target.existingId) {
    await updateDoc(doc(target.invoicesRef, target.existingId), invoiceData);
  } else {
    await addDoc(target.invoicesRef, invoiceData);
  }
};

/**
 * Scans an invoice file (image or PDF), uploads it and stores the extracted
 * data under `<type>Invoices/<uid>/userInvoices`.
 *
 * PDFs are rendered page by page (concurrency bounded by `limit2`); each page
 * is processed, uploaded and stored as its own invoice. A failure on one page
 * is logged and does not stop the remaining pages.
 *
 * @param file - The image or PDF to scan.
 * @param user - Authenticated user; only `user.uid` is read.
 * @param db - Firestore instance.
 * @param type - Which invoice collection to write to.
 * @param isRescan - When true (and `existingId` is set) the existing document
 *   is updated instead of a new one being added.
 * @param existingId - Id of the document to overwrite on rescan.
 * @returns The data extracted from an image file. For PDFs nothing is
 *   returned (`undefined`), because results are stored per page.
 * @throws Error if the user's Firestore document does not exist. For image
 *   files, extraction/upload/write errors propagate to the caller.
 */
export const processFile = async (
  file: File,
  user: any,
  db: any,
  type: "sales" | "purchase" | "expense",
  isRescan: boolean = false,
  existingId: string = ""
): Promise<any> => {
  const userDocRef = doc(db, "users", user.uid);
  const userDoc = await getDoc(userDocRef);

  if (!userDoc.exists()) {
    throw new Error("User document does not exist in Firestore.");
  }

  const userData = userDoc.data();
  const { role } = userData;
  const userId = user.uid;

  let processedData: any;

  const collectionName =
    type === "purchase"
      ? "purchaseInvoices"
      : type === "sales"
      ? "salesInvoices"
      : "expenseInvoices";

  const userInvoicesRef = collection(
    db,
    collectionName,
    userId,
    "userInvoices"
  );

  const q = query(userInvoicesRef, orderBy("invoiceNumber", "desc"), limit(1));
  const querySnapshot = await getDocs(q);

  // Stored invoice numbers may be strings; coerce before adding 1 so the
  // default is an arithmetic successor rather than a string concatenation.
  // NOTE(review): string invoice numbers sort lexicographically in this query,
  // so "highest" may not be the numeric maximum — confirm the numbering scheme.
  const lastStoredNumber =
    querySnapshot.docs.length > 0
      ? Number(querySnapshot.docs[0].data().invoiceNumber)
      : 0;
  const lastInvoiceNumber = Number.isFinite(lastStoredNumber)
    ? lastStoredNumber
    : 0;
  const defaultInvoiceNumber = lastInvoiceNumber + 1;

  const target = {
    userId,
    type,
    invoicesRef: userInvoicesRef,
    existingId: isRescan ? existingId : "",
  };

  if (file.type === "application/pdf") {
    const pdfBuffer = new Uint8Array(await file.arrayBuffer());
    // pdf-lib is used only to learn the page count up front.
    const pdfDoc = await PDFDocument.load(pdfBuffer);
    const numPages = pdfDoc.getPageCount();

    // NOTE(review): on a rescan every page updates the same `existingId`
    // document, so the last page to finish wins — confirm this is intended.
    const processingTasks = Array.from({ length: numPages }, (_, i) =>
      limit2(async () => {
        try {
          console.log(`Processing page ${i + 1} of ${numPages}`);

          const imageDataUrl = await pdfPageToImage(pdfBuffer, i + 1);
          const blob = dataURLToBlob(imageDataUrl);
          const imageFile = new File([blob], `page${i + 1}.png`, {
            type: "image/png",
          });

          const pageProcessedData = await processImage(
            imageFile,
            role,
            userData
          );

          await uploadAndStoreInvoice(
            imageFile,
            pageProcessedData,
            defaultInvoiceNumber + i,
            target
          );
        } catch (error) {
          // Best effort: one bad page must not abort the rest of the PDF.
          console.error(`Error processing page ${i + 1}:`, error);
        }
      })
    );

    await Promise.all(processingTasks);
  } else {
    processedData = await processImage(file, role, userData);
    await uploadAndStoreInvoice(
      file,
      processedData,
      defaultInvoiceNumber,
      target
    );
  }

  return processedData;
};
/**
 * Formats a date as `DD/MM/YYYY` using the local-time day, month and year.
 * Day and month are zero-padded to two digits; the year is printed as-is.
 */
const formatDateAsDDMMYYYY = (date: Date) => {
  const twoDigits = (value: number) => value.toString().padStart(2, "0");

  return [
    twoDigits(date.getDate()),
    twoDigits(date.getMonth() + 1), // getMonth() is zero-based
    date.getFullYear(),
  ].join("/");
};
/**
 * Converts a base64 `data:` URL into a Blob.
 *
 * The part after the first comma is base64-decoded into bytes; the MIME type
 * is taken from the header, between the `:` and the first `;`.
 */
const dataURLToBlob = (dataUrl: string): Blob => {
  const [header, payload] = dataUrl.split(",");

  // Decode the payload first, then read the MIME type from the header.
  const decoded = atob(payload);
  const mimeString = header.split(":")[1].split(";")[0];

  // atob yields one character per byte, so each char code is the byte value.
  const bytes = Uint8Array.from(decoded, (ch) => ch.charCodeAt(0));

  return new Blob([bytes.buffer], { type: mimeString });
};

/**
 * Flattens the result of `processInvoiceText` into the stored invoice shape:
 * `totals.{vatTotal,subtotal,total}` become top-level fields and
 * `supplierName` is renamed to `clientName`. Missing values fall back to the
 * same defaults the heuristic path uses (0 for amounts, "Unknown" for the
 * name) instead of throwing or leaving `undefined` fields behind.
 */
const normalizeAiInvoice = (aiResult: any): any => {
  const { totals, supplierName, ...rest } = aiResult ?? {};
  return {
    ...rest,
    vatTotal: totals?.vatTotal ?? 0,
    subtotal: totals?.subtotal ?? 0,
    total: totals?.total ?? 0,
    clientName: supplierName ?? "Unknown",
  };
};

/**
 * Extracts invoice data from a single image, choosing the pipeline by role:
 *
 * - `"amazonAPI"`: text via Amazon Textract, fields via `processInvoiceText`.
 * - `"API"`: text via Tesseract (after preprocessing), fields via
 *   `processInvoiceText`.
 * - anything else: text via Tesseract, fields via the local extract helpers
 *   (`extractDate`, `extractTotals`, `findClientName`).
 *
 * @param imageFile - Image to read.
 * @param role - The user's role; selects the pipeline as described above.
 * @param userData - User profile data; `companyName`, `apiKey` and `uid` are
 *   read from it.
 * @returns The extracted invoice fields. The heuristic (non-API) pipeline
 *   yields only `date`, `subtotal`, `vatTotal`, `total` and `clientName`.
 */
const processImage = async (
  imageFile: File,
  role: string,
  userData: any
): Promise<any> => {
  let processedData: any;

  if (role === "amazonAPI") {
    const text = await callAmazonTextract(imageFile);
    processedData = normalizeAiInvoice(
      await processInvoiceText(text, userData.companyName, userData.apiKey)
    );
    console.log("Extracted data from amazon", processedData);
  } else {
    const preprocessedImage = await preprocessImageForTesseract(imageFile);

    const {
      data: { text },
    } = await Tesseract.recognize(preprocessedImage, "eng");

    if (role === "API") {
      processedData = normalizeAiInvoice(
        await processInvoiceText(text, userData.companyName, userData.apiKey)
      );
    } else {
      const date = extractDate(text) ?? "Unknown";
      const totals = extractTotals(text) ?? {
        subtotal: 0,
        vatTotal: 0,
        total: 0,
      };
      // NOTE(review): assumes the user profile data carries a `uid` field —
      // confirm against what the caller passes in.
      const clientName =
        (await findClientName(text, userData.uid)) ?? "Unknown";

      processedData = {
        date,
        subtotal: totals.subtotal,
        vatTotal: totals.vatTotal,
        total: totals.total,
        clientName,
      };
    }
  }

  console.log("Extracted data", processedData);
  return processedData;
};
