import JSZip from 'jszip';
import { PDFDocument } from 'pdf-lib';
import * as pdfjsLib from 'pdfjs-dist/webpack'; 

// Tell pdf.js where to load its worker script from: pdf.worker.min.js under
// the PUBLIC_URL environment value.
// NOTE(review): assumes pdf.worker.min.js is actually deployed at that path —
// confirm against the build/public folder setup.
pdfjsLib.GlobalWorkerOptions.workerSrc = `${process.env.PUBLIC_URL}/pdf.worker.min.js`;

/**
 * Decodes XML character references found in text taken from document.xml:
 * the five predefined entities (&lt; &gt; &amp; &quot; &apos;) and
 * decimal/hexadecimal numeric references (&#65; / &#x41;).
 *
 * @param {string} text - Text that may still contain XML character references.
 * @returns {string} The text with recognised references replaced by their characters.
 */
const decodeXmlEntities = (text) => {
  const namedEntities = { lt: '<', gt: '>', amp: '&', quot: '"', apos: "'" };

  // A single pass is deliberate: "&amp;lt;" must decode to "&lt;" and stop,
  // not be decoded a second time into "<".
  return text.replace(/&(#x[0-9a-fA-F]+|#[0-9]+|lt|gt|amp|quot|apos);/g, (match, body) => {
    if (body[0] !== '#') {
      return namedEntities[body];
    }
    const codePoint =
      body[1] === 'x' ? Number.parseInt(body.slice(2), 16) : Number.parseInt(body.slice(1), 10);
    // String.fromCodePoint throws above U+10FFFF; leave such references untouched.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
  });
};

/**
 * Extracts the plain text of a .docx file from its `word/document.xml` part.
 *
 * Paragraph ends and line breaks become newlines so adjacent paragraphs are
 * not glued together, all remaining XML tags are removed, and XML character
 * references are decoded so the result is the text as written in the
 * document (e.g. `&lt;script&gt;` is returned as `<script>`).
 *
 * Never throws: failures are logged and reported as an empty string.
 *
 * @param {ArrayBuffer} arrayBuffer - Raw bytes of the .docx (ZIP) file.
 * @returns {Promise<string>} The trimmed document text, or '' when the
 *   archive has no `word/document.xml` or cannot be read.
 */
const extractTextFromDocx = async (arrayBuffer) => {
  try {
    const zip = await JSZip.loadAsync(arrayBuffer);
    const documentXml = await zip.file('word/document.xml')?.async('text');

    if (!documentXml) {
      console.warn('No document.xml found in the .docx file.');
      return '';
    }

    // Mark paragraph ends and explicit breaks before the tags disappear.
    const withLineBreaks = documentXml.replace(/<\/w:p>|<w:(?:br|cr)\b[^>]*>/g, '\n');
    const withoutTags = withLineBreaks.replace(/<[^>]+>/g, '');

    // Decode only after stripping tags, so decoded "<" / ">" characters are
    // never mistaken for markup and removed.
    return decodeXmlEntities(withoutTags).trim();
  } catch (error) {
    console.error('Error extracting text from DOCX:', error);
    return '';
  }
};

/**
 * Extracts the text of every page of a PDF using pdf.js.
 *
 * Pages are read one at a time, in order. The text items of a page are
 * joined with single spaces, and pages are joined with single spaces as well.
 * The pdf.js loading task is always destroyed afterwards so the parsed
 * document does not stay alive after the text has been read.
 *
 * Never throws: failures are logged and reported as an empty string.
 *
 * @param {ArrayBuffer} arrayBuffer - Raw bytes of the PDF file.
 * @returns {Promise<string>} The trimmed text of all pages, or '' on failure.
 */
const extractTextFromPdf = async (arrayBuffer) => {
  let loadingTask;
  try {
    loadingTask = pdfjsLib.getDocument({ data: arrayBuffer });
    const pdf = await loadingTask.promise;
    const pageTexts = [];

    // pdf.js page numbers are 1-based.
    for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
      const page = await pdf.getPage(pageNumber);
      const textContent = await page.getTextContent();
      pageTexts.push(textContent.items.map((item) => item.str).join(' '));
    }
    return pageTexts.join(' ').trim();
  } catch (error) {
    console.error('Error extracting text from PDF:', error);
    return '';
  } finally {
    // Release the document even when extraction failed. Cleanup problems are
    // only logged so this function keeps its "never throws" contract.
    if (loadingTask) {
      try {
        await loadingTask.destroy();
      } catch (cleanupError) {
        console.warn('Error releasing PDF resources:', cleanupError);
      }
    }
  }
};

// Heuristic signatures of script-like content. The patterns deliberately carry
// no `g` flag: `.test()` on a global regex advances `lastIndex`, which would
// make these shared instances give different answers on repeated calls.
const MALICIOUS_PATTERNS = Object.freeze([
  /<script[\s\S]*?>[\s\S]*?<\/script>/i,
  /eval\(/i,
  /document\.cookie/i,
  /on\w+="[^"]*"/i,
  /<iframe[\s\S]*?>/i,
]);

// MIME types that are routed through the DOCX text extractor.
// NOTE(review): 'application/msword' is the legacy binary .doc type; such a
// file is presumably not a ZIP archive, in which case extraction yields '' and
// the file is reported as clean — confirm whether that is intended.
const WORD_MIME_TYPES = Object.freeze([
  'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
  'application/msword',
]);

/**
 * Reports whether `content` matches any of the malicious-content patterns.
 *
 * @param {string} content - Text to inspect.
 * @returns {boolean} True when at least one pattern matches.
 */
const containsMaliciousPattern = (content) =>
  MALICIOUS_PATTERNS.some((pattern) => pattern.test(content));

/**
 * Checks an uploaded file for script-like content.
 *
 * Plain-text files are scanned directly; PDF and Word files are scanned via
 * their extracted text. Any other MIME type is not inspected and is reported
 * as not malicious. File contents are never written to the console, since
 * they may be large or sensitive.
 *
 * @param {File} file - Must provide `type` and `name`, plus `text()` for
 *   plain-text files or `arrayBuffer()` for PDF and Word files.
 * @returns {Promise<boolean>} True when a malicious pattern is found, or when
 *   reading the file throws (fail closed); false otherwise, including when no
 *   text could be extracted.
 */
export const checkForMaliciousContent = async (file) => {
  try {
    if (file.type === 'text/plain') {
      return containsMaliciousPattern(await file.text());
    }

    if (file.type === 'application/pdf') {
      const textContent = await extractTextFromPdf(await file.arrayBuffer());
      if (!textContent) {
        console.warn(`No text extracted from PDF file: ${file.name}`);
        return false;
      }
      return containsMaliciousPattern(textContent);
    }

    if (WORD_MIME_TYPES.includes(file.type)) {
      const textContent = await extractTextFromDocx(await file.arrayBuffer());
      // An empty extraction result matches no pattern, so this yields false.
      return containsMaliciousPattern(textContent);
    }
  } catch (error) {
    console.error(`Error processing file ${file.name}:`, error);
    return true;
  }

  return false;
};
