import { getDocument as getPdfDocument, GlobalWorkerOptions } from 'pdfjs-dist-es5/build/pdf';
import { isBase64 } from '@/shared/utils';
import mobileService from '@/services/mobilePhoto.service';

// Point pdf.js at the worker script served under the application's base URL.
// NOTE(review): BASE_URL is concatenated with a path that already starts with '/';
// if BASE_URL ends with '/', the resulting URL contains '//' — confirm against the build config.
GlobalWorkerOptions.workerSrc = process.env.BASE_URL + '/js/pdf.worker.js';

// One pattern per EU VAT identification number format, keyed by country prefix.
// Inherited and double checked with https://en.wikipedia.org/wiki/VAT_identification_number
// The patterns are unanchored: they are meant to find VAT numbers inside free text.
const eu_regexes = [
	/AT\s?U\s?[0-9]{8}/, // OK
	/BE\s?0?[0-9]{3}[\s.]?[0-9]{3}[\s.]?[0-9]{3}/, // OK
	// A quantifier must not contain whitespace: `{9, 10}` is matched as literal text by
	// JavaScript, which made this pattern unable to match any real number.
	/BG\s?[0-9]{9,10}/, // ok - maybe check format / spacing between characters ?
	/CY\s?[0-9]{8}\s?[A-Z]/, // ok
	/CZ\s?[0-9]{8}([0-9]{2})?/, // ok
	/DE\s?0?[0-9]{3}[\s.]?[0-9]{3}[\s.]?[0-9]{3}/, // OK
	/DK\s?[0-9]{8}/, // OK
	/EE\s?[0-9]{3}[\s.]?[0-9]{3}[\s.]?[0-9]{3}/, // OK
	/(EL|GR)\s?[0-9]{3}[\s.]?[0-9]{3}[\s.]?[0-9]{3}/, // OK
	/ES\s?[0-9A-Z][0-9]{7}[0-9A-Z]/, // OK
	/FI\s?[0-9]{8}/, // OK
	/FR\s?[\s.]?[0-9]{2}[0-9]{3}[\s.]?[0-9]{3}[\s.]?[0-9]{3}/, // OK
	// NOTE(review): the digit classes here are [1-9], so numbers containing a 0 are not
	// matched, and no whitespace is allowed right after the prefix — confirm this is intended.
	/GB(([1-9]{3}[\s.]?[1-9]{3}[\s.]?[1-9]{3})|([1-9]{3}[\s.]?[1-9]{3}[\s.]?[1-9]{3}[\s.]))/, // OK
	/HU\s?[0-9]{8}/, //OK
	/IE(([0-9]{7}[A-Z]{1,2}|([0-9][A-Z][0-9]{5}[A-Z])))/, // OK
	/IT\s?[0-9]{11}/, // OK
	// NOTE(review): same [1-9] digit classes as the GB pattern — confirm zeros should be excluded.
	/LT\s?(([1-9]{3}[\s.]?[1-9]{3}[\s.]?[1-9]{3})|([1-9]{3}[\s.]?[1-9]{3}[\s.]?[1-9]{3}[\s.]))/,
	/LU\s?[0-9]{8}/, //OK
	/LV\s?[0-9]{11}/, //OK
	/MT\s?[0-9]{8}/, //OK
	/NL\s?[0-9]{9}B[0-9]{2}/, // OK
	/PL\s?([0-9]{10}|([0-9]{3}-[0-9]{3}-[0-9]{2}-[0-9]{2})|([0-9]{3}-[0-9]{2}-[0-9]{2}-[0-9]{3}))/, // OK
	/PT\s?[0-9]{3}[\s.]?[0-9]{3}[\s.]?[0-9]{3}/, // OK
	/RO\s?[0-9]{2,10}/, //OK
	/SE\s?[0-9]{12}/, // OK
	/SI\s?[0-9]{8}/, // OK
	/SK\s?[0-9]{10}/ // OK
];

// API key appended to the Google Cloud Vision request URL, read from the build environment.
// NOTE(review): the variable name is spelled "GOOLGE"; confirm it matches the name used in
// the environment configuration before renaming anything.
const API_KEY = process.env.VUE_APP_GOOLGE_API_KEY;
// Global pattern used to pick e-mail addresses out of the OCR text.
// NOTE(review): the last domain label is limited to 2-4 characters, so an address with a
// longer top-level domain is only matched up to its fourth character.
const EMAIL_REGEX = /[\w-\.]+@([\w-]+\.)+[\w-]{2,4}/g;

/**
 * Combine several regular expressions into a single global alternation.
 *
 * Each source pattern is wrapped in its own capturing group and the groups are
 * joined with `|`. Flags of the input expressions are not carried over; the
 * result always has exactly the `g` flag.
 *
 * @param {!Array<RegExp>} regexes - expressions to merge
 * @returns {RegExp} the merged global expression
 */
const getComposedRegex = (regexes) => {
	const alternatives = [];
	for (const { source } of regexes) {
		alternatives.push(`(${source})`);
	}
	const pattern = alternatives.join('|');
	return new RegExp(pattern, 'g');
};

// Labelled patterns that are run against the OCR text; every entry is reported
// back to the caller together with its matches.
// (An unused duplicate of the composed VAT expression used to be built here as `t`;
// it was never read and has been removed.)
const REGEXES = [
	{
		label: 'EMAIL',
		regex: EMAIL_REGEX
	},
	{
		label: 'VAT',
		regex: getComposedRegex(eu_regexes)
	}
];

/**
 * Strip a leading PNG/JPG/JPEG data-URL header from a base64 string.
 *
 * Strings without such a header are returned unchanged.
 *
 * @param {!String} base64string - raw base64 payload or image data URL
 * @returns {String} the payload without the data-URL header
 */
function formatBase64String(base64string) {
	const dataUrlHeader = /^data:image\/(png|jpg|jpeg);base64,/;
	const payload = base64string.replace(dataUrlHeader, '');
	return payload;
}

/**
 * Build the JSON payload for the text-detection request.
 *
 * Every image becomes one request entry whose content is the image with its
 * data-URL header removed, asking for the `TEXT_DETECTION` feature.
 *
 * @param {!Array<String>} base64Imgs - images as base64 strings or data URLs
 * @returns {{requests: Array<Object>}} request body, one entry per image
 */
function getBody(base64Imgs) {
	const toRequest = (base64Img) => {
		const content = formatBase64String(base64Img);
		return {
			image: { content },
			features: [{ type: 'TEXT_DETECTION' }]
		};
	};

	const requests = [];
	for (const base64Img of base64Imgs) {
		requests.push(toRequest(base64Img));
	}
	return { requests };
}

/**
 * Rasterise every page of a PDF so it can be handed to the OCR step.
 *
 * Pages are rendered one after another at scale 1 onto a canvas that is never
 * attached to the document, then exported with `canvas.toDataURL()`.
 *
 * @async
 * @function
 * @param {!String} pdfUrl - location of the PDF, passed straight to pdf.js
 * @returns {Promise<Array<String>>} one data URL per page, in page order
 */
const convertPdfToImages = async (pdfUrl) => {
	// Render a single page (1-based number) and return it as a data URL.
	const renderPage = async (pdfDocument, pageNumber) => {
		const pdfPage = await pdfDocument.getPage(pageNumber);
		const viewport = pdfPage.getViewport({ scale: 1 });
		const surface = document.createElement('canvas');
		surface.width = viewport.width;
		surface.height = viewport.height;
		const renderTask = pdfPage.render({ canvasContext: surface.getContext('2d'), viewport });
		await renderTask.promise;
		return surface.toDataURL();
	};

	const pdfDocument = await getPdfDocument(pdfUrl).promise;
	const dataUrls = [];
	let pageNumber = 1;
	while (pageNumber <= pdfDocument.numPages) {
		dataUrls.push(await renderPage(pdfDocument, pageNumber));
		pageNumber += 1;
	}
	return dataUrls;
};

/**
 * Send the images to the Google Cloud Vision `images:annotate` endpoint and
 * return the parsed JSON answer.
 *
 * @async
 * @param {!Array<String>} imgs - images as base64 strings or base64 data URLs
 * @returns {Promise<Object>} the parsed response body
 * @throws {Error} 'Could not analyse image' when the request fails, the server
 *   answers with a non-2xx status, or the body is not valid JSON; the underlying
 *   error is attached as `cause` where the runtime supports it
 */
async function getImageTextContent(imgs) {
	const body = getBody(imgs);
	const url = `https://vision.googleapis.com/v1/images:annotate?key=${API_KEY}`;
	const options = {
		method: 'POST',
		headers: {
			Accept: 'application/json',
			'Content-Type': 'application/json;charset=UTF-8'
		},
		body: JSON.stringify(body)
	};
	try {
		const res = await fetch(url, options);
		// fetch only rejects on network failures; HTTP errors have to be detected explicitly,
		// otherwise an error payload would be handed on as if it were an OCR result.
		if (!res.ok) {
			throw new Error(`Vision API responded with HTTP ${res.status}`);
		}
		// Await inside the try block so a malformed body is reported through the catch below too.
		return await res.json();
	} catch (e) {
		throw new Error('Could not analyse image', { cause: e });
	}
}

/**
 * Flatten an OCR result into one space-separated string.
 *
 * For every response the non-empty `description` values of its
 * `textAnnotations` are joined with a space; the per-response texts are then
 * joined with a space and trimmed.
 *
 * @param {?Object} ocrData - parsed OCR answer, expected to carry a `responses` array
 * @returns {?String} the combined text, or null when there are no responses
 */
function getFullText(ocrData) {
	if (!ocrData || !ocrData.responses || ocrData.responses.length === 0) {
		return null;
	}
	return ocrData.responses
		.map((response) => {
			// A response may come without `textAnnotations` (presumably when no text was
			// detected or the image could not be processed); treat it as empty instead of throwing.
			const annotations = (response && response.textAnnotations) || [];
			return annotations
				.map((annotation) => annotation.description)
				.filter(Boolean)
				.join(' ');
		})
		.join(' ')
		.trim();
}

/**
 * Run every labelled pattern of REGEXES against the text.
 *
 * @param {?String} text - text to search; null or undefined is treated as empty
 * @returns {Array<Object>} one entry per pattern: the pattern's own fields plus
 *   `match`, an array of the `matchAll` results (empty when nothing matched)
 */
function getRegexMatches(text) {
	// The upstream text extraction can yield null; search an empty string in that
	// case so callers get empty match lists instead of a TypeError.
	const haystack = text === null || text === undefined ? '' : text;
	// `matchAll` always returns an iterator, so `match` is always an array.
	return REGEXES.map((reg) => ({
		...reg,
		match: Array.from(haystack.matchAll(reg.regex))
	}));
}

/**
 * Make sure every entry is available as base64 image data.
 *
 * Entries that `isBase64` accepts are kept as they are. Any other entry is
 * treated as a path relative to VUE_APP_PATH_IMG_URL: PDFs (extension checked
 * case-insensitively) are expanded into one image per page, everything else is
 * loaded through `mobileService.readAsBase64`.
 *
 * Entries are processed one at a time, so the output keeps the input order.
 *
 * @async
 * @param {!Array<String>} [images=[]] - base64 strings and/or relative file paths
 * @returns {Promise<Array>} the converted images; a PDF contributes one item per page
 */
const ensureImagesAreBase64 = async (images = []) => {
	const res = [];
	for (const image of images) {
		if (isBase64(image)) {
			res.push(image);
			continue;
		}
		const file = `${process.env.VUE_APP_PATH_IMG_URL}${image}`;

		// Compare the extension case-insensitively so that "scan.PDF" is not sent
		// down the plain-image path.
		if (image.toLowerCase().endsWith('.pdf')) {
			const imagePages = await convertPdfToImages(file);
			res.push(...imagePages);
			continue;
		}

		res.push(await mobileService.readAsBase64(file));
	}
	return res;
};

/**
 * Run the whole OCR pipeline on a set of images.
 *
 * The images are normalised to base64, sent to the OCR service, the detected
 * text is flattened, and the configured patterns are matched against it.
 *
 * @async
 * @param {Array<String>} images - base64 strings and/or relative file paths
 * @returns {Promise<{regexMatches: Array<Object>}>} the matches per pattern
 */
async function ocrImgAnalysis(images) {
	const base64Images = await ensureImagesAreBase64(images);
	const visionResponse = await getImageTextContent(base64Images);
	const regexMatches = getRegexMatches(getFullText(visionResponse));
	return { regexMatches };
}

export default ocrImgAnalysis;
