import * as PDFJS from 'pdfjs-dist';
// @ts-ignore
import pdfjsWorker from 'pdfjs-dist/build/pdf.worker.entry'
// Register the worker entry imported above as the worker source used by pdf.js.
PDFJS.GlobalWorkerOptions.workerSrc = pdfjsWorker;
import { getDocument } from "pdfjs-dist";

/**
 * Extracts the text of every page of a PDF document.
 *
 * All pages are loaded concurrently; the text of each page is produced by
 * `extractTextFromContent` and the pages are joined with a newline, in page
 * order. Documents with more than 20 pages are rejected before any page is
 * read.
 *
 * @param arrayBuffer - The raw bytes of the PDF file.
 * @returns The extracted text, one line-separated chunk per page.
 * @throws A plain `{ status: 502, message: string }` object when the document
 *         has more than 20 pages. Errors raised by pdf.js while loading or
 *         reading the document are propagated unchanged.
 */
export async function extractTextFromPDF(arrayBuffer: ArrayBuffer): Promise<string> {
    // Inclusive upper bound, matching the "Max.: 20" wording of the error message.
    const maxPages = 20;

    const pdf = await getDocument({ data: new Uint8Array(arrayBuffer) }).promise;

    try {
        const numPages = pdf.numPages;

        if (numPages > maxPages) {
            // Deliberately a plain object rather than an Error: existing callers
            // read (and may serialise) the `status` and `message` fields.
            throw {
                status: 502,
                message: `Too many pages (Max.: ${maxPages}).`
            };
        }

        // pdf.js page numbers are 1-based.
        const textContents = await Promise.all(
            Array.from({ length: numPages }, async (_, i) => {
                const page = await pdf.getPage(i + 1);
                return page.getTextContent();
            })
        );

        return textContents
            .map((content) => extractTextFromContent(content))
            .join('\n');
    } finally {
        // Release the document on success and on failure alike so repeated
        // calls do not accumulate loaded documents.
        await pdf.destroy();
    }
}

/**
 * Concatenates the text carried by the items of a text-content object.
 *
 * For each entry of `content.items`, the entry's own `str` property is
 * appended if present; otherwise its own `textContent` property is appended
 * if present. Entries with neither property, and `null`/`undefined` entries,
 * contribute nothing. No separator is inserted between entries.
 *
 * @param content - An object exposing an iterable `items` collection.
 * @returns The concatenated text, or an empty string when `content` or
 *          `content.items` is `null`/`undefined`.
 */
export const extractTextFromContent = (content: any): string => {
    const items = content == null ? undefined : content.items;
    if (items == null) {
        return '';
    }

    // Called through the prototype so that items created without one
    // (e.g. Object.create(null)) or that shadow `hasOwnProperty` still work.
    const hasOwn = Object.prototype.hasOwnProperty;

    let text = '';
    for (const item of items) {
        if (item == null) {
            continue;
        }
        if (hasOwn.call(item, 'str')) {
            text += item.str;
        } else if (hasOwn.call(item, 'textContent')) {
            text += item.textContent;
        }
    }
    return text;
};