import mammoth from "mammoth";
import { maximumPagesReached } from "../helper/pageCounter/pageCounter";

/**
 * Extracts the raw text of a DOCX file using Mammoth and enforces the page limit.
 *
 * Every line break in the extracted text is replaced with a single space. The
 * number of whitespace-separated words is then handed to `maximumPagesReached`;
 * an empty or whitespace-only document counts as zero words.
 *
 * @param {ArrayBuffer} arrayBuffer - The ArrayBuffer representation of the DOCX file.
 * @returns {Promise<string>} The extracted text with line breaks replaced by spaces.
 * @throws {{ status: number, message: string }} A plain object with `status: 502`
 *   when `maximumPagesReached` reports that the word count exceeds the limit.
 *   Any rejection coming from Mammoth itself propagates unchanged.
 */
export async function extractTextFromDocx(arrayBuffer: ArrayBuffer): Promise<string> {
    const result = await mammoth.extractRawText({ arrayBuffer });

    // A global regex replace is equivalent to replaceAll('\n', ' ') but does not
    // require ES2021 library typings. The Mammoth result object is left untouched.
    const text = result.value.replace(/\n/g, ' ');

    // "".split(/\s+/) yields [""] (length 1), so an empty document has to be
    // special-cased to be counted as zero words.
    const trimmed = text.trim();
    const wordCount = trimmed === '' ? 0 : trimmed.split(/\s+/).length;

    if (maximumPagesReached(wordCount)) {
        // Thrown as a plain object (not an Error) so callers that read
        // `status` / `message` from the rejection keep working.
        // NOTE(review): "Max.: 20" is hard-coded here — confirm it matches the
        // limit applied by maximumPagesReached.
        throw { status: 502, message: "Too many pages (Max.: 20)." };
    }

    return text;
}