<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.

namespace qbank_questiongen\local;

use cm_info;
use assignfeedback_editpdf\pdf;
use lesson;
use local_ai_manager\ai_manager_utils;
use local_ai_manager\manager;
use qbank_questiongen\form\story_form;
use question_bank;
use setasign\Fpdi\PdfParser\PdfParserException;
use stdClass;

/**
 * Question generator class.
 *
 * @package    qbank_questiongen
 * @copyright  2025 ISB Bayern
 * @author     Philipp Memmel
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class question_generator {
    /**
     * @var \core\clock A clock object that is being dependency injected. It is the time source for the timestamps
     *  written to the records of the resource cache table.
     */
    private readonly \core\clock $clock;

    /**
     * @var string[] Mimetypes of files whose text is not read directly from the file content. Files of these types
     *  are handed over to extract_content_from_pdf_or_image() to be converted into text content.
     */
    const ITT_MIMETYPES = ['application/pdf', 'image/png', 'image/jpeg', 'image/jpg'];

    /**
     * Creates an instance of the question_generator.
     *
     * The clock is not passed in by the caller, it is resolved from the dependency injection container.
     *
     * @param int $contextid The id of the context the question_generator is called from
     */
    public function __construct(
        /** @var int The id of the context the question_generator is called from. */
        private readonly int $contextid
    ) {
        // Resolve the clock through the DI container instead of instantiating a concrete clock here.
        $this->clock = \core\di::get(\core\clock::class);
    }

    /**
     * Generate a question by using an external LLM.
     *
     * The conversation sent to the LLM consists of the primer, the instructions and the example (all as system messages),
     * optionally a system message listing the questions that already exist in the target question category, and finally
     * the user message containing the topic or content the question should be created from.
     *
     * @param stdClass $dataobject of the stored processing data from qbank_questiongen DB table extended with example data.
     * @param bool $sendexistingquestionsascontext if true, name and text of the questions that already exist in the question
     *  category $dataobject->category are sent to the LLM with the instruction to generate a different question
     * @return stdClass|string object containing information about the generated question or string containing an error message
     *  in case of an error occurred and no question could be generated
     */
    public function generate_question(stdClass $dataobject, bool $sendexistingquestionsascontext): stdClass|string {
        global $CFG, $DB;
        require_once($CFG->dirroot . '/question/engine/bank.php');

        // Build primer.
        $primer = $dataobject->primer;
        $story = $dataobject->story;
        $instructions = $dataobject->instructions;
        $example = $dataobject->example;

        $storyprompt = '';
        $questiontextsinqbankprompt = '';
        $generatedquestiontext = '';
        // Explicitly initialised, because it is only assigned inside the provider branch below.
        $errormessage = '';

        // Only local_ai_manager is handled. With any other provider setting no request is sent and the returned object
        // has an empty text.
        $provider = get_config('qbank_questiongen', 'provider');
        if ($provider === 'local_ai_manager') {
            $messages = [
                ['sender' => 'system', 'message' => $primer],
                ['sender' => 'system', 'message' => $instructions],
                ['sender' => 'system', 'message' => $example],
            ];

            if ($sendexistingquestionsascontext) {
                $questionidsincategory = question_bank::get_finder()->get_questions_from_categories([$dataobject->category], null);
                if (!empty($questionidsincategory)) {
                    [$insql, $inparams] = $DB->get_in_or_equal($questionidsincategory);
                    $questiontextsinqbankcat = [];
                    $rs = $DB->get_recordset_select('question', "id $insql", $inparams);
                    try {
                        foreach ($rs as $record) {
                            $questiontextsinqbankcat[] = [
                                'title' => $record->name,
                                'question_text' => strip_tags($record->questiontext),
                            ];
                        }
                    } finally {
                        // Release the recordset in any case, even if processing one of the records fails.
                        $rs->close();
                    }

                    $questiontextsinqbankprompt = 'The question that will be generated by you has to be different '
                        . 'from all of the following questions in this JSON string: "'
                        . json_encode($questiontextsinqbankcat) . '"';
                    $messages[] = ['sender' => 'system', 'message' => $questiontextsinqbankprompt];
                }
            }

            // Intentionally a switch (loose comparison) and not a match (strict comparison): the type of
            // $dataobject->mode is not guaranteed to be identical to the type of the mode constants.
            switch ($dataobject->mode) {
                case story_form::QUESTIONGEN_MODE_TOPIC:
                    $storyprompt =
                        'Create a question about the following topic. Use your own training data to generate it: "' .
                        $story . '"';
                    break;
                case story_form::QUESTIONGEN_MODE_STORY:
                case story_form::QUESTIONGEN_MODE_COURSECONTENTS:
                    $storyprompt =
                        'Create a question from the following contents. '
                        . 'Only use this content and do not use any training data: "' . $story . '"';
                    break;
            }

            // The user message has to be the last one: retrieve_llm_response() uses the last message as prompt.
            $messages[] = ['sender' => 'user', 'message' => $storyprompt];

            [
                'generatedquestiontext' => $generatedquestiontext,
                'errormessage' => $errormessage,
            ] = $this->retrieve_llm_response($messages);
        }

        if (!empty($errormessage)) {
            return $errormessage;
        }

        // We return a whole question object containing all the generated data. This can be used for unit tests or logging.
        $question = new stdClass();
        $question->primer = $primer;
        $question->instructions = $instructions;
        $question->example = $example;
        $question->storyprompt = $storyprompt;
        $question->questiontextsinqbankprompt = $questiontextsinqbankprompt;
        $question->text = $generatedquestiontext;

        return $question;
    }

    /**
     * Generates the story to send to the LLM based on the content from course activities.
     *
     * The course is determined from the first course module id in the list. Only course modules of that course are
     * taken into account, ids that do not belong to it are ignored. Course modules that are not supported are skipped
     * with a debugging message.
     *
     * @param array $courseactivities list of course module ids
     * @return string text extracted from the activities that can be sent as context to the external AI system, empty string
     *  if no course module ids have been passed
     */
    public function create_story_from_cms(array $courseactivities): string {
        global $CFG;
        require_once($CFG->dirroot . '/question/editlib.php');

        if (empty($courseactivities)) {
            // Nothing selected, so there is no course to look up and no content to extract.
            return '';
        }

        [, $firstcm] = get_module_from_cmid(reset($courseactivities));
        $modinfo = get_fast_modinfo($firstcm->course);
        $story = '';

        foreach ($modinfo->get_cms() as $cm) {
            // Loose comparison on purpose: the passed ids are not guaranteed to be integers.
            if (!in_array($cm->id, $courseactivities)) {
                continue;
            }
            if (!self::is_cm_supported($cm)) {
                debugging('Course module with id ' . $cm->id . ' is currently not supported');
                continue;
            }
            $story .= $this->extract_content_from_cm($cm);
        }
        return $story;
    }

    /**
     * Returns if a course module is supported by the question generator.
     *
     * The module types page, label, lesson, book and folder are always supported. A resource is supported if it contains
     * a file and the mimetype of its first file is one of the supported mimetypes.
     *
     * @param cm_info $cm the cm_info object of the course module
     * @return bool true if extracting content from the course module is supported, false otherwise
     */
    public static function is_cm_supported(cm_info $cm): bool {
        if (in_array($cm->modname, ['page', 'label', 'lesson', 'book', 'folder'], true)) {
            return true;
        }
        if ($cm->modname !== 'resource') {
            return false;
        }

        $context = \context_module::instance($cm->id);
        $fs = get_file_storage();
        $files = $fs->get_area_files($context->id, 'mod_resource', 'content', 0, 'sortorder DESC, id ASC', false);
        $file = reset($files);
        if (empty($file)) {
            // A resource without any file: reset() returned false, so there is nothing we could extract content from.
            return false;
        }
        return in_array($file->get_mimetype(), self::get_supported_mimetypes(), true);
    }

    /**
     * Returns the mimetypes that are supported for converting into text.
     *
     * These are the text based mimetypes whose content can be used directly, followed by the mimetypes listed in
     * self::ITT_MIMETYPES.
     *
     * @return string[] array of mimetypes that are supported for being converted into text
     */
    public static function get_supported_mimetypes(): array {
        // Built from the constant instead of repeating its entries, so both lists cannot get out of sync.
        return ['text/plain', 'text/html', 'text/csv', ...self::ITT_MIMETYPES];
    }

    /**
     * For a given course module, extract the content as plain text.
     *
     * Depending on the module type the content is taken from:
     * - page: the page content
     * - label: the intro
     * - resource: the first file of the resource, if its mimetype is supported
     * - folder: all files with a supported mimetype
     * - lesson: the contents of all lesson pages
     * - book: title and content of all chapters
     *
     * @param cm_info $cm the cm_info object of the course module
     * @return string the content of the course module as plain text.
     * @throws \coding_exception if the course module is not supported
     * @throws \moodle_exception if text has to be extracted from a file by the external AI system and this fails
     */
    public function extract_content_from_cm(cm_info $cm): string {
        global $CFG, $DB;
        // TODO Eventually also respect course module descriptions and title?
        $content = '';
        $instance = $cm->get_instance_record();
        switch ($cm->modname) {
            case 'page':
                $content = $instance->content;
                break;
            case 'label':
                $content = $instance->intro;
                break;
            case 'resource':
                $context = \context_module::instance($cm->id);
                $fs = get_file_storage();
                $files = $fs->get_area_files($context->id, 'mod_resource', 'content', 0, 'sortorder DESC, id ASC', false);
                // Only the first file according to the sort order is used.
                $file = reset($files);
                if (!empty($file)) {
                    $content = $this->extract_content_from_file($file) ?? '';
                }
                break;
            case 'folder':
                $context = \context_module::instance($cm->id);
                $fs = get_file_storage();
                $files = $fs->get_area_files($context->id, 'mod_folder', 'content', 0, 'id ASC', false);
                $filecontents = [];
                foreach ($files as $file) {
                    $filecontent = $this->extract_content_from_file($file);
                    // Files with an unsupported mimetype are skipped completely.
                    if ($filecontent !== null) {
                        $filecontents[] = trim($filecontent);
                    }
                }
                // Will later be converted to proper line breaks.
                $content = implode("<br/><br/>", $filecontents);
                break;
            case 'lesson':
                require_once($CFG->dirroot . '/mod/lesson/locallib.php');
                $lesson = lesson::load($instance->id);
                $pages = $lesson->load_all_pages();
                $pagescontents = [];
                foreach ($pages as $page) {
                    // We must not use $page->get_contents() here because it requires having the $PAGE object set up properly for
                    // the lesson course module which we do not have.
                    $pagescontents[] = trim($page->properties()->contents);
                }
                $content = implode("<br/><br/>", $pagescontents);
                break;
            case 'book':
                require_once($CFG->dirroot . '/mod/book/locallib.php');
                $book = $DB->get_record('book', ['id' => $instance->id]);
                $chapters = book_preload_chapters($book);
                $chaptercontents = [];
                foreach ($chapters as $chapter) {
                    $chaptercontents[] = $chapter->title . "<br/>" . $chapter->content;
                }
                $content = implode("<br/><br/>", $chaptercontents);
                break;
            default:
                throw new \coding_exception('Unsupported course module/course module type - cmid: ' . $cm->id . ', ' .
                    $cm->modname);
        }

        return empty($content) ? '' : self::format_extracted_cm_content($content);
    }

    /**
     * Extracts the raw (not yet formatted) text of a single stored file.
     *
     * @param \stored_file $file the file to extract the text from
     * @return string|null the text of the file, null if the mimetype of the file is not supported
     */
    private function extract_content_from_file(\stored_file $file): ?string {
        $mimetype = $file->get_mimetype();
        if (!in_array($mimetype, self::get_supported_mimetypes())) {
            return null;
        }
        if (in_array($mimetype, self::ITT_MIMETYPES)) {
            // PDFs and images cannot be read as text directly.
            return $this->extract_content_from_pdf_or_image($file);
        }
        return $file->get_content();
    }

    /**
     * Extracts content from pdf or image files.
     *
     * The text is extracted by an external AI system. If there already is an entry for the content hash of the file in
     * the resource cache table, the cached text is returned (and the last access time of the entry is updated) without
     * contacting the AI system. Freshly extracted text is written to the cache.
     *
     * @param \stored_file $file The file to send
     * @return string the extracted content as text
     * @throws \moodle_exception if the file is neither supported by the AI system nor a PDF, or if the extraction fails
     */
    public function extract_content_from_pdf_or_image(\stored_file $file): string {
        global $DB;
        $cachedrecord = $DB->get_record('qbank_questiongen_resource_cache', ['contenthash' => $file->get_contenthash()]);
        if ($cachedrecord) {
            $cachedrecord->timelastaccessed = $this->clock->time();
            $DB->update_record('qbank_questiongen_resource_cache', $cachedrecord);
            return $cachedrecord->extractedcontent;
        }

        $mimetype = $file->get_mimetype();

        // For example 'application/pdf' is not supported by some AI systems.
        if ($this->is_mimetype_supported_by_ai_system($mimetype)) {
            $datauri = 'data:' . $mimetype . ';base64,' . base64_encode($file->get_content());
            $extractedtext = $this->retrieve_file_content_from_ai_system($datauri);
            $this->store_to_record_cache($file, $extractedtext);
            return $extractedtext;
        }

        if ($mimetype !== 'application/pdf') {
            // Not perfect to throw an exception here. We probably need some image format conversion here.
            throw new \moodle_exception('Unsupported file type: ' . $mimetype);
        }

        // Depending on what models/AI tools are configured, some of them do not support sending PDF files directly.
        // So we have to convert each PDF page to an image and extract the text from the images one by one.
        $pagetexts = [];
        foreach ($this->convert_pdf_to_images($file) as $pageimage) {
            $pagetexts[] = $this->retrieve_file_content_from_ai_system($pageimage);
        }
        $extractedtext = implode('', $pagetexts);
        $this->store_to_record_cache($file, $extractedtext);
        return $extractedtext;
    }

    /**
     * Stores the content of a file into the record cache.
     *
     * Extracting text from a file is expensive, so the text extracted by the external LLM is kept in a database table,
     * looked up by the content hash of the file. An existing entry for the content hash is overwritten, otherwise a new
     * entry is created.
     *
     * @param \stored_file $file the file that we want to store the extracted content for
     * @param string $extractedcontent the extracted content (usually generated by an external AI system)
     */
    public function store_to_record_cache(\stored_file $file, string $extractedcontent): void {
        global $DB;
        $now = $this->clock->time();
        $existing = $DB->get_record('qbank_questiongen_resource_cache', ['contenthash' => $file->get_contenthash()]);

        if (!$existing) {
            $DB->insert_record('qbank_questiongen_resource_cache', (object) [
                'contenthash' => $file->get_contenthash(),
                'extractedcontent' => $extractedcontent,
                'timemodified' => $now,
                'timecreated' => $now,
                'timelastaccessed' => $now,
            ]);
            return;
        }

        // NOTE(review): timemodified is refreshed even if the extracted content did not change - confirm this is intended.
        $existing->extractedcontent = $extractedcontent;
        $existing->timemodified = $now;
        $existing->timelastaccessed = $now;
        $DB->update_record('qbank_questiongen_resource_cache', $existing);
    }

    /**
     * Helper function to format the extracted content.
     *
     * Surrounding whitespace is trimmed, afterwards the content is passed through html_to_text() to turn the HTML
     * (including the line break tags) into plain text.
     *
     * @param string $content the content to format
     * @return string the formatted content
     */
    public static function format_extracted_cm_content(string $content): string {
        return html_to_text(trim($content), 0, false);
    }

    /**
     * Helper function to retrieve the generated question text from the external LLM.
     *
     * The last entry of $messages is used as the prompt, all entries before it are passed as conversation context.
     * The outcome of the request is additionally printed via mtrace().
     *
     * @param array $messages a standardized messages array containing the "conversation" with the LLM
     * @return string[] array with keys 'generatedquestiontext' and 'errormessage'. If 'errormessage' is empty retrieving
     *  was successful, otherwise it contains an error message.
     */
    public function retrieve_llm_response(array $messages): array {
        // TODO Implement different backend(s). It's only local_ai_manager for now.
        $return = [
            'generatedquestiontext' => '',
            'errormessage' => '',
        ];
        $manager = new manager('questiongeneration');
        // After this call $messages only contains the conversation context, the removed last message is the prompt.
        $lastmessage = array_pop($messages);
        $result = $manager->perform_request(
            $lastmessage['message'],
            'qbank_questiongen',
            $this->contextid,
            ['conversationcontext' => $messages]
        );
        if ($result->get_code() === 200) {
            $return['generatedquestiontext'] = $result->get_content();
            mtrace('Question generation successful. The external LLM returned: ');
            mtrace($result->get_content());
        } else {
            mtrace('Question generation failed. The external LLM returned code ' . $result->get_code() . ':');
            mtrace($result->get_errormessage());
            if (!empty($result->get_debuginfo())) {
                mtrace($result->get_debuginfo());
            }
            // Return the error message. It has to go into the $return array, $result is the response object.
            $return['errormessage'] = $result->get_errormessage();
        }
        return $return;
    }

    /**
     * Wrapper for the call of an external AI system to extract content from a file.
     *
     * @param string $encodedimage The base64 encoded image to send to the external AI system
     * @return string the text the external AI system extracted from the file
     * @throws \moodle_exception if the external AI system does not answer with code 200
     */
    public function retrieve_file_content_from_ai_system(string $encodedimage): string {
        $prompt = 'Return the text that is written on the image/document. Do not wrap any explanatory text around. '
            . 'Return only the bare content.';
        $options = ['image' => $encodedimage];

        $response = (new manager('itt'))->perform_request($prompt, 'qbank_questiongen', $this->contextid, $options);
        if ($response->get_code() === 200) {
            return $response->get_content();
        }

        $details = $response->get_errormessage();
        if (debugging()) {
            // Only expose the debug information of the AI system if debugging is enabled.
            $details .= ' Debugging info: ' . $response->get_debuginfo();
        }
        throw new \moodle_exception('Could not extract from PDF. Error: ' . $details);
    }

    /**
     * Returns if the used external AI system supports the mimetype of a file to extract content from.
     *
     * The check is done against the 'itt' purpose of local_ai_manager for the current user and the context this
     * question_generator has been created for.
     *
     * @param string $mimetype The mimetype of the file we want to extract content from with the external AI system
     * @return bool true if the mimetype is supported, false otherwise
     * @throws questiongen_exception if the connector to the AI system is not properly set up
     */
    public function is_mimetype_supported_by_ai_system(string $mimetype): bool {
        global $USER;

        // NOTE(review): this assumes that the 'availability' entry directly holds the availability constant - verify
        // against the structure returned by ai_manager_utils::get_ai_config().
        $aiconfig = ai_manager_utils::get_ai_config($USER, $this->contextid, null, ['itt']);
        if ($aiconfig['availability'] !== ai_manager_utils::AVAILABILITY_AVAILABLE) {
            throw new questiongen_exception('errorimagetotextnotavailable', 'qbank_questiongen');
        }

        $ittoptions = ai_manager_utils::get_available_purpose_options('itt');
        $hasmimetypes = !empty($ittoptions) && !empty($ittoptions['allowedmimetypes']);
        if (!$hasmimetypes) {
            throw new questiongen_exception('errorimagetotextnotavailable', 'qbank_questiongen');
        }

        return in_array($mimetype, $ittoptions['allowedmimetypes']);
    }

    /**
     * Converts a PDF into an array of images.
     *
     * The PDF is written to a temporary file inside a request directory, the images are generated into the same
     * directory and are returned as data URIs.
     *
     * @param \stored_file $file the PDF file to convert to images
     * @return array array of base64 encoded images, one for each page of the PDF
     * @throws questiongen_exception if the PDF is not supported by the library we're using
     */
    public function convert_pdf_to_images(\stored_file $file): array {
        $workdir = \make_request_directory();
        [, $extension] = explode('/', $file->get_mimetype());
        $pdfpath = $workdir . '/' . 'qbank_questiongen_tmp_' . uniqid() . '.' . $extension;
        file_put_contents($pdfpath, $file->get_content());

        $converter = new pdf();
        $converter->set_image_folder($workdir);
        try {
            $converter->set_pdf($pdfpath);
            $imagefilenames = $converter->get_images();
        } catch (PdfParserException $parsererror) {
            throw new questiongen_exception('errorpdfnotsupported', 'qbank_questiongen', '', $file->get_filename());
        }

        // The image file names are relative to the image folder. The result is re-indexed to a plain list.
        return array_values(array_map(
            function ($imagefilename) use ($workdir) {
                $imagepath = $workdir . '/' . $imagefilename;
                $imagecontent = file_get_contents($imagepath);
                return 'data:' . mime_content_type($imagepath) . ';base64,' . base64_encode($imagecontent);
            },
            $imagefilenames
        ));
    }
}
