<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.

/**
 * AI Jammer filter definition.
 * @package    filter_aijammer
 * @copyright  2024 Astor Bizard
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */

defined('MOODLE_INTERNAL') || die();
global $CFG;
require_once($CFG->libdir . '/filterlib.php');

/**
 * Main class for AI Jammer filter.
 * @copyright  2024 Astor Bizard
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class filter_aijammer extends moodle_text_filter {
    /**
     * Filter adding jamming letters hidden with CSS.
     *
     * The output is made of two sibling elements: the jammed text (hidden from screen readers
     * with aria-hidden) followed by a cleaned copy of the original text (visible to screen readers only).
     * Short texts without any tag, as well as non-string values, are returned untouched.
     *
     * @param string $text some HTML content to process.
     * @param array $options options passed to the filters
     * @return string the HTML content after the filtering has been applied.
     * @see moodle_text_filter::filter()
     */
    public function filter($text, array $options = []) {
        if (!is_string($text)) {
            // Nothing sensible to jam; also avoids passing null to strlen() / strip_tags().
            return $text;
        }
        if (strlen($text) < 20 && strip_tags($text) === $text) {
            // Text has no tag and is very short, chances are we are within an option of a select element.
            // Do not jam anything as it would visually break the option.
            // It is also acceptable not to jam if we are not in this case (because it is short and heh, whatever).
            return $text;
        }
        // Keep an untouched copy: $text is modified by reference below.
        $originaltext = $text;

        $entities = $this->replace_and_save_entities($text); // Remove and save entities so they are not jammed.
        // Do the jamming! Style elements are passed as additional tags to ignore.
        $jammed = filter_phrases($text, $this->generate_randomized_jammer(), [ '<style(\s[^>]*?)?>' ], [ '</style>' ]);
        $this->restore_entities($jammed, $entities); // Restore saved entities.
        // To maintain HTML semantic correctness: if text contains only inline tags, then wrap with a span, else with a div.
        $wrappertag = strip_tags($jammed, '<span><a>') === $jammed ? 'span' : 'div';
        // Hide jammed text from screen readers.
        $jammed = html_writer::tag($wrappertag, $jammed, [ 'aria-hidden' => 'true' ]);

        // Put original text for screen readers only.
        $srtext = html_writer::div($this->process_text_for_screen_readers($originaltext), 'sr-only', [
                'style' => 'user-select:none',
        ]);

        return $jammed . $srtext;
    }

    /**
     * Generate a randomized filter for each letter.
     *
     * One filter object is built for each of the 52 ASCII letters (A-Z then a-z). Each one matches its letter
     * case-sensitively and appends a zero-font-size span containing 1 to 3 random letters of the same case.
     *
     * @return filterobject[] Array of filters, one for each jammed character.
     */
    protected function generate_randomized_jammer() {
        $map = [];
        foreach ([ ord('A'), ord('a') ] as $offset) { // Uppercase and lowercase.
            foreach (range(0, 25) as $letter) {
                // Build 1 to 3 random letters, in the same case as the jammed letter.
                $obfuscation = implode('', array_map(function() use ($offset) {
                    return chr(rand(0, 25) + $offset);
                }, range(1, rand(1, 3))));
                $map[] = new filterobject(
                    chr($letter + $offset),
                    '', // Nothing is inserted before the letter.
                    html_writer::span($obfuscation, 'nolink', [ 'style' => 'font-size:0!important' ]),
                    true // Case-sensitive match.
                );
            }
        }
        return $map;
    }

    /**
     * Replace all entities of the form &entity; by custom placeholders.
     *
     * Placeholders have the form |<placeholder><index>| where <placeholder> is a sequence of characters
     * that does not occur in the text, so they contain no letter.
     *
     * @param string $text The text in which to replace. It is modified in place (passed by reference).
     * @return array Saved entities (placeholder => entity), to be restored with restore_entities().
     */
    protected function replace_and_save_entities(&$text) {
        $pregexp = '/&[#\w]+;/';

        $placeholder = $this->generate_available_placeholder($text);

        $matches = [];
        $entities = [];
        preg_match_all($pregexp, $text, $matches);
        // Keys of array_unique() are those of the first occurrences, which makes each placeholder unique.
        foreach (array_unique($matches[0]) as $key => $value) {
            $entities['|' . $placeholder . $key . '|'] = $value;
        }
        if (!empty($entities)) {
            $text = str_replace($entities, array_keys($entities), $text);
        }
        return $entities;
    }

    /**
     * Restore saved entities into text.
     *
     * @param string $text The text in which to restore entities. It is modified in place (passed by reference).
     * @param array $savedentities Saved entities (placeholder => entity) as returned by replace_and_save_entities().
     */
    protected function restore_entities(&$text, $savedentities) {
        if (!empty($savedentities)) {
            $text = str_replace(array_keys($savedentities), $savedentities, $text);
        }
    }

    /**
     * Find an available placeholder for the given text.
     * A placeholder is considered available if the text does not contain any occurrence of this placeholder.
     * The placeholder is a run of '%' characters, doubled in length until it is not found in the text.
     *
     * @param string $text The subject text.
     * @return string An available placeholder.
     */
    protected function generate_available_placeholder($text) {
        $placeholder = '%';
        while (strpos($text, $placeholder) !== false) {
            $placeholder .= $placeholder;
        }
        return $placeholder;
    }

    /**
     * Transform and return given text to be suitable for screen readers.
     *
     * Script, textarea, head and style elements are removed along with their contents, every other tag
     * except line breaks and links is stripped, consecutive line breaks are collapsed and [[...]] placeholders
     * are removed.
     *
     * @param string $text The text to process.
     * @return string Cleaned and processed text, suitable to use within sr-only tag.
     */
    protected function process_text_for_screen_readers($text) {
        // Remove these tags with their contents.
        $tagstoremove = [
                'script',
                'textarea',
                'head',
                'style',
        ];
        foreach ($tagstoremove as $tag) {
            // Modifier "s" lets the contents span several lines, "i" matches tags whatever their case.
            // On PCRE failure preg_replace() returns null: keep the current text rather than losing it.
            $text = preg_replace("#<$tag(?:\s[^>]*?)?>.*?</$tag>#is", '', $text) ?? $text;
        }
        // Add semantic line breaks and remove all tags except line breaks and links.
        $text = strip_tags(str_replace([ '</div>', '</p>' ], [ '<br></div>', '<br></p>' ], $text), '<br><a>');
        // Collapse every run of line breaks (<br>, <br/> or <br />, possibly separated by whitespace) into one.
        $text = preg_replace('#<br\s*/?>(?:\s*<br\s*/?>)+#i', '<br>', $text) ?? $text;
        // Remove [[]] placeholders to avoid duplicate elements.
        $text = preg_replace('/\[\[[^]]*\]\]/', '', $text) ?? $text;
        // Note for cloze (multianswer) questions: the resulting text will contain "qtypemultianswerXmarker" tokens.
        // Though not ideal, this is an acceptable solution to maintain a form of consistency.
        return $text;
    }
}
