/*
 * DokuWiki parser: plugins/dokuwiki/inc/parser/parser.php
 *
 * Provenance: added as a new 932-line file by commit
 * 6854cb3f4d8219cf1829e32122eb2502a916eae9 ("initial checkin",
 * Andreas Baumann, Sat, 1 Feb 2020 09:05:48 +0100).
 */

/**
 * Mode categories. Each mode class below builds its $allowedModes list by
 * merging some of these categories.
 *
 * NOTE(review): the opening of this assignment (variable name and the
 * 'container' key) was missing from the recovered text; both are
 * reconstructed from the $PARSER_MODES['container'] reads in the base and
 * footnote modes. Confirm against the upstream file.
 *
 * The `global` statement is a no-op at file scope but makes sure the
 * registry still lands in the global scope when this file is included from
 * inside a function, which is where every mode constructor looks for it.
 */
global $PARSER_MODES;
$PARSER_MODES = array(
    // modes that, in this file, are only accepted by the base and
    // footnote modes
    'container' => array('listblock', 'table', 'quote', 'hr'),

    // some modes are allowed inside the base mode only
    'baseonly' => array('header'),

    // modes for styling text -- footnote behaves similar to styling
    'formatting' => array('strong', 'emphasis', 'underline', 'monospace',
                          'subscript', 'superscript', 'deleted', 'footnote'),

    // modes where the token is simply replaced - they can not contain any
    // other modes
    // (the misspelt key 'substition' is part of the runtime contract)
    'substition' => array('acronym', 'smiley', 'wordblock', 'entity',
                          'camelcaselink', 'internallink', 'media',
                          'externallink', 'linebreak', 'emaillink',
                          'windowssharelink', 'filelink', 'notoc',
                          'nocache', 'multiplyentity', 'quotes', 'rss'),

    // modes which have a start and end token but inside which
    // no other modes should be applied
    'protected' => array('preformatted', 'code', 'file', 'php', 'html'),

    // inside this mode no wiki markup should be applied but line endings
    // and whitespace aren't preserved
    'disabled' => array('unformatted'),

    // used to mark paragraph boundaries
    'paragraphs' => array('eol')
);

//-------------------------------------------------------------------

/**
 * Sets up the Lexer with modes and points it to the Handler
 * For an intro to the Lexer see: wiki:parser
 */
class Doku_Parser {

    /**
     * Receives the lexer output. Nothing in this class assigns it; the
     * caller has to set it before the first mode is added, because it is
     * handed to the Doku_Lexer constructor in addBaseMode().
     */
    public $Handler;

    /** Doku_Lexer instance, created lazily by addBaseMode(). */
    public $Lexer;

    /** Registered modes keyed by name, in registration order. */
    public $modes = array();

    /** TRUE once connectModes() has run, so it only runs once. */
    public $connected = FALSE;

    /**
     * Registers the base mode and creates the Lexer if there is none yet.
     *
     * @param Doku_Parser_Mode $BaseMode mode stored under the name 'base'
     */
    public function addBaseMode(& $BaseMode) {
        $this->modes['base'] = & $BaseMode;
        if ( !$this->Lexer ) {
            $this->Lexer = new Doku_Lexer($this->Handler, 'base', TRUE);
        }
        $this->modes['base']->Lexer = & $this->Lexer;
    }

    /**
     * Registers a mode under the given name, creating a default base mode
     * first if none was added.
     *
     * PHP preserves order of associative elements
     * Mode sequence is important
     *
     * @param string           $name mode name, used as key in $modes
     * @param Doku_Parser_Mode $Mode mode object; its Lexer property is set
     */
    public function addMode($name, & $Mode) {
        if ( !isset($this->modes['base']) ) {
            // addBaseMode() takes its argument by reference, so it must be
            // given a real variable; passing the `new` expression directly
            // raises "Only variables should be passed by reference".
            $baseMode = new Doku_Parser_Mode_base();
            $this->addBaseMode($baseMode);
        }
        $Mode->Lexer = & $this->Lexer;
        $this->modes[$name] = & $Mode;
    }

    /**
     * Lets every registered mode (except base) hook itself into each mode
     * that accepts it. Runs at most once per parser instance.
     */
    public function connectModes() {

        if ( $this->connected ) {
            return;
        }

        $names = array_keys($this->modes);

        foreach ( $names as $mode ) {

            // Base isn't connected to anything
            if ( $mode === 'base' ) {
                continue;
            }

            $this->modes[$mode]->preConnect();

            foreach ( $names as $cm ) {
                if ( $this->modes[$cm]->accepts($mode) ) {
                    $this->modes[$mode]->connectTo($cm);
                }
            }

            $this->modes[$mode]->postConnect();
        }

        $this->connected = TRUE;
    }

    /**
     * Parses a document and returns the handler's call list.
     *
     * @param string $doc raw wiki text
     * @return mixed $this->Handler->calls, or FALSE when no Lexer exists
     *               (i.e. no mode was ever added)
     */
    public function parse($doc) {
        if ( !$this->Lexer ) {
            return FALSE;
        }

        $this->connectModes();
        // Normalize CRs and pad doc
        $doc = "\n".str_replace("\r\n", "\n", $doc)."\n";
        $this->Lexer->parse($doc);
        $this->Handler->_finalize();
        return $this->Handler->calls;
    }

}

//-------------------------------------------------------------------
/**
 * This class and all the subclasses below are
 * used to reduce the effort required to register
 * modes with the Lexer. For performance these
 * could all be eliminated later perhaps, or
 * the Parser could be serialized to a file once
 * all modes are registered
 *
 * @author Harry Fuecks
 */
class Doku_Parser_Mode {

    /** Lexer the mode registers its patterns with; set by Doku_Parser. */
    public $Lexer;

    /** Names of the modes that may occur inside this mode. */
    public $allowedModes = array();

    /**
     * Returns a number used to determine in which order modes are added.
     * Subclasses are expected to override this; the default only emits a
     * warning and returns nothing.
     */
    public function getSort() {
        trigger_error('getSort() not implemented in '.get_class($this), E_USER_WARNING);
    }

    /** Called before any calls to connectTo */
    public function preConnect() {}

    /**
     * Connects the mode
     *
     * @param string $mode name of a mode that accepts this one
     */
    public function connectTo($mode) {}

    /** Called after all calls to connectTo */
    public function postConnect() {}

    /**
     * @param string $mode mode name
     * @return bool whether $mode is listed in $allowedModes
     */
    public function accepts($mode) {
        return in_array($mode, $this->allowedModes );
    }

}

//-------------------------------------------------------------------
/**
 * Root mode: accepts every category of $PARSER_MODES.
 */
class Doku_Parser_Mode_base extends Doku_Parser_Mode {

    public function __construct() {
        global $PARSER_MODES;

        $this->allowedModes = array_merge (
            $PARSER_MODES['container'],
            $PARSER_MODES['baseonly'],
            $PARSER_MODES['paragraphs'],
            $PARSER_MODES['formatting'],
            $PARSER_MODES['substition'],
            $PARSER_MODES['protected'],
            $PARSER_MODES['disabled']
        );
    }

    public function getSort() {
        return 0;
    }
}

//-------------------------------------------------------------------
/**
 * Footnote mode, delimited by "((" and "))". Accepts containers,
 * formatting, substitutions, protected and disabled modes, but not another
 * footnote.
 */
class Doku_Parser_Mode_footnote extends Doku_Parser_Mode {

    public function __construct() {
        global $PARSER_MODES;

        $this->allowedModes = array_merge (
            $PARSER_MODES['container'],
            $PARSER_MODES['formatting'],
            $PARSER_MODES['substition'],
            $PARSER_MODES['protected'],
            $PARSER_MODES['disabled']
        );

        // Footnotes must not nest. Guard the lookup: array_search() returns
        // FALSE when nothing is found, and unset($arr[FALSE]) would silently
        // remove index 0 instead.
        $self = array_search('footnote', $this->allowedModes, true);
        if ( $self !== false ) {
            unset($this->allowedModes[$self]);
        }
    }

    public function connectTo($mode) {
        $this->Lexer->addEntryPattern(
            '\x28\x28(?=.*\x29\x29)',$mode,'footnote'
            );
    }

    public function postConnect() {
        $this->Lexer->addExitPattern(
            '\x29\x29','footnote'
            );
    }

    public function getSort() {
        return 150;
    }
}

//-------------------------------------------------------------------
/**
 * Headline mode ("== title =="); only ever registered on the base mode.
 */
class Doku_Parser_Mode_header extends Doku_Parser_Mode {

    public function preConnect() {
        //we're not picky about the closing ones, two are enough
        $this->Lexer->addSpecialPattern(
            '[ \t]*={2,}[^\n]+={2,}[ \t]*(?=\n)',
            'base',
            'header'
        );
    }

    public function getSort() {
        return 50;
    }
}

//-------------------------------------------------------------------
/**
 * Matches the ~~NOTOC~~ control macro.
 */
class Doku_Parser_Mode_notoc extends Doku_Parser_Mode {

    public function connectTo($mode) {
        $this->Lexer->addSpecialPattern('~~NOTOC~~',$mode,'notoc');
    }

    public function getSort() {
        return 30;
    }
}

//-------------------------------------------------------------------
/**
 * Matches the ~~NOCACHE~~ control macro.
 */
class Doku_Parser_Mode_nocache extends Doku_Parser_Mode {

    public function connectTo($mode) {
        $this->Lexer->addSpecialPattern('~~NOCACHE~~',$mode,'nocache');
    }

    public function getSort() {
        return 40;
    }
}

//-------------------------------------------------------------------
/**
 * Forced line break: two backslashes followed by whitespace.
 */
class Doku_Parser_Mode_linebreak extends Doku_Parser_Mode {

    public function connectTo($mode) {
        $this->Lexer->addSpecialPattern('\x5C{2}(?=\s)',$mode,'linebreak');
    }

    public function getSort() {
        return 140;
    }
}

//-------------------------------------------------------------------
/**
 * End-of-line token. Not registered inside lists and tables.
 */
class Doku_Parser_Mode_eol extends Doku_Parser_Mode {

    public function connectTo($mode) {
        // listblock and table register their own "\n" patterns
        $badModes = array('listblock','table');
        if ( in_array($mode, $badModes) ) {
            return;
        }
        $this->Lexer->addSpecialPattern('\n',$mode,'eol');
    }

    public function getSort() {
        return 370;
    }
}

//-------------------------------------------------------------------
/**
 * Horizontal rule: a line consisting of four or more dashes.
 */
class Doku_Parser_Mode_hr extends Doku_Parser_Mode {

    public function connectTo($mode) {
        $this->Lexer->addSpecialPattern('\n[ \t]*-{4,}[ \t]*(?=\n)',$mode,'hr');
    }

    public function getSort() {
        return 160;
    }
}

//-------------------------------------------------------------------
/**
 * Generic inline formatting mode. One instance is created per formatting
 * type; the type selects the entry pattern, exit pattern and sort value
 * from $formatting.
 */
class Doku_Parser_Mode_formatting extends Doku_Parser_Mode {

    /** Formatting type this instance handles (a key of $formatting). */
    public $type;

    /**
     * Entry/exit patterns and sort value per formatting type.
     *
     * NOTE(review): in the recovered text the sub/sup/del entries had lost
     * everything that looked like an HTML tag, leaving '(?=.*)' as entry
     * and an empty exit pattern. The tags are restored here following the
     * DokuWiki syntax documentation; confirm against the upstream file.
     */
    public $formatting = array (
        'strong' => array (
            'entry'=>'\*\*(?=.*\*\*)',
            'exit'=>'\*\*',
            'sort'=>70
            ),

        'emphasis'=> array (
            'entry'=>'//(?=[^\x00]*[^:]//)', //hack for bug #384
            'exit'=>'//',
            'sort'=>80
            ),

        'underline'=> array (
            'entry'=>'__(?=.*__)',
            'exit'=>'__',
            'sort'=>90
            ),

        'monospace'=> array (
            'entry'=>'\x27\x27(?=.*\x27\x27)',
            'exit'=>'\x27\x27',
            'sort'=>100
            ),

        'subscript'=> array (
            'entry'=>'<sub>(?=.*</sub>)',
            'exit'=>'</sub>',
            'sort'=>110
            ),

        'superscript'=> array (
            'entry'=>'<sup>(?=.*</sup>)',
            'exit'=>'</sup>',
            'sort'=>120
            ),

        'deleted'=> array (
            'entry'=>'<del>(?=.*</del>)',
            'exit'=>'</del>',
            'sort'=>130
            ),
        );

    /**
     * @param string $type formatting type; an unknown type only triggers an
     *                     E_USER_WARNING, construction continues
     */
    public function __construct($type) {
        global $PARSER_MODES;

        if ( !array_key_exists($type, $this->formatting) ) {
            trigger_error('Invalid formatting type '.$type, E_USER_WARNING);
        }

        $this->type = $type;

        // formatting may contain other formatting but not itself
        $modes = $PARSER_MODES['formatting'];
        $key = array_search($type, $modes);
        if ( is_int($key) ) {
            unset($modes[$key]);
        }

        $this->allowedModes = array_merge (
            $modes,
            $PARSER_MODES['substition'],
            $PARSER_MODES['disabled']
        );
    }

    public function connectTo($mode) {

        // Can't nest formatting in itself
        if ( $mode == $this->type ) {
            return;
        }

        $this->Lexer->addEntryPattern(
            $this->formatting[$this->type]['entry'],
            $mode,
            $this->type
            );
    }

    public function postConnect() {

        $this->Lexer->addExitPattern(
            $this->formatting[$this->type]['exit'],
            $this->type
            );

    }

    public function getSort() {
        return $this->formatting[$this->type]['sort'];
    }
}

//-------------------------------------------------------------------
/**
 * List block: lines indented by at least two spaces or one tab, followed
 * by "-" or "*".
 */
class Doku_Parser_Mode_listblock extends Doku_Parser_Mode {

    public function __construct() {
        global $PARSER_MODES;

        // 'footnote' needs no separate entry: it is already part of the
        // formatting category.
        $this->allowedModes = array_merge (
            $PARSER_MODES['formatting'],
            $PARSER_MODES['substition'],
            $PARSER_MODES['disabled'],
            $PARSER_MODES['protected'] #XXX new
        );
    }

    public function connectTo($mode) {
        $this->Lexer->addEntryPattern('\n {2,}[\-\*]',$mode,'listblock');
        $this->Lexer->addEntryPattern('\n\t{1,}[\-\*]',$mode,'listblock');

        // the same patterns inside the list mark the next list item
        $this->Lexer->addPattern('\n {2,}[\-\*]','listblock');
        $this->Lexer->addPattern('\n\t{1,}[\-\*]','listblock');

    }

    public function postConnect() {
        $this->Lexer->addExitPattern('\n','listblock');
    }

    public function getSort() {
        return 10;
    }
}

//-------------------------------------------------------------------
/**
 * Table: lines starting with "^" or "|".
 */
class Doku_Parser_Mode_table extends Doku_Parser_Mode {

    public function __construct() {
        global $PARSER_MODES;

        $this->allowedModes = array_merge (
            $PARSER_MODES['formatting'],
            $PARSER_MODES['substition'],
            $PARSER_MODES['disabled'],
            $PARSER_MODES['protected']
        );
    }

    public function connectTo($mode) {
        $this->Lexer->addEntryPattern('\n\^',$mode,'table');
        $this->Lexer->addEntryPattern('\n\|',$mode,'table');
    }

    public function postConnect() {
        // registration order is kept exactly as in the original
        $this->Lexer->addPattern('\n\^','table');
        $this->Lexer->addPattern('\n\|','table');
        $this->Lexer->addPattern('[\t ]+','table');
        $this->Lexer->addPattern('\^','table');
        $this->Lexer->addPattern('\|','table');
        $this->Lexer->addExitPattern('\n','table');
    }

    public function getSort() {
        return 60;
    }
}

//-------------------------------------------------------------------
/**
 * Unformatted text: <nowiki>...</nowiki> or %%...%%. The %% variant uses
 * its own lexer mode ('unformattedalt') that is mapped onto the
 * 'unformatted' handler.
 *
 * NOTE(review): the <nowiki> tags were stripped from the recovered text
 * (leaving an empty exit pattern) and are restored following the DokuWiki
 * syntax documentation; confirm against the upstream file.
 */
class Doku_Parser_Mode_unformatted extends Doku_Parser_Mode {

    public function connectTo($mode) {
        $this->Lexer->addEntryPattern('<nowiki>(?=.*</nowiki>)',$mode,'unformatted');
        $this->Lexer->addEntryPattern('%%(?=.*%%)',$mode,'unformattedalt');
    }

    public function postConnect() {
        $this->Lexer->addExitPattern('</nowiki>','unformatted');
        $this->Lexer->addExitPattern('%%','unformattedalt');
        $this->Lexer->mapHandler('unformattedalt','unformatted');
    }

    public function getSort() {
        return 170;
    }
}

//-------------------------------------------------------------------
/**
 * Embedded PHP: <php>...</php>.
 *
 * NOTE(review): tags restored after being stripped from the recovered
 * text; confirm against the upstream file.
 */
class Doku_Parser_Mode_php extends Doku_Parser_Mode {

    public function connectTo($mode) {
        $this->Lexer->addEntryPattern('<php>(?=.*</php>)',$mode,'php');
    }

    public function postConnect() {
        $this->Lexer->addExitPattern('</php>','php');
    }

    public function getSort() {
        return 180;
    }
}

//-------------------------------------------------------------------
/**
 * Embedded HTML: <html>...</html>.
 *
 * NOTE(review): tags restored after being stripped from the recovered
 * text; confirm against the upstream file.
 */
class Doku_Parser_Mode_html extends Doku_Parser_Mode {

    public function connectTo($mode) {
        $this->Lexer->addEntryPattern('<html>(?=.*</html>)',$mode,'html');
    }

    public function postConnect() {
        $this->Lexer->addExitPattern('</html>','html');
    }

    public function getSort() {
        return 190;
    }
}

//-------------------------------------------------------------------
/**
 * Preformatted block: lines indented by two spaces or a tab that do not
 * start a list item.
 *
 * NOTE(review): the recovered text had its whitespace runs collapsed, so
 * the space patterns showed a single space. Two spaces are restored here,
 * matching the "indent by at least two spaces" rule of the DokuWiki syntax
 * documentation and the {2,} used by the list mode; confirm against the
 * upstream file.
 */
class Doku_Parser_Mode_preformatted extends Doku_Parser_Mode {

    public function connectTo($mode) {
        // Has hard coded awareness of lists...
        $this->Lexer->addEntryPattern('\n  (?![\*\-])',$mode,'preformatted');
        $this->Lexer->addEntryPattern('\n\t(?![\*\-])',$mode,'preformatted');

        // How to effect a sub pattern with the Lexer!
        $this->Lexer->addPattern('\n  ','preformatted');
        $this->Lexer->addPattern('\n\t','preformatted');

    }

    public function postConnect() {
        $this->Lexer->addExitPattern('\n','preformatted');
    }

    public function getSort() {
        return 20;
    }
}

//-------------------------------------------------------------------
/**
 * Code block: <code ...>...</code>.
 *
 * NOTE(review): only ')' survived of the entry pattern in the recovered
 * text, which is what remains of '<code(?=.*</code>)' once everything
 * between '<' and the first '>' is stripped. The exit pattern was empty.
 * Both are restored; confirm against the upstream file.
 */
class Doku_Parser_Mode_code extends Doku_Parser_Mode {

    public function connectTo($mode) {
        $this->Lexer->addEntryPattern('<code(?=.*</code>)',$mode,'code');
    }

    public function postConnect() {
        $this->Lexer->addExitPattern('</code>','code');
    }

    public function getSort() {
        return 200;
    }
}

//-------------------------------------------------------------------
/**
 * File block: <file>...</file>.
 *
 * NOTE(review): tags restored after being stripped from the recovered
 * text; confirm against the upstream file.
 */
class Doku_Parser_Mode_file extends Doku_Parser_Mode {

    public function connectTo($mode) {
        $this->Lexer->addEntryPattern('<file>(?=.*</file>)',$mode,'file');
    }

    public function postConnect() {
        $this->Lexer->addExitPattern('</file>','file');
    }

    public function getSort() {
        return 210;
    }
}

//-------------------------------------------------------------------
/**
 * Quote block: lines starting with one or more ">".
 */
class Doku_Parser_Mode_quote extends Doku_Parser_Mode {

    public function __construct() {
        global $PARSER_MODES;

        // footnote, preformatted and unformatted need no separate entries:
        // they are covered by the formatting, protected and disabled
        // categories respectively.
        $this->allowedModes = array_merge (
            $PARSER_MODES['formatting'],
            $PARSER_MODES['substition'],
            $PARSER_MODES['disabled'],
            $PARSER_MODES['protected'] #XXX new
        );
    }

    public function connectTo($mode) {
        $this->Lexer->addEntryPattern('\n>{1,}',$mode,'quote');
    }

    public function postConnect() {
        $this->Lexer->addPattern('\n>{1,}','quote');
        $this->Lexer->addExitPattern('\n','quote');
    }

    public function getSort() {
        return 220;
    }
}

//-------------------------------------------------------------------
/**
 * Acronyms: matches any of the given strings when delimited by ASCII
 * non-alphanumeric characters.
 */
class Doku_Parser_Mode_acronym extends Doku_Parser_Mode {
    // A list
    public $acronyms = array();
    public $pattern = '';

    /**
     * @param array $acronyms list of acronym strings to recognise
     */
    public function __construct($acronyms) {
        $this->acronyms = $acronyms;
    }

    public function preConnect() {
        // empty() also covers a NULL list, on which count() would fail
        if ( empty($this->acronyms) ) return;

        $bound = '[\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\x7f]';
        $acronyms = array_map('Doku_Lexer_Escape',$this->acronyms);
        $this->pattern = '(?<=^|'.$bound.')(?:'.join('|',$acronyms).')(?='.$bound.')';
    }

    public function connectTo($mode) {
        if ( empty($this->acronyms) ) return;

        if ( strlen($this->pattern) > 0 ) {
            $this->Lexer->addSpecialPattern($this->pattern,$mode,'acronym');
        }
    }

    public function getSort() {
        return 240;
    }
}

//-------------------------------------------------------------------
/**
 * Smileys: matches any of the given strings literally.
 */
class Doku_Parser_Mode_smiley extends Doku_Parser_Mode {
    // A list
    public $smileys = array();
    public $pattern = '';

    /**
     * @param array $smileys list of smiley strings to recognise
     */
    public function __construct($smileys) {
        $this->smileys = $smileys;
    }

    public function preConnect() {
        if ( empty($this->smileys) ) return;

        // Rebuilt from scratch on every call, so repeated calls cannot
        // grow the pattern.
        $escaped = array_map('Doku_Lexer_Escape', $this->smileys);
        $this->pattern = join('|', $escaped);
    }

    public function connectTo($mode) {
        if ( empty($this->smileys) ) return;

        if ( strlen($this->pattern) > 0 ) {
            $this->Lexer->addSpecialPattern($this->pattern,$mode,'smiley');
        }
    }

    public function getSort() {
        return 230;
    }
}

//-------------------------------------------------------------------
/**
 * Word block: matches any of the given words case-insensitively at word
 * boundaries.
 */
class Doku_Parser_Mode_wordblock extends Doku_Parser_Mode {
    // A list
    public $badwords = array();
    public $pattern = '';

    /**
     * @param array $badwords list of words to recognise
     */
    public function __construct($badwords) {
        $this->badwords = $badwords;
    }

    public function preConnect() {

        if ( empty($this->badwords) ) {
            return;
        }

        // Rebuilt from scratch on every call, so repeated calls cannot
        // grow the pattern.
        $alternatives = array();
        foreach ( $this->badwords as $badword ) {
            $alternatives[] = '(?<=\b)(?i)'.Doku_Lexer_Escape($badword).'(?-i)(?=\b)';
        }
        $this->pattern = join('|', $alternatives);

    }

    public function connectTo($mode) {
        if ( strlen($this->pattern) > 0 ) {
            $this->Lexer->addSpecialPattern($this->pattern,$mode,'wordblock');
        }
    }

    public function getSort() {
        return 250;
    }
}

//-------------------------------------------------------------------
/**
 * Entities: matches any of the given strings literally.
 *
 * @TODO Quotes and 640x480 are not supported - just straight replacements here
 */
class Doku_Parser_Mode_entity extends Doku_Parser_Mode {
    // A list
    public $entities = array();
    public $pattern = '';

    /**
     * @param array $entities list of entity strings to recognise
     */
    public function __construct($entities) {
        $this->entities = $entities;
    }

    public function preConnect() {
        if ( empty($this->entities) ) return;

        // Rebuilt from scratch on every call, so repeated calls cannot
        // grow the pattern.
        $escaped = array_map('Doku_Lexer_Escape', $this->entities);
        $this->pattern = join('|', $escaped);
    }

    public function connectTo($mode) {
        if ( empty($this->entities) ) return;

        if ( strlen($this->pattern) > 0 ) {
            $this->Lexer->addSpecialPattern($this->pattern,$mode,'entity');
        }
    }

    public function getSort() {
        return 260;
    }
}

//-------------------------------------------------------------------
// Implements the 640x480 replacement
class Doku_Parser_Mode_multiplyentity extends Doku_Parser_Mode {

    public function connectTo($mode) {

        $this->Lexer->addSpecialPattern(
                    '(?<=\b)\d+[xX]\d+(?=\b)',$mode,'multiplyentity'
                );

    }

    public function getSort() {
        return 270;
    }
}

//-------------------------------------------------------------------
/**
 * Typographic quotes: registers separate tokens for opening and closing
 * single and double quotes.
 */
class Doku_Parser_Mode_quotes extends Doku_Parser_Mode {

    public function connectTo($mode) {

        $this->Lexer->addSpecialPattern(
                    '(?<=^|\s)\'(?=\S)',$mode,'singlequoteopening'
                );
        $this->Lexer->addSpecialPattern(
                    '(?<=^|\S)\'',$mode,'singlequoteclosing'
                );
        $this->Lexer->addSpecialPattern(
                    '(?<=^|\s)"(?=\S)',$mode,'doublequoteopening'
                );
        $this->Lexer->addSpecialPattern(
                    '(?<=^|\S)"',$mode,'doublequoteclosing'
                );

    }

    public function getSort() {
        return 280;
    }
}

//-------------------------------------------------------------------
/**
 * CamelCase words treated as links.
 */
class Doku_Parser_Mode_camelcaselink extends Doku_Parser_Mode {

    public function connectTo($mode) {
        $this->Lexer->addSpecialPattern(
                '\b[A-Z]+[a-z]+[A-Z][A-Za-z]*\b',$mode,'camelcaselink'
            );
    }

    public function getSort() {
        return 290;
    }
}

//-------------------------------------------------------------------
/**
 * Internal links: [[...]].
 */
class Doku_Parser_Mode_internallink extends Doku_Parser_Mode {

    public function connectTo($mode) {
        // Word boundaries?
        $this->Lexer->addSpecialPattern("\[\[.+?\]\]",$mode,'internallink');
    }

    public function getSort() {
        return 300;
    }
}

//-------------------------------------------------------------------
/**
 * Media: {{...}}.
 */
class Doku_Parser_Mode_media extends Doku_Parser_Mode {

    public function connectTo($mode) {
        // Word boundaries?
        $this->Lexer->addSpecialPattern("\{\{[^\}]+\}\}",$mode,'media');
    }

    public function getSort() {
        return 320;
    }
}

//-------------------------------------------------------------------
/**
 * RSS feeds: {{rss>...}}. Its sort value (310) is lower than the one of
 * the media mode (320).
 */
class Doku_Parser_Mode_rss extends Doku_Parser_Mode {

    public function connectTo($mode) {
        $this->Lexer->addSpecialPattern("\{\{rss>[^\}]+\}\}",$mode,'rss');
    }

    public function getSort() {
        return 310;
    }
}

//-------------------------------------------------------------------
/**
 * External links: bare URLs with one of the known schemes, plus
 * "www."/"ftp." style host names without a scheme.
 */
class Doku_Parser_Mode_externallink extends Doku_Parser_Mode {
    public $schemes = array('http','https','telnet','gopher','wais','ftp','ed2k','irc','ldap');
    public $patterns = array();

    public function preConnect() {

        $ltrs = '\w';
        $gunk = '/\#~:.?+=&%@!\-';
        $punc = '.:?\-;,';
        $host = $ltrs.$punc;
        $any  = $ltrs.$gunk.$punc;

        // Start from an empty list so that a repeated call cannot register
        // every pattern twice (same precaution the list-based modes take).
        $this->patterns = array();

        foreach ( $this->schemes as $scheme ) {
            $this->patterns[] = '\b(?i)'.$scheme.'(?-i)://['.$any.']+?(?=['.$punc.']*[^'.$any.'])';
        }

        $this->patterns[] = '\b(?i)www?(?-i)\.['.$host.']+?\.['.$host.']+?['.$any.']+?(?=['.$punc.']*[^'.$any.'])';
        $this->patterns[] = '\b(?i)ftp?(?-i)\.['.$host.']+?\.['.$host.']+?['.$any.']+?(?=['.$punc.']*[^'.$any.'])';

    }

    public function connectTo($mode) {
        foreach ( $this->patterns as $pattern ) {
            $this->Lexer->addSpecialPattern($pattern,$mode,'externallink');
        }
    }

    public function getSort() {
        return 330;
    }
}

//-------------------------------------------------------------------
/**
 * file:// links.
 */
class Doku_Parser_Mode_filelink extends Doku_Parser_Mode {

    public $pattern;

    public function preConnect() {

        $ltrs = '\w';
        $gunk = '/\#~:.?+=&%@!\-';
        $punc = '.:?\-;,';
        $any  = $ltrs.$gunk.$punc;

        $this->pattern = '\b(?i)file(?-i)://['.$any.']+?['.
            $punc.']*[^'.$any.']';
    }

    public function connectTo($mode) {
        $this->Lexer->addSpecialPattern(
            $this->pattern,$mode,'filelink');
    }

    public function getSort() {
        return 360;
    }
}

//-------------------------------------------------------------------
/**
 * Windows share links: \\server\share[\path...].
 */
class Doku_Parser_Mode_windowssharelink extends Doku_Parser_Mode {

    public $pattern;

    public function preConnect() {
        $this->pattern = "\\\\\\\\\w+?(?:\\\\[\w$]+)+";
    }

    public function connectTo($mode) {
        $this->Lexer->addSpecialPattern(
            $this->pattern,$mode,'windowssharelink');
    }

    public function getSort() {
        return 350;
    }
}

//-------------------------------------------------------------------
/**
 * E-mail links: <user@host.tld>.
 */
class Doku_Parser_Mode_emaillink extends Doku_Parser_Mode {

    public function connectTo($mode) {
        $this->Lexer->addSpecialPattern("<[\w0-9\-_.]+?@[\w\-]+\.[\w\-\.]+\.*[\w]+>",$mode,'emaillink');
    }

    public function getSort() {
        return 340;
    }
}


//Setup VIM: ex: et ts=4 enc=utf-8 :