0a1,652
+ [@
+ %3c?php if (!defined('PmWiki')) exit();
+
+ # Autolink2 / autolink2 version 0.3
+ # PmWiki plug-in
+ # written by Christian Heller / http://www.plomlompom.de
+ # originally inspired by Karl Loncarek's http://www.pmwiki.org/wiki/Cookbook/AutomaticLinks
+ #
+ # This program is free software; you can redistribute it and/or modify it under the terms of the GNU
+ # General Public License as published by the Free Software Foundation; either version 2 of the
+ # License, or (at your option) any later version.
+ #
+ # SHORT EXPLANATION
+ #
+ # Automatically links phrases in PmWiki page texts to pages in the same wiki group that have a
+ # sufficiently similar name, giving a (user-definable) tolerance towards flexible word endings,
+ # umlauts and case insensitivity. Does not alter the page text itself, only creates the links during
+ # and for the page display.
+ #
+ # Also provides for a (:autolink backlinks:) PmWiki directive that outputs a linked list of pages
+ # that link back to the current page in the automatic fashion just described.
+ #
+ # INSTALLATION / ACTIVATION
+ #
+ # To install, copy this file into your PmWiki cookbook/ directory and add these lines to the config
+ # file of the page group for which you want to activate the plug-in:
+ #
+ # include_once("$FarmD/cookbook/autolink2.php");
+ # AutolinkActivate();
+ #
+ # The next time a page from the respective group gets loaded by a browser, the Autolink database
+ # gets generated. Be careful not to do this in a production environment: Different generation
+ # processes for the same database will get into each other's way, and different people's parallel
+ # page loads will trigger parallel database generation processes.
+ #
+ # In the best case, after the first page load, everything should work as described above right away.
+ # But depending on the number and size of your wiki pages to be autolinked, the database generation
+ # process may take a while and, if server time limits are exceeded, even produce a server error
+ # message. If this happens, the database generation process can be resumed at the point it stopped
+ # by simple reloading. But be careful not to reload too early (check for files still getting changed
+ # in the directory for your group in autolink-data/) so as not to trigger parallel processes.
+ #
+ # See the technical overview section below for more information.
+ #
+ # TECHNICAL OVERVIEW
+ #
+ # A finished Autolink database consists of a directory with the name of the autolinked page group in
+ # $WorkDir/autolink-data/. In it, you will find files on all pages of that group, each carrying its
+ # respective pagename (without its groupname part) and containing three lines:
+ #
+ # The first line contains the regex / regular expression matching phrases that Autolink thinks
+ # should be linked to this page. It is created by AutolinkGenerateRegex() and its logic and
+ # tolerance levels can be adjusted in that very function.
+ #
+ # The second line contains the autolinks-out: a list of pagenames the Autolink regexes of which
+ # match phrases on the page's text.
+ #
+ # The third line contains the autolinks-in: a list of pagenames pointing to pages on which the
+ # current pagename's regex matches phrases.
+ #
+ # During page display, AutolinkActivate() reads the second line of the Autolink file of the page
+ # displayed and fills $AutolinksOut with the autolinks-out pagename list contained therein. It then
+ # collects the regexes for all these pagenames from their respective files and creates a PmWiki
+ # markup for each of them. Phrases on a page matched by this markup get sent to AutolinkSet() which
+ # tries to find the best possible match from $AutolinksOut for each of them; in cases where a phrase
+ # is similar to more than one pagename, AutolinkSet() has some comparison and scoring logic built-in
+ # to decide with.
+ #
+ # AutolinkActivate() also provides the markup for the (:autolink backlinks:) directive. It calls
+ # AutolinkGetBacklinksString() for its output, which reads in the third line of the Autolink file on
+ # the page displayed, translating the autolinks-in list into HTML code for a linked backlink list.
+ #
+ # The autolinks-in and autolinks-out fields of Autolink page files get updated during each page
+ # update, deletion or creation by AutolinkPageUpdate() and, called by this function if necessary,
+ # AutolinkPageCreationWork() and AutolinkPageDeletionWork(). AutolinkPageUpdate() gets inserted into
+ # PmWiki's page update function chain in $EditFunctions at the beginning of AutolinkActivate().
+ #
+ # The initial database creation is handled by AutolinkCreateDatabase(), called by AutolinkActivate()
+ # if the latter finds no Autolink directory for the current group or if a file 'DB-Creation-'
+ # followed by the name of the current page group is found in Autolink's main directory. This is a
+ # temporary file created by AutolinkCreateDatabase(), guiding it and only deleted when the initial
+ # database creation work is finished. If the latter is interrupted, the file will remain and allow
+ # the function to pick up its work where it left the last time.
+ #
+ # You can check the progress of database creation by taking a look at the DB-Creation file between
+ # reloads: It contains a list of pagenames prepended by symbols signaling their state of integration
+ # into the database, moving from '?' (no work done) to '!' (some work done) and finally to '-' (work
+ # finished).
+ #
+ # ISSUES / BUGS
+ #
+ # * Autolink assumes UTF-8 encoding for your page display and your page content to be autolinked. In
+ # theory, it should be usable for ASCII-only content even if that is not encoded as UTF-8 (due to
+ # UTF-8's ASCII compatibility). Anything beyond ASCII, though, needs to be in UTF-8. The multibyte
+ # string library is expected to be enabled by your PHP installation, as various functions of it
+ # are called. If you feel adventurous, change $AutolinkEncoding to any other multibyte encoding
+ # (provided you also save this script in that encoding and let it go over a PmWiki encoded thus).
+ #
+ # * On the other hand, Autolink expects pagenames to be ASCII-only.
+ #
+ # * If for any reason (like a timeout) an Autolink page update or deletion function is not finished,
+ # autolinking data may not be up-to-date. AutolinkRepairData() only catches failures of individual
+ # file_put_contents() processes (to avoid empty files with empty regexes, matching and marking up
+ # any emptiness found on page as autolinks to the page whose file was corrupted) but doesn't check
+ # for validity of references or whether a file should be deleted. Most of the times this happens,
+ # Autolink data distortion should be marginal. But if you fear database corruption, just delete
+ # your group directory in autolink-data/ to trigger database regeneration by the next reload.
+ #
+ # * What PmWiki autolinks on a page display depends on the pagenames listed in $AutolinksOut, which
+ # originally are collected by matching all regexes against the 'text=' field of a PmWiki page.
+ # Once a page is displayed, though, Autolink will try to link *everything* (outside of HTML tags
+ # and already linked material) in PmWiki's page rendering against those pagenames, including text
+ # outside of the actual 'text=' field: like, occasionally, the title. This is not a critical
+ # error, but can create an impression of incoherence--one string autolinked in the title of page
+ # A, but not in the title of page B.
+ #
+ # * Don't expect patterns containing HTML entities like '&amp;' to match right away even if those
+ # entities are allowed in $AutolinkGapsToAllowLong. PmWiki's internal encoding of a '&' in its
+ # pagefiles' 'text=' fields, from which matches are collected to fill in the autolinks fields in
+ # Autolink's files, is still '&' and only becomes '&amp;' during page display. If, however, by
+ # any other occurrence in 'text=' the respective pagename is still matched, its occurrences
+ # containing a '&amp;' will also be marked up during page display.
+ #
+ # WEBSITE
+ #
+ # For more information and updates on bugs and code changes, see
+ # http://www.plomlompom.de/wiki/pmwiki.php?n=Mind.AutoLink2
+
+
+
+ ########################################
+ # Autolink common global variables #
+ ########################################
+
+ $AutolinkEncoding = 'UTF-8'; # Encoding name handed to the mb_*() multibyte string functions.
+
+ $AutolinkMinimalRoot = 4; # These variables mostly influence AutolinkGenerateRegex()'s regular
+ $AutolinkSuffixTolerance = 3; # expression generation work (some are also read by AutolinkSet() for its
+ $AutolinkGapsToAllowEasy = ' .,:;'; # pattern recognition work).
+ $AutolinkGapsToAllowLong = array(); #
+ $AutolinkGapsToAllowHard = array('\'', '/', '\\', '(', ')', '[', ']'); # The comments for AutolinkGenerateRegex() offer thorough explanations.
+ $AutolinkUmlautTable = array('äÄ' => array('ae', 'Ae'), #
+ 'öÖ' => array('oe', 'Oe'), # Change these to influence Autolink's pattern recognition tolerances.
+ 'üÜ' => array('ue', 'Ue'), # Feel free, for example, to add French accented characters to the umlaut
+ 'ß' => array('ss')); # table, or to remove characters from the lists of those tolerated in gaps.
+
+ $AutolinkBannedPages = array('RecentChanges', #
+ 'GroupHeader', # We don't want Autolink to work on or with these system pages.
+ 'GroupFooter', # Add other pages here where Autolink would create a mess.
+ 'PageActions'); #
+
+ $AutolinkClass = 'autolink'; # Class attribute for the HTML %3ca> tag of autolinks. Use for fancy CSS games.
+
+ $AutolinkDir = $WorkDir.'/autolink-data'; # Autolink's main directory inside PmWiki's $WorkDir.
+ list($AutolinkGroupname, $AutolinkPagename) = AutolinkDividePagefileName($pagename); # Group part and page part of the current $pagename.
+ $AutolinkGroupDir = $AutolinkDir.'/'.$AutolinkGroupname; # Database directory of the current page's group.
+ $AutolinkDBCreationFile = $AutolinkDir.'/DB-Creation-'.$AutolinkGroupname; # Temporary file guiding the group's database creation.
+ $AutolinkPath = $AutolinkGroupDir.'/'.$AutolinkPagename; # Autolink file of the current page.
+
+ $AutolinksOut = array(); # This will be populated by AutolinkActivate().
+
+ ########################################
+ # Autolink common helper functions #
+ ########################################
+
+ function AutolinkDividePagefileName($filename)
+ # Split a PmWiki pagefile name at its first '.' and return array(group part, name part).
+ { $split_at = strpos($filename, '.'); $group_part = substr($filename, 0, $split_at);
+ return array($group_part, substr($filename, $split_at + 1)); }
+
+
+
+ function AutolinkGetRegexFromFile($filename)
+ # Get the regex saved in the first line of $filename in $AutolinkGroupDir. Only the line break is stripped, so a first line lacking one is returned intact. Returns '' if the file cannot be opened or read.
+ { global $AutolinkGroupDir;
+ $p_file = fopen($AutolinkGroupDir.'/'.$filename, 'r');
+ if ($p_file === FALSE) return ''; # Nothing to read from: hand back an empty regex instead of working on an invalid file handle.
+ $line = fgets($p_file); fclose($p_file);
+ return ($line === FALSE) ? '' : rtrim($line, "\r\n"); }
+
+
+
+ ########################################
+ # #
+ # A u t o l i n k m a r k u p #
+ # #
+ ########################################
+
+ function AutolinkActivate()
+ # Entry point called from the group's config file. Make sure the Autolink database for the current group exists (calling AutolinkCreateDatabase() if not), hook AutolinkUpdatePage() into $EditFunctions and, when browsing a page that has an Autolink file, create the markups for its autolinks and for the (:autolink backlinks:) directive.
+ { global $EditFunctions, $AutolinkGroupDir, $AutolinksOut, $AutolinkPath, $AutolinkDBCreationFile, $AutolinkPagename, $AutolinkBannedPages, $AutolinkDir, $AutolinkGroupname,
+ $action;
+
+ if (!is_dir($AutolinkGroupDir)) AutolinkCreateDatabase(); # If no database directory exists for the current group yet, start the
+ # database creation.
+ AutolinkRepairData(); #
+ # Apply any Autolink filesystem repair work via AutolinkRepairData(). Then,
+ if (is_file($AutolinkDBCreationFile)) AutolinkCreateDatabase(); # if a $AutolinkDBCreationFile is present, continue the database creation.
+
+ $position_PostPage = array_search('PostPage', $EditFunctions); # PmWiki's $EditFunctions enumerates functions called by the page update
+ array_splice($EditFunctions, $position_PostPage+1, 0, 'AutolinkUpdatePage'); # process. We add our very own AutolinkUpdatePage() right after PostPage().
+
+ if (!is_file($AutolinkPath) # Don't create any markup if there is no Autolink file for the page displayed
+ or in_array($AutolinkPagename, $AutolinkBannedPages) # or it belongs to $AutolinkBannedPages
+ or ($action != 'browse')) # or we are doing anything but browsing / passively looking at the page.
+ return; #
+
+ $p_file = fopen($AutolinkPath, 'r'); # Get the autolinks-out from the displayed page's Autolink file: skip the
+ fgets($p_file); # first line (the regex), read the second line, strip the newline character
+ $AutolinksOut = explode(',', substr(fgets($p_file), 0, -1)); # from its end and split it at the commas.
+ fclose($p_file);
+
+ $last_markup_name = 'restore'; # For each linked pagename in $AutolinksOut, create a markup to find
+ foreach ($AutolinksOut as $linked_page) # autolinks-out to that page via the regex for its name.
+ if ($linked_page) # Empty entries are skipped.
+ { $regex = AutolinkGetRegexFromFile($linked_page); # Autolink's first markup for the page displayed is placed right after the
+ $new_markup_name = 'autolink-'.$linked_page; # markup named by $last_markup_name's initial value ('restore').
+
+ Markup($new_markup_name, '>'.$last_markup_name, '/('.$regex.')(?=[^>]*($|%3c(?!\/(a|script))))/ieu', 'AutolinkSet("$1")');
+ # The lookahead following the captured ($regex) is to make sure we don't catch patterns 1) %3ctag value="inside any HTML tag" /> or
+ # 2) %3ca href=''>enclosed by the a %3c/a>%3cscript> or the script tag%3c/script>, i.e. we only catch patterns ...
+ # DECIPHERMENT: (?= [^>]* ($| %3c (?! \/(a|script)) ))
+ # ... followed by ... any number of signs not a '>' ... until end of the string or ... until a '%3c' appears ... not followed by ... any '/a' or '/script' .
+
+ $last_markup_name = $new_markup_name; } # Any further Autolink markup is placed directly after the previous one.
+
+ Markup('autolink_backlinks', '%3cnl0', '/\(:autolink backlinks:\)/e', # (:autolink backlinks:) PmWiki markup, calling
+ 'Keep(AutolinkGetBacklinksString())'); } # AutolinkGetBacklinksString().
+
+
+
+
+ function AutolinkSet($pattern)
+ # Return the HTML autolink for $pattern (a phrase matched by one of the markups generated in AutolinkActivate()), pointing to the pagename from $AutolinksOut most fitting to it. If no pagename fits at all, $pattern is returned unlinked.
+ { global $pagename, $ScriptUrl, $EnablePathInfo, $AutolinkGroupname, $AutolinksOut, $AutolinkGroupDir, $AutolinkUmlautTable, $AutolinkEncoding, $AutolinkGapsToAllowEasy,
+ $AutolinkGapsToAllowLong, $AutolinkGapsToAllowHard, $AutolinkClass;
+
+ $pattern_UmlautsNo = $pattern; # Most often, a $pattern's deviation from the pagenames whose regexes
+ foreach ($AutolinkUmlautTable as $umlaut => $transl) # match it will concern only umlauts and gaps between pagename parts.
+ { $umlaut_lower = mb_substr($umlaut, 0, 1, $AutolinkEncoding); #
+ $umlaut_upper = mb_substr($umlaut, 1, 1, $AutolinkEncoding); # To test such deviations, simple comparisons of pagenames to a small
+ $pattern_UmlautsNo = str_replace($umlaut_lower, $transl[0], $pattern_UmlautsNo); # number of non-regex $pattern mutations created here will suffice.
+ if ($umlaut_upper != '') #
+ $pattern_UmlautsNo = str_replace($umlaut_upper, $transl[1], $pattern_UmlautsNo); } # To understand how $pattern_UmlautsNo is created, remember that
+ $pattern_HyphenYes = strtolower(str_replace(array_merge(str_split($AutolinkGapsToAllowEasy), # $AutolinkUmlautTable consists of key => value pairs like
+ $AutolinkGapsToAllowHard), '', $pattern_UmlautsNo)); # 'ß' => array('ss') or 'äÄ' => array('ae', 'Ae').
+ $pattern_HyphenNo = str_replace('-', '', $pattern_HyphenYes ); #
+
+ $val_max = 9000; # Cycle pagenames autolinked in the currently displayed page, comparing
+ $page_matching = array(NULL, 0); # them against $pattern. $page_matching[0] aims to catch the best possible
+ foreach ($AutolinksOut as $linked_pagename) # match, determined by the similarity score in $page_matching[1].
+ {
+ $pagename_lower = strtolower($linked_pagename); # If the all-lowercase versions of a $pattern reduced to its essentials
+ if ($pagename_lower == $pattern_HyphenYes) # and of the pagename match, that's perfect. We record the match with the
+ { $page_matching = array($linked_pagename, $val_max); # highest possible score, so that the regex matching further below is
+ break; } # skipped and cannot overwrite it: $page_matching finishes here.
+ #
+ if ($page_matching[1] %3c $val_max) # Next to the perfect match above, the best possible exit is the lowercases
+ { $pagename_HyphenNo = str_replace('-', '', $pagename_lower); # of $pattern and $linked_pagename matching but for the appearance of hyphens.
+ if ($pagename_HyphenNo == $pattern_HyphenNo) # $page_matching collects such a match with the highest possible similarity
+ $page_matching = array($linked_pagename, $val_max); } } # score; can only be beaten by the earlier condition's 100%25 lowercase match.
+
+ if ($page_matching[1] %3c $val_max) # If the matching tests above failed, re-cycle to try out regex matching.
+ foreach ($AutolinksOut as $linked_pagename) # Skip empty entries. Avoid unnecessary regex work by first comparing
+ if ($linked_pagename !== '' and strtolower($linked_pagename[0]) == strtolower($pattern_UmlautsNo[0])) # $pattern and pagename in their first letter. If OK, get the
+ { $p_file = fopen($AutolinkGroupDir.'/'.$linked_pagename, 'r'); # pagename's regex from $AutolinkGroupDir.
+ $regex = substr(fgets($p_file), 0, -1); #
+ if (preg_match('/'.$regex.'/iu', $pattern)) # If regex matches $pattern, calculate the match's similarity score via
+ { $similarity_score = $val_max - levenshtein($linked_pagename, $pattern, 1, 2, 1); # levenshtein() (costs: insertion 1, replacement 2, deletion 1).
+ if ($similarity_score > $page_matching[1]) #
+ $page_matching = array($linked_pagename, $similarity_score); } # If the similarity score surpasses that of $page_matching, overwrite
+ fclose($p_file); } # current pagename and similarity score.
+ if ($page_matching[0] === NULL) return $pattern; # No pagename fits: leave the phrase unlinked rather than link to an empty pagename.
+ return '%3ca class="'.$AutolinkClass.'" href='.($EnablePathInfo ? $ScriptUrl.'/' : $ScriptUrl. # Create autolink HTML. The path form is determined by PmWiki's $ScriptUrl
+ '?n=').$AutolinkGroupname.'.'.$page_matching[0].'>'.$pattern.'%3c/a>'; } # and $EnablePathInfo. $AutolinkClass allows for stylesheet application.
+
+
+
+ function AutolinkGetBacklinksString()
+ # Return a string of a list in HTML of Autolink backlinks to the current page, read from the third line (autolinks-in) of the page's Autolink file. The line break is stripped and empty entries are skipped, so an empty autolinks-in field yields an empty list.
+ { global $AutolinkGroupDir, $AutolinkPagename, $AutolinkGroupname;
+
+ $p_file = fopen($AutolinkGroupDir.'/'.$AutolinkPagename, 'r');
+ fgets($p_file); # Skip the regex line ...
+ fgets($p_file); # ... and the autolinks-out line.
+ $backlinks = explode(',', rtrim((string) fgets($p_file), "\r\n")); # fgets() returns FALSE if there is no third line; treat that as an empty field.
+ fclose($p_file);
+ $string_backlinks = '%3cul>';
+ foreach ($backlinks as $backlink)
+ if ($backlink !== '') $string_backlinks .= '%3cli>%3ca href="pmwiki.php?n='.$AutolinkGroupname.'.'.$backlink.'">'.$backlink.'%3c/a>%3c/li>';
+ $string_backlinks .= '%3c/ul>';
+ return $string_backlinks; }
+
+
+
+ #############################################################################
+ # #
+ # A u t o l i n k d a t a b a s e m a n i p u l a t i o n #
+ # #
+ #############################################################################
+
+
+
+ ########################################################
+ # Autolink database manipulation: major functions #
+ ########################################################
+
+ function AutolinkCreateDatabase()
+ # Create the Autolink database in the filesystem, populate it with autolinking references and regexes. Guided by $AutolinkDBCreationFile, can resume previous aborted attempts. Takes no parameters and returns nothing; works on the directories and files named by Autolink's global path variables.
+ { global $WorkDir, $AutolinkDir, $AutolinkGroupDir, $AutolinkGroupname, $AutolinkBannedPages, $AutolinkDBCreationFile;
+
+ if (!is_dir($AutolinkDir)) mkdir($AutolinkDir); # Make sure Autolink's main directory exists.
+
+ if (is_file($AutolinkDBCreationFile)) $p_dbfile = fopen($AutolinkDBCreationFile, 'r+'); # $AutolinkDBCreationFile is a temporary file created at the beginning
+ else # and deleted at the end of the Autolink database creation process.
+ { mkdir($AutolinkGroupDir); # It carries the list of pagenames of a given PmWiki group for which files
+ $p_dbfile = fopen($AutolinkDBCreationFile, 'x+'); # are to be created and populated in $AutolinkGroupDir; a list collected
+ $p_dir = opendir($WorkDir); # by searching $WorkDir for filenames prepended with $AutolinkGroupname and
+ while (FALSE !== ($filename = readdir($p_dir))) # not flagged as deleted by a ',' nor contained in $AutolinkBannedPages.
+ { list($filename_group, $filename_page) = AutolinkDividePagefileName($filename); # Each pagename is prefixed with a signal for its stage in the database
+ if ($AutolinkGroupname == $filename_group # creation process: '?' (nothing done yet), '!' (file created and pagename
+ and strpos($filename_page, ',') === FALSE # regex written into it) and '-' (autolinks generated, work finished).
+ and !in_array($filename_page, $AutolinkBannedPages)) # If a database creation process is aborted (i.e., exceeded some time
+ fwrite($p_dbfile, '?'.$filename_page."\n"); } closedir($p_dir); } # limit), it can be continued via an existing $AutolinkDBCreationFile.
+
+ fseek($p_dbfile, 0); # For each pagename in $AutolinkDBCreationFile prepended with a '?',
+ while (!feof($p_dbfile)) # create its regex and Autolink file.
+ { $position = ftell($p_dbfile); #
+ $line = fgets($p_dbfile); # But first check if a file for the pagename already exists from a
+ if ($line !== FALSE and $line[0] == '?') # previous aborted attempt in $AutolinkGroupDir; as its creation has not
+ { $pagename = substr($line, 1, -1); # been flagged as finished by a '!', delete it before creating it anew.
+ $path = $AutolinkGroupDir.'/'.$pagename; #
+ if (file_exists($path)) # Fill the new Autolink file with a regex matching the pagename according
+ unlink($path); # to AutolinkGenerateRegex().
+ $regex = AutolinkGenerateRegex($pagename); #
+ $p_file = fopen($path, 'x'); # Further add two newlines to each file to open lines to be populated by
+ fwrite($p_file, $regex."\n\n"); # autolinks-in and autolinks-out later on.
+ fclose($p_file); #
+ fseek($p_dbfile, $position); # Then jump back to the beginning of the currently processed pagename
+ fwrite($p_dbfile, '!'); # line in $AutolinkDBCreationFile and overwrite the '?' flag with a '!'.
+ fgets($p_dbfile); } } # (fgets() returns FALSE at the file's end; such a read is ignored.)
+
+ fseek($p_dbfile, 0); # When all pagenames in $AutolinkDBCreationFile have passed the file and
+ $regex_data = array(); # regex generation stage, we use $AutolinkDBCreationFile as an index to
+ while (!feof($p_dbfile)) # all files in $AutolinkGroupDir and collect all the regexes contained
+ { $filename = substr(fgets($p_dbfile), 1, -1); # therein in $regex_data, keyed to their respective pagename.
+ if ($filename)
+ $regex_data[$filename] = AutolinkGetRegexFromFile($filename); }
+
+ fseek($p_dbfile, 0); # For each $pagename in $AutolinkDBCreationFile prepended with a '!',
+ while (!feof($p_dbfile)) # get the 'text=' field of its mirror file in $WorkDir and check the
+ { $position = ftell($p_dbfile); # regexes from $regex_data against the PmWiki page text to be found
+ $line = fgets($p_dbfile); # therein.
+ if ($line !== FALSE and $line[0] == '!') #
+ { $pagename = substr($line, 1, -1); # Collect each match as an autolink from the page whose 'text=' field we
+ $text = AutolinkGetTextField($pagename); # just searched to the page whose pagename is keyed to the regex. Ignore
+ $autolinks = array(); # self-autolinks.
+ foreach ($regex_data as $referenced => $regex) #
+ if (preg_match('/'.$regex.'/iu', $text) and $referenced != $pagename) # Thus, for each $pagename in $AutolinkDBCreationFile, an array $autolinks
+ $autolinks[] = $referenced; # gets filled. We add its contained autolinked pagenames into the
+ AutolinkToFilesAddReferencesOnLine(array($pagename), $autolinks, 1); # autolinks-out field of $pagename's Autolink file and, vice versa,
+ AutolinkToFilesAddReferencesOnLine($autolinks, array($pagename), 2); # $pagename into the autolinks-in field of all pages referenced by it.
+ fseek($p_dbfile, $position); #
+ fwrite($p_dbfile, '-'); # Then jump back to the beginning of the currently processed pagename
+ fgets($p_dbfile); } } # line in $AutolinkDBCreationFile and overwrite the '!' flag with a '-'.
+
+ SortAutolinkData(); # Sort all the Autolink files just created and each sortable field in them.
+
+ fclose($p_dbfile);
+ unlink($AutolinkDBCreationFile); } # Database creation process finished: Delete $AutolinkDBCreationFile.
+
+
+
+ function AutolinkUpdatePage($pagename, $page, $new)
+ # Update the Autolink database for each PmWiki page update. Create or delete files as needed and add or remove references between pages. Called as part of PmWiki's $EditFunctions chain with the page's name, its old ($page) and its new ($new) page array; returns nothing.
+ { global $Now, $WorkDir, $IsPagePosted, $DeleteKeyPattern, $AutolinkGroupDir, $AutolinkGroupname, $AutolinkPagename, $AutolinkPath, $AutolinkBannedPages;
+ if (!$IsPagePosted or in_array($AutolinkPagename, $AutolinkBannedPages)) return; # Only act if a page update actually got posted and the page is not banned.
+
+ if (preg_match("/$DeleteKeyPattern/", $new['text'])) # If a new page's text fits PmWiki's $DeleteKeyPattern for page deletion,
+ { AutolinkPageDeletionWork(); return; } # AutolinkPageDeletionWork() purges the database of all the page's traces.
+
+ if ($page['ctime'] == $Now) # If page creation time is $Now, a new page is being created: Extra setup
+ { $diff_in = AutolinkPageCreationWork($new); $diff_out = ''; } # work from AutolinkPageCreationWork() is needed. In return, we get
+ else list($diff_in, $diff_out) = AutolinkGetDiff($page, $new, $Now); # $diff_in and $diff_out cheaply; else, we have to ask AutolinkGetDiff().
+
+ $p_dir = opendir($AutolinkGroupDir); $new_links_out = array(); # Check $diff_in for possibly new autolinks-out:
+ while (FALSE !== ($filename = readdir($p_dir))) #
+ if ($filename[0] != '.' and $filename != $AutolinkPagename) # Get the regexes from each file in $AutolinkGroupDir.
+ { $regex = AutolinkGetRegexFromFile($filename); # Match these regexes against $diff_in.
+ if (preg_match('/'.$regex.'/iu', $diff_in)) $new_links_out[] = $filename; } # Collect matches in $new_links_out.
+ closedir($p_dir);
+
+ $content = explode("\n", file_get_contents($AutolinkPath)); # Read the old autolinks-out, dropping empty entries (an empty field must not yield a '' pagename).
+ $old_links_out = isset($content[1]) ? preg_split('/,/', $content[1], -1, PREG_SPLIT_NO_EMPTY) : array();
+
+ if (!empty($new_links_out)) # Add to the database any autolinks new due to the page update:
+ { $new_links_out = array_diff($new_links_out, array_intersect($old_links_out, $new_links_out)); #
+ $content[1] = implode(',', array_merge($old_links_out, $new_links_out)); # Compare any possible $new_links_out against $old_links_out and keep only
+ file_put_contents($AutolinkPath, implode("\n", $content)); # the truly new ones. Add them to the page's file autolinks-out field.
+ AutolinkToFilesAddReferencesOnLine($new_links_out, array($AutolinkPagename), 2); # Add the current page's new autolinks-out as autolinks-in on the newly
+ SortAutolinkData(array($AutolinkPagename), 1); # referenced pages. Sort the current page's changed autolinks-out field
+ SortAutolinkData($new_links_out, 2); } # and the autolinks-in fields of all newly referenced pages.
+
+ $unset_links_out = array(); # Purge from the database autolinks no longer valid due to the page update:
+ foreach ($old_links_out as $old_link_out) #
+ { $regex = AutolinkGetRegexFromFile($old_link_out); # Get the regexes for every autolink-out in $old_links_out. Check for
+ if (preg_match('/'.$regex.'/iu', $diff_out) and !preg_match('/'.$regex.'/iu', $new['text'])) # matches to these in the deleted lines of $diff_out. For each matching
+ $unset_links_out[] = $old_link_out; } # regex, check if any match still occurs in the page's new text (with the
+ if (!empty($unset_links_out)) # same modifiers). If not, $unset_links_out collects the pagename. Delete
+ { AutolinkFromFilesDeleteReferencesOnLine(array($AutolinkPagename), $unset_links_out, 1); # autolinks from $unset_links_out: as autolinks-out in the current page; and
+ AutolinkFromFilesDeleteReferencesOnLine($unset_links_out, array($AutolinkPagename), 2); } } # as autolinks-in from the current page in all previously referenced pages.
+
+
+
+ function AutolinkPageCreationWork($new)
+ # Called by AutolinkUpdatePage() in case it handles a newly created page. Create the page's Autolink file and its name's regex and check for autolinks-in from other pages. $new is the new page's PmWiki page array; its 'text' entry is returned.
+ { global $AutolinkGroupDir, $AutolinkPagename, $AutolinkPath, $AutolinkDir;
+ $regex = AutolinkGenerateRegex($AutolinkPagename); # Generate a regex for the new pagename.
+
+ $autolinks_in = array();
+ $p_dir = opendir($AutolinkGroupDir);
+ while (FALSE !== ($filename = readdir($p_dir))) # Collect possible autolinks-in to the new page:
+ if ($filename[0] != '.') #
+ { $text = AutolinkGetTextField($filename); # Match the pagename's regex against the 'text=' fields of all the
+ if (preg_match('/'.$regex.'/iu', $text)) # $WorkDir pagefiles mirrored in the Autolink directory and
+ $autolinks_in[] = $filename; } # belonging to the same group.
+ closedir($p_dir);
+
+ $p_file = fopen($AutolinkPath, 'x'); # Create a file for the new page in the Autolink directory.
+ fwrite($p_file, $regex."\n\n".implode(',', $autolinks_in)); # Write the regex into the first line. Write the autolinks-in
+ fclose($p_file); # from other pages that we just collected into the third line.
+
+ AutolinkToFilesAddReferencesOnLine($autolinks_in, array($AutolinkPagename), 1); # Mirror the new autolinks-in in the backlinking pages' autolinks-out.
+
+ SortAutolinkData(array($AutolinkPagename), 2); # Sort the new page's autolinks-in field and, if there are any
+ if (!empty($autolinks_in)) SortAutolinkData($autolinks_in, 1); # backlinking pages, their autolinks-out fields.
+
+ return $new['text']; } # AutolinkUpdatePage() uses the whole new text as its $diff_in.
+
+
+
+ function AutolinkPageDeletionWork()
+ # Called by AutolinkUpdatePage() to handle page deletions. Purge all autolinks caused by the deleted page in other pages' Autolink files. Then delete its own Autolink file. Does nothing if the page has no Autolink file.
+ { global $AutolinkPagename, $AutolinkPath;
+ if (!is_file($AutolinkPath)) return; # E.g. a page that never got an Autolink file: nothing to purge.
+ $content = explode("\n", file_get_contents($AutolinkPath)); # Lines: regex, autolinks-out, autolinks-in. Empty entries are dropped.
+ $links_out = isset($content[1]) ? preg_split('/,/', $content[1], -1, PREG_SPLIT_NO_EMPTY) : array();
+ $links_in = isset($content[2]) ? preg_split('/,/', $content[2], -1, PREG_SPLIT_NO_EMPTY) : array();
+
+ AutolinkFromFilesDeleteReferencesOnLine($links_out, array($AutolinkPagename), 2);
+ AutolinkFromFilesDeleteReferencesOnLine($links_in , array($AutolinkPagename), 1);
+
+ unlink($AutolinkPath); }
+
+
+
+ function AutolinkGenerateRegex($pagename)
+ # Generate a regex pattern matching $pagename. This function contains most of Autolink's pattern recognition logic. (The other part can be found in AutolinkSet().)
+ { global $AutolinkSuffixTolerance, $AutolinkMinimalRoot, $AutolinkUmlautTable, $AutolinkEncoding, $AutolinkGapsToAllowEasy, $AutolinkGapsToAllowLong, $AutolinkGapsToAllowHard;
+
+ $regex = preg_replace( '/(-+)/', '!', $pagename); # Identify possible breaking points for dividing the pagename into parts:
+ $regex = preg_replace( '/([0-9])([A-Za-z])/', '$1!$2', $regex); # - any '-'
+ $regex = preg_replace('/([A-Za-z])([0-9])/', '$1!$2', $regex); # - alphabetical characters followed by digits, and vice versa
+ $regex = preg_replace('/([A-Za-z])(?=[A-Z])/', '$1!', $regex); # - any alphabetical character followed by any uppercase one
+
+ $legal_umlauts = ''; # Umlauts allowed to be matched in matching tolerances generated in the
+ foreach($AutolinkUmlautTable as $umlaut => $translation) # next step. The first character in $AutolinkUmlautTable's $umlaut key is
+ $legal_umlauts .= mb_substr($umlaut, 0, 1, $AutolinkEncoding); # always the lowercase version of said umlaut, and here we only need that.
+
+ $regex_parts = explode('!', $regex); # Build toleration for suffixes:
+ $minimal_root = $AutolinkMinimalRoot; #
+ foreach ($regex_parts as &$part) #
+ { # Divide $regex into parts (delimited by '!') to cycle through.
+ if (strpos('0123456789', $part[0]) === FALSE) #
+ { $ln_part = strlen($part); # $minimal_root delimits the important first letters of the pagename that
+ $ln_static = min($minimal_root, $ln_part); # absolutely have to stay identical. It can be longer than the first of the
+ $minimal_root -= $ln_static; # parts cycled; unused surplus length is carried over to the next cycle.
+ $ln_flexible = $ln_part - $ln_static; #
+ # $replace_tolerance defines the length of $part's ending that can be left
+ $replace_tolerance = $AutolinkSuffixTolerance; # out or replaced. Must not be longer than half of $part's initial length,
+ while ($replace_tolerance > 0) # nor longer than $AutolinkSuffixTolerance allows, nor reach into any part
+ { if (($ln_flexible >= $replace_tolerance) and ($ln_part >= 2 * $replace_tolerance)) # of $part defined as static due to $minimal_root.
+ { $part = substr($part, 0, -$replace_tolerance); #
+ break; } # Allow a number of characters, alphabetical and umlaut ones, to replace
+ $replace_tolerance--; } # and/or add to $part's end. The number is the sum of $replace_tolerance
+ $tolerance_sum = min($ln_part, ($replace_tolerance + $AutolinkSuffixTolerance)); # plus $AutolinkSuffixTolerance or the remaining initial of $part,
+ $part .= '[a-z'.$legal_umlauts.']{0,'.$tolerance_sum.'}'; } # depending on what is lowest.
+ #
+ else $part .= '[a-z'.$legal_umlauts.']{0,'.$AutolinkSuffixTolerance.'}'; } # Don't *replace* anything in a numerical $part. Never change numbers!
+
+ $gaps_to_allow = $AutolinkGapsToAllowEasy; # Glue parts together with a regex for possible gap characters in-between.
+ foreach ($AutolinkGapsToAllowHard as $char) # $AutolinkGapsToAllowEasy is for single characters that do not interfere
+ $gaps_to_allow .= '\\'.$char; # with regex syntax. $AutolinkGapsToAllowHard is for characters interfering
+ $gaps_to_allow = '['.$gaps_to_allow.'\-]'; # with regex syntax, so those are prepended with an escape backslash.
+ if (!empty($AutolinkGapsToAllowLong)) # $AutolinkGapsToAllowLong is for multi-character entities, like '$amp;'.
+ $gaps_to_allow = '(('.implode(')|(', $AutolinkGapsToAllowLong).')|'.$gaps_to_allow.')'; # For hyphens, gap tolerance is hard-coded: Above, we assumed them as a
+ $regex = implode($gaps_to_allow.'*', $regex_parts); # valid pagename delimiter, and AutolinkSet()'s logic also expects them.
+
+ foreach ($AutolinkUmlautTable as $umlaut => $transl) #
+ { $umlaut_lower = mb_substr($umlaut, 0, 1, $AutolinkEncoding); # Character sequences in the pagename that could be interpreted as an
+ $umlaut_upper = mb_substr($umlaut, 1, 1, $AutolinkEncoding); # umlaut translated back into an ASCII transcription are replaced by
+ # a regex allowing for both the umlaut and its possible transcriptions,
+ $transl_lower = $transl[0]; # including the original character sequence.
+ $transl_upper = $transl[1]; #
+ if (strlen($transl_lower) > 1) $transl_lower = '('.$transl_lower.')|'.$transl_lower[0]; # For this, $AutolinkUmlautTable provides key => value pairs, each of a
+ if (strlen($transl_upper) > 1) $transl_upper = '('.$transl_upper.')|'.$transl_upper[0]; # string composed of one ('ß') or two ('äÄ', lowercase is always first)
+ # possible cases of each umlaut, pointing to a value array of the standard
+ $regex = str_replace($transl[0], '('.$transl_lower.'|'.$umlaut_lower.')', $regex); # ASCII translation each of the lowercase umlaut and, if existing, the
+ if ($umlaut_upper != '') # uppercase umlaut.
+ $regex = str_replace($transl[1], '('.$transl_upper.'|'.$umlaut_upper.')', $regex); } #
+
+ return $regex; }
+
+
+
+ function SortAutolinkData($selection=array(), $sort_flag = 0)
+ # Sort the comma-separated reference lists kept in Autolink files with AutolinkSortByLengthAlphabetAndCase().
+ # AutolinkSet() iterates over the autolinks-out list from top to bottom and, for some cases, takes the first
+ # match instead of checking the whole list, so this order decides which pagename match is preferred.
+ #
+ # $selection: filenames inside $AutolinkGroupDir to sort. If empty, every directory entry whose name does
+ #             not start with '.' is taken.
+ # $sort_flag: 0 - sort both the autolinks-out field (line index 1) and the autolinks-in field (line index 2)
+ #             1 - sort the autolinks-out field only
+ #             2 - sort the autolinks-in field only
+ # Returns nothing; every processed file is rewritten in place.
+ { global $AutolinkGroupDir;
+
+   if (empty($selection))
+   { $p_dir = opendir($AutolinkGroupDir);
+     if ($p_dir === FALSE) return;                                  # No readable directory, nothing to sort. Never hand FALSE to readdir().
+     while (FALSE !== ($filename = readdir($p_dir)))
+       if ($filename[0] != '.')
+         $selection[] = $filename;
+     closedir($p_dir); }
+
+   $lines_needed = ($sort_flag != 1) ? 3 : 2;                       # Highest line index touched below, plus one.
+
+   foreach ($selection as $filename)
+   { $path = $AutolinkGroupDir.'/'.$filename;
+     $raw  = file_get_contents($path);
+     if ($raw === FALSE) continue;                                  # Unreadable file: leave it alone instead of overwriting it with blanks.
+
+     $content = array_pad(explode("\n", $raw), $lines_needed, '');  # Make sure the sorted fields land on their own lines even in a short file.
+
+     if ($sort_flag != 2)
+     { $autolinks_out = explode(',', $content[1]);
+       usort($autolinks_out, 'AutolinkSortByLengthAlphabetAndCase');
+       $content[1] = implode(',', $autolinks_out); }
+
+     if ($sort_flag != 1)
+     { $autolinks_in = explode(',', $content[2]);
+       usort($autolinks_in, 'AutolinkSortByLengthAlphabetAndCase');
+       $content[2] = implode(',', $autolinks_in); }
+
+     file_put_contents($path, implode("\n", $content)); } }
+
+
+
+ function AutolinkRepairData()
+ # Restore Autolink files from their 'REPAIR.' copies: every file in $AutolinkGroupDir whose name starts with
+ # 'REPAIR.' replaces the file of the same name without that prefix, and is then removed. No autolink
+ # dependency check is done; this only catches aborted file_put_contents() processes.
+ #
+ # Takes no parameters and returns nothing. A 'REPAIR.' copy is deleted only after its content has been read
+ # and written to the original path successfully, so a failed restore can be retried on the next call.
+ # NOTE(review): a 'REPAIR.' copy that was itself cut off while being written would still replace the
+ # original here - confirm whether that situation can occur.
+ { global $AutolinkGroupDir;
+
+   $p_dir = opendir($AutolinkGroupDir);
+   if ($p_dir === FALSE) return;                                    # Directory not readable: nothing to repair. Never hand FALSE to readdir().
+
+   $repair_files = array();                                         # Collect the names first, so that no file is created or
+   while (FALSE !== ($filename = readdir($p_dir)))                  # deleted while the directory is still being read.
+     if (substr($filename, 0, 7) == 'REPAIR.')
+       $repair_files[] = $filename;
+   closedir($p_dir);
+
+   foreach ($repair_files as $filename)
+   { $path_repair = $AutolinkGroupDir.'/'.$filename;
+     $content     = file_get_contents($path_repair);
+     if ($content === FALSE) continue;                              # Could not read the copy: keep both files untouched.
+
+     $path_original = $AutolinkGroupDir.'/'.substr($filename, 7);
+     if (file_put_contents($path_original, $content) !== FALSE)     # Only drop the copy once the original has been rewritten.
+       unlink($path_repair); } }
+
+
+
+ ########################################################
+ # Autolink database manipulation: helper functions #
+ ########################################################
+
+ function AutolinkSortByLengthAlphabetAndCase($a, $b)
+ # Comparison function handed to usort() by SortAutolinkData(). Longer strings sort before shorter ones;
+ # strings of equal length are ordered byte by byte, which puts uppercase letters before lowercase ones.
+ #
+ # $a, $b: the two strings to compare.
+ # Returns -1 if $a belongs before $b, 1 if it belongs after, 0 if both are identical.
+ { $strlen_a = strlen($a);
+   $strlen_b = strlen($b);
+   if ($strlen_a != $strlen_b)
+     return ($strlen_a > $strlen_b) ? -1 : 1;
+
+   $order = strcmp($a, $b);                                         # Plain byte comparison: numeric-looking names such as '100' and '1e2'
+   if ($order == 0) return 0;                                       # are never treated as equal numbers, so the order stays consistent.
+   return ($order > 0) ? 1 : -1; }
+
+
+
+ function AutolinkToFilesAddReferencesOnLine($filenames, $add_references, $line)
+ # To each Autolink file of $AutolinkGroupDir named in $filenames, add $add_references to the comma-separated
+ # list stored on line index $line.
+ #
+ # $filenames:      array of filenames inside $AutolinkGroupDir; empty entries are ignored.
+ # $add_references: array of references to add; references already on the line are not duplicated.
+ # $line:           zero-based index of the line to extend.
+ # Returns nothing; each file is rewritten via a temporary 'REPAIR.' copy.
+ { global $AutolinkGroupDir;
+
+   $filenames = array_diff($filenames, array(''));                  # Take care of the empty array elements that explode() likes to produce.
+
+   foreach ($filenames as $filename)
+   { $path = $AutolinkGroupDir.'/'.$filename;
+     $raw  = file_get_contents($path);
+     if ($raw === FALSE) $raw = '';                                 # Unreadable file: go on as with an empty one.
+     $content = array_pad(explode("\n", $raw), $line + 1, '');      # Guarantee that line $line exists, so the references cannot slip onto another line.
+
+     $references = array_merge(explode(',', $content[$line]), $add_references);
+     $references = array_diff($references, array(''));              # Drop the empty entry of an empty line. Comparing against '' keeps a reference named '0'.
+
+     $content[$line] = implode(',', array_unique($references));
+     $content = implode("\n", $content);
+
+     $path_repair = $AutolinkGroupDir.'/REPAIR.'.$filename;         # Don't write to original file directly. If for any reason (like timeouts
+     file_put_contents($path_repair, $content);                     # to be expected for database creation) its process is aborted, its
+     file_put_contents($path, $content);                            # overwriting may result in empty files and critical data distortion.
+     unlink($path_repair); } }                                      # AutolinkActivate() is expected to always call AutolinkRepairData().
+
+
+
+ function AutolinkFromFilesDeleteReferencesOnLine($filenames, $del_references, $line)
+ # Remove every reference listed in $del_references from the comma-separated list on line index $line of
+ # each Autolink file in $AutolinkGroupDir that is named in $filenames. Returns nothing.
+ { global $AutolinkGroupDir;
+
+   foreach ($filenames as $name)
+   { if ((string) $name === '') continue;                           # Skip the empty elements that explode() likes to produce.
+
+     $file = $AutolinkGroupDir.'/'.$name;
+     $rows = explode("\n", file_get_contents($file));
+
+     $kept = array_diff(explode(',', $rows[$line]), $del_references);
+     $rows[$line] = implode(',', $kept);
+     $updated = implode("\n", $rows);
+
+     $backup = $AutolinkGroupDir.'/REPAIR.'.$name;                  # Write a 'REPAIR.' copy first, then the real file, then drop the copy:
+     file_put_contents($backup, $updated);                          # if the process is aborted in between, the copy is left behind for
+     file_put_contents($file, $updated);                            # AutolinkRepairData() to restore from.
+     unlink($backup); } }
+
+
+
+ function AutolinkGetTextField($pagename)
+ # Get the content of the 'text=' field from the page file $WorkDir/$AutolinkGroupname.$pagename.
+ #
+ # $pagename: name of the page, without the group prefix.
+ # Returns everything following 'text=' on the first line that starts with it, including that line's
+ # trailing newline if there is one. Returns '' if the file cannot be opened or contains no such line.
+ { global $WorkDir, $AutolinkGroupname;
+
+   $p_file = fopen($WorkDir.'/'.$AutolinkGroupname.'.'.$pagename, 'r');
+   if ($p_file === FALSE) return '';                                # Never hand FALSE to fgets(): there is nothing to read.
+
+   $text = '';
+   while (FALSE !== ($line = fgets($p_file)))                       # fgets() returns FALSE at the end of the file.
+     if (substr($line, 0, 5) == 'text=')
+     { $text = (string) substr($line, 5);                           # Only a line that really starts with 'text=' may supply the result.
+       break; }
+   fclose($p_file);
+   return $text; }
+
+
+
+ function AutolinkGetDiff($page, $new, $time)
+ # Collect from a PmWiki page the diff stored for a given $time, in the form of two strings: $diff_in for
+ # all the text added and $diff_out for all the text deleted.
+ #
+ # $page: page array; only $page['time'] is read.
+ # $new:  page array that holds the diff under a key of the form "diff:$time:{$page['time']}:CLASS".
+ # $time: the time stamp forming the second component of that key.
+ # Returns array($diff_in, $diff_out). Lines flagged with a less-than sign go to $diff_in, lines flagged
+ # with '>' go to $diff_out, each without its first two characters; all other lines are ignored. Both
+ # strings are empty if $new holds no matching diff.
+ { $prefix = "diff:$time:{$page['time']}:";
+
+   $diff = '';                                                      # The CLASS component of the key is not known in this scope. Prefer the
+   if (isset($new[$prefix]))                                        # key with an empty class; failing that, accept the first key that
+     $diff = $new[$prefix];                                         # starts with the same time stamps.
+   else
+     foreach ($new as $key => $value)
+       if (strncmp((string) $key, $prefix, strlen($prefix)) == 0)
+       { $diff = $value;
+         break; }
+
+   $diff_lines_in  = array();
+   $diff_lines_out = array();
+   foreach (explode("\n", $diff) as $line)
+   { $diff_flag = substr($line, 0, 1);
+     if     ($diff_flag == '%3c') $diff_lines_in[]  = (string) substr($line, 2);
+     elseif ($diff_flag == '>')   $diff_lines_out[] = (string) substr($line, 2); }
+
+   $diff_in  = implode("\n", $diff_lines_in );
+   $diff_out = implode("\n", $diff_lines_out);
+   return array($diff_in, $diff_out); }
+ @]