. URI schemes detected are: http, * https, ftp, sftp, news or mailto. The detection of conformance to RFC 3987 is * rigorous; the detection of conformance to the specific schemes is rough and * might result in some false positives. * * If you wish to implement your own link detection, you can skip the default * detection described above and supply your own link-detecting regular * expression as $wgTrustedLinksRegex. * * Best used to block spam by setting this right for a group of users trusted * not to spam links (e.g. as for the 'trusted' group below), and then adding * users to that group on demand using [[Special:UserRights]]. * * Installation: * * Create a "TrustedLinks" directory under the "extensions" directory of your * MediaWiki root and copy this file and the accompanying TrustedLinks.i18n.php * file into that directory. Copy the following text verbatim into your * LocalSettings.php file and edit it per the comments: require_once("$IP/extensions/TrustedLinks/TrustedLinks.php"); # Set permissions as desired - the following are typical use cases. $wgGroupPermissions['bureaucrat']['postlink'] = true; $wgGroupPermissions['sysop' ]['postlink'] = true; $wgGroupPermissions['trusted' ]['postlink'] = true; # Uncomment this to customise the regular expression that is used to extract # URLs from the raw wikitext (the full match should be a single URL). #$wgTrustedLinksRegex = '/yourregexhere/'; ## End TrustedLinks entries in LocalSettings.php * * Note that if you wish to log or be notified by email of failed link insertion * attempts, you can install the UntrustedLinksLogger extension. * * Changelog: * * Version 1.2: Edited credits url. * Version 1.1: Adjusted the regular expression to detect internationalised URIs * according to RFC 3987; added rough scheme-specific detection for * the schemes http, https, ftp, sftp, news and mailto. * Version 1.0: Initial version. 
*
 */

$wgHooks['EditFilter'][] = 'TrustedLinks::OnEditFilter';
$wgExtensionMessagesFiles['TrustedLinks'] = dirname(__FILE__).'/TrustedLinks.i18n.php';
$wgExtensionCredits['other']['TrustedLinks'] = array(
    'path' => __FILE__,
    'name' => 'TrustedLinks',
    'author' => 'Laird Shaw based on code by James Paige',
    'url' => 'http://creativeandcritical.net/trustedlinks/',
    'descriptionmsg' => 'trustedlinks-desc',
    'version' => '1.2',
);

/**
 * Blocks page saves that add new URLs unless the editing user holds the
 * 'postlink' right.
 *
 * All members are static; the class is only a namespace for the EditFilter
 * hook handler and its helpers.
 */
class TrustedLinks {
    /**
     * Default URL-detecting pattern, used when $wgTrustedLinksRegex is not set.
     *
     * Comprehensive regex by eyelidlessness matching non-relative international URIs as shared here:
     * http://stackoverflow.com/questions/161738/what-is-the-best-regular-expression-to-check-if-a-string-is-a-valid-url
     * with backslashes escaped, and with a "u" modifier added as suggested by OmnipotentEntity here:
     * http://stackoverflow.com/questions/4337248/php-regexp-for-national-domains
     *
     * The pattern is unanchored and case-insensitive (modifiers "iu"). It is
     * kept as one literal on purpose: do not re-wrap or reformat it.
     */
    public static $uriRegex = '/[a-z](?:[-a-z0-9\\+\\.])*:(?:\\/\\/(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9\\._~\\x{A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}\\x{10000}-\\x{1FFFD}\\x{20000}-\\x{2FFFD}\\x{30000}-\\x{3FFFD}\\x{40000}-\\x{4FFFD}\\x{50000}-\\x{5FFFD}\\x{60000}-\\x{6FFFD}\\x{70000}-\\x{7FFFD}\\x{80000}-\\x{8FFFD}\\x{90000}-\\x{9FFFD}\\x{A0000}-\\x{AFFFD}\\x{B0000}-\\x{BFFFD}\\x{C0000}-\\x{CFFFD}\\x{D0000}-\\x{DFFFD}\\x{E1000}-\\x{EFFFD}!\\$&\'\\(\\)\\*\\+,;=:])*@)?(?:\\[(?:(?:(?:[0-9a-f]{1,4}:){6}(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:\\.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3})|::(?:[0-9a-f]{1,4}:){5}(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:\\.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3})|(?:[0-9a-f]{1,4})?::(?:[0-9a-f]{1,4}:){4}(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:\\.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3})|(?:[0-9a-f]{1,4}:[0-9a-f]{1,4})?::(?:[0-9a-f]{1,4}:){3}(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:\\.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3})|(?:(?:[0-9a-f]{1,4}:){0,2}[0-9a-f]{1,4})?::(?:[0-9a-f]{1,4}:){2}(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:\\.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3})|(?:(?:[0-9a-f]{1,4}:){0,3}[0-9a-f]{1,4})?::[0-9a-f]{1,4}:(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:\\.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3})|(?:(?:[0-9a-f]{1,4}:){0,4}[0-9a-f]{1,4})?::(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:\\.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3})|(?:(?:[0-9a-f]{1,4}:){0,5}[0-9a-f]{1,4})?::[0-9a-f]{1,4}|(?:(?:[0-9a-f]{1,4}:){0,6}[0-9a-f]{1,4})?::)|v[0-9a-f]+[-a-z0-9\\._~!\\$&\'\\(\\)\\*\\+,;=:]+)\\]|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:\\.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3}|(?:%[0-9a-f][0-9a-f]|[-a-z0-9\\._~\\x{A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}\\x{10000}-\\x{1FFFD}\\x{20000}-\\x{2FFFD}\\x{30000}-\\x{3FFFD}\\x{40000}-\\x{4FFFD}\\x{50000}-\\x{5FFFD}\\x{60000}-\\x{6FFFD}\\x{70000}-\\x{7FFFD}\\x{80000}-\\x{8FFFD}\\x{90000}-\\x{9FFFD}\\x{A0000}-\\x{AFFFD}\\x{B0000}-\\x{BFFFD}\\x{C0000}-\\x{CFFFD}\\x{D0000}-\\x{DFFFD}\\x{E1000}-\\x{EFFFD}!\\$&\'\\(\\)\\*\\+,;=@])*)(?::[0-9]*)?(?:\\/(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9\\._~\\x{A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}\\x{10000}-\\x{1FFFD}\\x{20000}-\\x{2FFFD}\\x{30000}-\\x{3FFFD}\\x{40000}-\\x{4FFFD}\\x{50000}-\\x{5FFFD}\\x{60000}-\\x{6FFFD}\\x{70000}-\\x{7FFFD}\\x{80000}-\\x{8FFFD}\\x{90000}-\\x{9FFFD}\\x{A0000}-\\x{AFFFD}\\x{B0000}-\\x{BFFFD}\\x{C0000}-\\x{CFFFD}\\x{D0000}-\\x{DFFFD}\\x{E1000}-\\x{EFFFD}!\\$&\'\\(\\)\\*\\+,;=:@]))*)*|\\/(?:(?:(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9\\._~\\x{A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}\\x{10000}-\\x{1FFFD}\\x{20000}-\\x{2FFFD}\\x{30000}-\\x{3FFFD}\\x{40000}-\\x{4FFFD}\\x{50000}-\\x{5FFFD}\\x{60000}-\\x{6FFFD}\\x{70000}-\\x{7FFFD}\\x{80000}-\\x{8FFFD}\\x{90000}-\\x{9FFFD}\\x{A0000}-\\x{AFFFD}\\x{B0000}-\\x{BFFFD}\\x{C0000}-\\x{CFFFD}\\x{D0000}-\\x{DFFFD}\\x{E1000}-\\x{EFFFD}!\\$&\'\\(\\)\\*\\+,;=:@]))+)(?:\\/(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9\\._~\\x{A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}\\x{10000}-\\x{1FFFD}\\x{20000}-\\x{2FFFD}\\x{30000}-\\x{3FFFD}\\x{40000}-\\x{4FFFD}\\x{50000}-\\x{5FFFD}\\x{60000}-\\x{6FFFD}\\x{70000}-\\x{7FFFD}\\x{80000}-\\x{8FFFD}\\x{90000}-\\x{9FFFD}\\x{A0000}-\\x{AFFFD}\\x{B0000}-\\x{BFFFD}\\x{C0000}-\\x{CFFFD}\\x{D0000}-\\x{DFFFD}\\x{E1000}-\\x{EFFFD}!\\$&\'\\(\\)\\*\\+,;=:@]))*)*)?|(?:(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9\\._~\\x{A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}\\x{10000}-\\x{1FFFD}\\x{20000}-\\x{2FFFD}\\x{30000}-\\x{3FFFD}\\x{40000}-\\x{4FFFD}\\x{50000}-\\x{5FFFD}\\x{60000}-\\x{6FFFD}\\x{70000}-\\x{7FFFD}\\x{80000}-\\x{8FFFD}\\x{90000}-\\x{9FFFD}\\x{A0000}-\\x{AFFFD}\\x{B0000}-\\x{BFFFD}\\x{C0000}-\\x{CFFFD}\\x{D0000}-\\x{DFFFD}\\x{E1000}-\\x{EFFFD}!\\$&\'\\(\\)\\*\\+,;=:@]))+)(?:\\/(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9\\._~\\x{A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}\\x{10000}-\\x{1FFFD}\\x{20000}-\\x{2FFFD}\\x{30000}-\\x{3FFFD}\\x{40000}-\\x{4FFFD}\\x{50000}-\\x{5FFFD}\\x{60000}-\\x{6FFFD}\\x{70000}-\\x{7FFFD}\\x{80000}-\\x{8FFFD}\\x{90000}-\\x{9FFFD}\\x{A0000}-\\x{AFFFD}\\x{B0000}-\\x{BFFFD}\\x{C0000}-\\x{CFFFD}\\x{D0000}-\\x{DFFFD}\\x{E1000}-\\x{EFFFD}!\\$&\'\\(\\)\\*\\+,;=:@]))*)*|(?!(?:%[0-9a-f][0-9a-f]|[-a-z0-9\\._~\\x{A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}\\x{10000}-\\x{1FFFD}\\x{20000}-\\x{2FFFD}\\x{30000}-\\x{3FFFD}\\x{40000}-\\x{4FFFD}\\x{50000}-\\x{5FFFD}\\x{60000}-\\x{6FFFD}\\x{70000}-\\x{7FFFD}\\x{80000}-\\x{8FFFD}\\x{90000}-\\x{9FFFD}\\x{A0000}-\\x{AFFFD}\\x{B0000}-\\x{BFFFD}\\x{C0000}-\\x{CFFFD}\\x{D0000}-\\x{DFFFD}\\x{E1000}-\\x{EFFFD}!\\$&\'\\(\\)\\*\\+,;=:@])))(?:\\?(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9\\._~\\x{A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}\\x{10000}-\\x{1FFFD}\\x{20000}-\\x{2FFFD}\\x{30000}-\\x{3FFFD}\\x{40000}-\\x{4FFFD}\\x{50000}-\\x{5FFFD}\\x{60000}-\\x{6FFFD}\\x{70000}-\\x{7FFFD}\\x{80000}-\\x{8FFFD}\\x{90000}-\\x{9FFFD}\\x{A0000}-\\x{AFFFD}\\x{B0000}-\\x{BFFFD}\\x{C0000}-\\x{CFFFD}\\x{D0000}-\\x{DFFFD}\\x{E1000}-\\x{EFFFD}!\\$&\'\\(\\)\\*\\+,;=:@])|[\\x{E000}-\\x{F8FF}\\x{F0000}-\\x{FFFFD}|\\x{100000}-\\x{10FFFD}\\/\\?])*)?(?:\\#(?:(?:%[0-9a-f][0-9a-f]|[-a-z0-9\\._~\\x{A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}\\x{10000}-\\x{1FFFD}\\x{20000}-\\x{2FFFD}\\x{30000}-\\x{3FFFD}\\x{40000}-\\x{4FFFD}\\x{50000}-\\x{5FFFD}\\x{60000}-\\x{6FFFD}\\x{70000}-\\x{7FFFD}\\x{80000}-\\x{8FFFD}\\x{90000}-\\x{9FFFD}\\x{A0000}-\\x{AFFFD}\\x{B0000}-\\x{BFFFD}\\x{C0000}-\\x{CFFFD}\\x{D0000}-\\x{DFFFD}\\x{E1000}-\\x{EFFFD}!\\$&\'\\(\\)\\*\\+,;=:@])|[\\/\\?])*)?/iu';

    /**
     * The entry point to the extension. Implements the EditFilter hook.
     *
     * Always returns true. A rejected save is signalled by assigning a
     * non-empty message to $hookError, which the core code then displays
     * as an error.
     *
     * @param object $EditPage   Edit page object; only getArticle() is used.
     * @param string $newText    Wikitext the user is trying to save.
     * @param mixed  $section    Unused.
     * @param string &$hookError Receives the rejection message, if any.
     * @param string $summary    Unused.
     * @return bool Always true.
     */
    public static function OnEditFilter($EditPage, $newText, $section, &$hookError, $summary) {
        global $wgUser;

        // Users holding the right may post anything, so the diff and the
        // regex scan below are skipped for them entirely.
        if ($wgUser->isAllowed('postlink')) {
            return true;
        }

        $oldText = $EditPage->getArticle()->getRawText();
        $addedUrls = self::getNewUrls($oldText, $newText);
        if (!$addedUrls) {
            return true;
        }

        $errorMessage = '';
        // Run client hooks into UntrustedLinkAttempt.
        // A hook function return of false means to abort our planned
        // cancellation of the page save. A return of true means to continue
        // our planned cancellation of the page save, optionally with an error
        // message generated by the hook function.
        if (wfRunHooks('UntrustedLinkAttempt', array($EditPage, $addedUrls, $newText, &$errorMessage))) {
            if (!$errorMessage) {
                $errorMessage = self::buildNoRightMessage($addedUrls);
            }
            // Setting $hookError indicates failure even though we're
            // returning true, and for the core code to display $hookError as
            // an error message.
            $hookError = $errorMessage;
        }

        return true;
    }

    /**
     * Builds the default "you may not post links" message.
     *
     * The rejected URLs are passed as the first message parameter, formatted
     * as a wikitext bullet list with each URL wrapped in <nowiki>.
     * NOTE(review): this assumes the 'trustedlinks_noright' message uses $1
     * for the URL list and that the result is rendered as wikitext - confirm
     * against TrustedLinks.i18n.php. If the message has no $1 the list is
     * simply not shown.
     *
     * @param array $addedUrls URLs that triggered the rejection.
     * @return string The message text.
     */
    private static function buildNoRightMessage($addedUrls) {
        // Only call the message loader where it exists.
        if (function_exists('wfLoadExtensionMessages')) {
            wfLoadExtensionMessages('TrustedLinks');
        }

        $urlList = '';
        foreach ($addedUrls as $url) {
            $urlList .= "\n* <nowiki>" . $url . "</nowiki>";
        }

        if (function_exists('wfMessage')) {
            return wfMessage('trustedlinks_noright', $urlList)->plain();
        }
        // Fallback for installations that do not provide wfMessage().
        return wfMsgReal('trustedlinks_noright', array($urlList), true, false, false);
    }

    /**
     * Returns true if a URL is present in the string $newText that was not
     * present in the string $oldText, and false otherwise.
     *
     * @param string $oldText
     * @param string $newText
     * @return bool
     */
    public static function wasLinkAdded($oldText, $newText) {
        return (bool)self::getNewUrls($oldText, $newText);
    }

    /**
     * Returns a complete list of unique URLs present in the string $newText
     * that were not present in the string $oldText.
     *
     * Only lines reported as new or changed by the diff are scanned in
     * $newText; URLs found there are then filtered against every URL in
     * $oldText.
     *
     * @param string|false|null $oldText Previous text; a non-string value is
     *                                   treated as the empty string.
     * @param string            $newText
     * @return array Sequentially indexed list of URL strings (possibly empty).
     */
    public static function getNewUrls($oldText, $newText) {
        $oldTextArr = explode("\n", (string)$oldText);
        $newTextArr = explode("\n", (string)$newText);

        $newOrChangedLines = self::getNewOrChangedLines($oldTextArr, $newTextArr);
        $urlsInNewOrChangedLines = self::extractUrls($newOrChangedLines);
        if (!$urlsInNewOrChangedLines) {
            return array();
        }

        $preExistingUrls = self::extractUrls($oldTextArr);
        // array_diff() preserves keys; reindex so callers get a real list.
        return array_values(array_diff($urlsInNewOrChangedLines, $preExistingUrls));
    }

    /**
     * Returns a complete list of unique URLs present in the array of
     * strings that is $lines. Matches are performed against
     * $wgTrustedLinksRegex if set, otherwise using a default regex with
     * post-processing.
     *
     * With a custom $wgTrustedLinksRegex every full match is accepted as-is.
     * With the default regex a match is kept only if it passes the rough
     * scheme check in isSupportedUri().
     *
     * @param array $lines Lines of text to scan.
     * @return array Unique matches, in order of first appearance.
     */
    public static function extractUrls($lines) {
        global $wgTrustedLinksRegex;

        $useCustomRegex = isset($wgTrustedLinksRegex);
        $urlRegex = $useCustomRegex ? $wgTrustedLinksRegex : self::$uriRegex;

        $urls = array();
        $seen = array(); // URL => true; constant-time duplicate check.
        foreach ($lines as $line) {
            if (!preg_match_all($urlRegex, $line, $matches, PREG_PATTERN_ORDER)) {
                continue;
            }
            foreach ($matches[0] as $url) {
                if (isset($seen[$url])) {
                    continue;
                }
                if ($useCustomRegex || self::isSupportedUri($url)) {
                    $seen[$url] = true;
                    $urls[] = $url;
                }
            }
        }

        return $urls;
    }

    /**
     * Roughly validates a URI as one of the schemes http, https, ftp, sftp,
     * news or mailto.
     *
     * The regular expression comes directly out of appendix B of RFC 3986.
     * The validation is pretty basic and it might generate a few false
     * positives. We simply check that the first four scheme types have an
     * authority component, i.e. a host name with an optional
     * username/password and/or port, and that the last two scheme types do
     * *not* have an authority component, but *do* have a path component.
     *
     * The scheme is compared case-insensitively, because the URI regex
     * matches with the "i" modifier: without this, "HTTP://..." would be
     * extracted and then wrongly discarded.
     *
     * @param string $url A URI matched by self::$uriRegex.
     * @return bool True if the URI belongs to a supported scheme.
     */
    private static function isSupportedUri($url) {
        if (!preg_match('/^(([^:\\/?#]+):)?(\\/\\/([^\\/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?/', $url, $urlMatches)) {
            // Should never get here: every part of the pattern is optional,
            // and the URI was already matched by the rigorous self::$uriRegex.
            return false;
        }

        $scheme = isset($urlMatches[2]) ? strtolower($urlMatches[2]) : '';
        $authority = isset($urlMatches[4]) ? $urlMatches[4] : '';
        $path = isset($urlMatches[5]) ? $urlMatches[5] : '';

        // Explicit '' comparisons rather than empty(): empty('0') is true,
        // which would let "http://0/" or "mailto:0" slip through.
        if (in_array($scheme, array('http', 'https', 'ftp', 'sftp'), true)) {
            return $authority !== '';
        }
        if (in_array($scheme, array('news', 'mailto'), true)) {
            // The mailto path is deliberately not validated as an e-mail
            // address: per RFC 2368, multiple comma-separated addresses are
            // permitted, 'possibly including "phrase" and "comment"
            // components'. For now, leave this as an over-permissive
            // validation.
            return $authority === '' && $path !== '';
        }

        return false;
    }

    /**
     * Returns an array of lines (strings) which are either new or changed
     * in the diff between the arrays of strings $oldTextArr and $newTextArr.
     *
     * @param array $oldTextArr Lines of the old text.
     * @param array $newTextArr Lines of the new text.
     * @return array Lines taken from $newTextArr.
     */
    public static function getNewOrChangedLines($oldTextArr, $newTextArr) {
        $differ = new WikiDiff3();
        $diffRanges = $differ->diff_range($oldTextArr, $newTextArr);

        $newOrChangedLines = array();
        foreach ($diffRanges as $diffRange) {
            // Each range covers indexes rightstart (inclusive) up to
            // rightend (exclusive) of $newTextArr.
            for ($i = $diffRange->rightstart; $i < $diffRange->rightend; $i++) {
                $newOrChangedLines[] = $newTextArr[$i];
            }
        }

        return $newOrChangedLines;
    }
}
?>