Commit ded69477 authored by Florian Staudacher

replace the monstrous link regex with a slightly less complicated one,

based on the one from the markdown parser, fixes #3553
adding a list of urls for testing the regex in the specs
parent e6ec4193
// cache url regex globally, for direct access when testing
//
// Matches a plain url that is preceded by start-of-input or whitespace and
// followed by whitespace or end-of-input (the trailing part is a lookahead,
// so it is not consumed):
//   group 1 - the leading whitespace (empty at start of input)
//   group 2 - the url itself: an http(s)/ftp scheme followed by ':' and one
//             to three slashes, or a leading 'www.', then one or more of
//             either a character other than '"', '<', '>', ')' and
//             whitespace, or a parenthesised segment (which may contain one
//             nested parenthesised segment)
// The 'g' and 'i' flags make it match every occurrence, case-insensitively.
// NOTE(review): assumes the Diaspora global already exists when this
// callback runs - confirm against the load order.
$(function() {
Diaspora.url_regex = /(^|\s)\b((?:(?:https?|ftp):(?:\/{1,3})|www\.)(?:[^"<>\)\s]|\(([^\s()<>]+|(\([^\s()<>]+\)))\))+)(?=\s|$)/gi;
});
(function(){
//make it so I take text and mentions rather than the modelapp.helpers.textFormatter(
var textFormatter = function textFormatter(text, model) {
......@@ -17,20 +23,29 @@
converter.hooks.chain("preConversion", function(text) {
// add < > around plain urls, effectively making them "autolinks"
// regex copied from: http://daringfireball.net/2010/07/improved_regex_for_matching_urls (slightly modified)
var urlRegex = /(^|\s)\b((?:[a-z][\w-]+:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))/gi;
text = text.replace(urlRegex, function(wholematch, space, url) {
if( url.match(/^[^\w]/) ) return wholematch; // evil witchcraft, noop
return space+"<"+url+">";
text = text.replace(Diaspora.url_regex, function() {
var url = arguments[2];
if( url.match(/^[^\w]/) ) return url; // evil witchcraft, noop
return arguments[1]+"<"+url+">";
});
// process links
// regex copied from: https://code.google.com/p/pagedown/source/browse/Markdown.Converter.js#1198 (and slightly expanded)
var linkRegex = /(\[.*\]:\s)?(<|\()((?:(https?|ftp):\/\/[^\/'">\s]|www)[^'">\s]+?)(>|\))/gi;
var linkRegex = /(\[.*\]:\s)?(<|\()((?:(https?|ftp):\/\/[^\/'">\s]|www)[^'">\s]+?)([>\)]{1,2})/gi;
text = text.replace(linkRegex, function() {
var unicodeUrl = arguments[3];
var urlSuffix = arguments[5];
unicodeUrl = ( unicodeUrl.match(/^www/) ) ? ('http://' + unicodeUrl) : unicodeUrl;
// handle parentheses, especially in case the link ends with ')'
if( urlSuffix.indexOf(')') != -1 && urlSuffix.indexOf('>') != -1 ) {
unicodeUrl += ')';
urlSuffix = '>';
}
// markdown doesn't like '(' or ')' anywhere, except where it wants
var workingUrl = unicodeUrl.replace(/\(/, "%28").replace(/\)/, "%29");
var addr = parse_url(unicodeUrl);
if( !addr.host ) addr.host = ""; // must not be 'undefined'
......@@ -45,8 +60,8 @@
(!addr.query ? '' : '?' + encodeURI(addr.query) ) +
(!addr.fragment ? '' : '#' + encodeURI(addr.fragment) );
if( !arguments[1] || arguments[1] == "") { // inline link
if(arguments[2] == "<") return "["+unicodeUrl+"]("+asciiUrl+")"; // without link text
else return arguments[2]+asciiUrl+arguments[5]; // with link text
if(arguments[2] == "<") return "["+workingUrl+"]("+asciiUrl+")"; // without link text
else return arguments[2]+asciiUrl+urlSuffix; // with link text
} else { // reference style link
return arguments[1]+asciiUrl;
}
......
#
# http://mathiasbynens.be/demo/url-regex
# lines starting with '#' are ignored
#
http://
http://.
http://..
http://../
http://?
http://??
http://??/
http://#
http://##
http://##/
http://foo.bar?q=Spaces should be encoded
//
//a
///a
///
http:///a
foo.com
rdar://1234
h://test
http:// shouldfail.com
:// should fail
http://foo.bar/foo(bar)baz quux
ftps://foo.bar/
http://-error-.invalid/
http://a.b--c.de/
http://-a.b.co
http://a.b-.co
http://0.0.0.0
http://10.1.1.0
http://10.1.1.255
http://224.1.1.1
http://1.1.1.1.1
http://123.123.123
http://3628126748
http://.www.foo.bar/
http://www.foo.bar./
http://.www.foo.bar./
http://10.1.1.1
http://10.1.1.254
\ No newline at end of file
#
# http://mathiasbynens.be/demo/url-regex
# lines starting with '#' are ignored
#
http://foo.com/blah_blah
http://foo.com/blah_blah/
http://foo.com/blah_blah_(wikipedia)
http://foo.com/blah_blah_(wikipedia)_(again)
http://www.example.com/wpstyle/?p=364
https://www.example.com/foo/?bar=baz&inga=42&quux
http://✪df.ws/123
http://userid:password@example.com:8080
http://userid:password@example.com:8080/
http://userid@example.com
http://userid@example.com/
http://userid@example.com:8080
http://userid@example.com:8080/
http://userid:password@example.com
http://userid:password@example.com/
http://142.42.1.1/
http://142.42.1.1:8080/
http://➡.ws/䨹
http://⌘.ws
http://⌘.ws/
http://foo.com/blah_(wikipedia)#cite-1
http://foo.com/blah_(wikipedia)_blah#cite-1
http://foo.com/unicode_(✪)_in_parens
http://foo.com/(something)?after=parens
http://☺.damowmow.com/
http://code.google.com/events/#&product=browser
http://j.mp
ftp://foo.bar/baz
http://foo.bar/?q=Test%20URL-encoded%20stuff
http://مثال.إختبار
http://例子.测试
http://उदाहरण.परीक्षा
# http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.com
http://1337.net
http://a.b-c.de
http://223.255.255.254
\ No newline at end of file
......@@ -41,7 +41,7 @@ describe("app.helpers.textFormatter", function(){
//
// var formattedText = this.formatter.markdownify(links.join(" "))
var formattedText = this.formatter.markdownify(links.join(" and "))
var formattedText = this.formatter.markdownify(links.join(" and "));
var wrapper = $("<div>").html(formattedText);
_.each(links, function(link) {
......@@ -87,14 +87,16 @@ describe("app.helpers.textFormatter", function(){
"http://bündnis-für-krankenhäuser.de/wp-content/uploads/2011/11/cropped-logohp.jpg",
"http://موقع.وزارة-الاتصالات.مصر/", // example from #3082
"http:///scholar.google.com/citations?view_op=top_venues",
"http://lyricstranslate.com/en/someone-you-നിന്നെ-പോലൊരാള്‍.html" // example from #3063
"http://lyricstranslate.com/en/someone-you-നിന്നെ-പോലൊരാള്‍.html", // example from #3063,
"http://de.wikipedia.org/wiki/Liste_der_Abkürzungen_(Netzjargon)" // #3645
];
this.asciiUrls = [
"http://www.xn--brgerentscheid-krankenhuser-xkc78d.de",
"http://xn--bndnis-fr-krankenhuser-i5b27cha.de/wp-content/uploads/2011/11/cropped-logohp.jpg",
"http://xn--4gbrim.xn----ymcbaaajlc6dj7bxne2c.xn--wgbh1c/",
"http:///scholar.google.com/citations?view_op=top_venues",
"http://lyricstranslate.com/en/someone-you-%E0%B4%A8%E0%B4%BF%E0%B4%A8%E0%B5%8D%E0%B4%A8%E0%B5%86-%E0%B4%AA%E0%B5%8B%E0%B4%B2%E0%B5%8A%E0%B4%B0%E0%B4%BE%E0%B4%B3%E0%B5%8D%E2%80%8D.html"
"http://lyricstranslate.com/en/someone-you-%E0%B4%A8%E0%B4%BF%E0%B4%A8%E0%B5%8D%E0%B4%A8%E0%B5%86-%E0%B4%AA%E0%B5%8B%E0%B4%B2%E0%B5%8A%E0%B4%B0%E0%B4%BE%E0%B4%B3%E0%B5%8D%E2%80%8D.html",
"http://de.wikipedia.org/wiki/Liste_der_Abk%C3%BCrzungen_%28Netzjargon%29"
];
});
......@@ -141,6 +143,46 @@ describe("app.helpers.textFormatter", function(){
});
context("misc breakage and/or other issues with weird urls", function(){
// Regression spec for issue #3553: a markdown link whose label itself
// contains a plain url must come out with both the href and the label intact.
it("doesn't crash Chromium - RUN ME WITH CHROMIUM! (issue #3553)", function() {
  var label = 'Revert "rails admin is conflicting with client side validations: see https://github.com/sferik/rails_admin/issues/985"';
  var target = 'https://github.com/diaspora/diaspora/commit/61f40fc6bfe6bb859c995023b5a17d22c9b5e6e5';

  // build "[label](target)" and run it through the formatter
  var html = this.formatter.markdownify(['[', label, '](', target, ')'].join(''));

  // the link target must survive as the href attribute ...
  expect(html).toContain('href="' + target + '"');
  // ... and the label must survive as the element's text
  expect(html).toContain('>' + label + '<');
});
// Checks that Diaspora.url_regex matches every url listed in the
// 'good_urls.txt' fixture (one url per line, '#' lines are comments).
it("tests a bunch of benchmark urls", function(){
  var self = this;

  // synchronous request, so url_list is populated before the checks below run
  $.ajax({
    async: false,
    cache: false,
    url: '/spec/fixtures/good_urls.txt',
    success: function(data) { self.url_list = data.split("\n"); }
  });

  // if the fixture could not be loaded there is nothing to iterate over and
  // the spec would pass without having tested a single url - fail instead
  expect(self.url_list).toBeDefined();

  var tested = 0;
  _.each(self.url_list || [], function(url) {
    // skip 'comments' and blank lines (e.g. caused by a trailing newline)
    if( url.match(/^#/) || url.match(/^\s*$/) ) return;

    // regex.test is stupid, use match and boolean-ify it
    var result = !!url.match(Diaspora.url_regex);
    tested += 1;

    // name the offending url in the console; 'typeof' guards environments
    // where 'console' is not declared at all
    if( !result && typeof console !== "undefined" && console.log ) {
      console.log(url);
    }
    expect(result).toBeTruthy();
  });

  // a fixture that only contains comments/blank lines is a broken fixture
  expect(tested).toBeGreaterThan(0);
});
// TODO: try to match the 'bad_urls.txt' and have as few matches as possible
});
})
describe(".hashtagify", function(){
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment