Commit 74a6f425 authored by Jonne Haß's avatar Jonne Haß
Browse files

Bye opengraph_parser, hi open_graph_reader

opengraph_parser is basically unmainted, issues are ignored
or deliberately closed without fixing. It pollutes the global
namespace and has no verification of correctness.

The opengraph gem has basically the same issues, not really maintained,
unreleased patches on master since over a year, not really smart either.

So I created my own version and while at it, why not strive try to be
complete and robust, although it's still a work in progress.

This also improves general URL detection by parsing them
from the message after stripping markdown.

An additional dependency was added to support
fetching sites that require cookies to work at all.
For the same reason Faraday's default redirect limit was
bumped.
parent 60a5d2a5
......@@ -110,12 +110,12 @@ gem 'messagebus_ruby_api', '1.0.3'
# Parsing
gem 'nokogiri', '1.6.4.1'
gem 'redcarpet', '3.2.0'
gem 'twitter-text', '1.10.0'
gem 'roxml', '3.1.6'
gem 'ruby-oembed', '0.8.11'
gem 'opengraph_parser', '0.2.3'
gem 'nokogiri', '1.6.4.1'
gem 'redcarpet', '3.2.0'
gem 'twitter-text', '1.10.0'
gem 'roxml', '3.1.6'
gem 'ruby-oembed', '0.8.11'
gem 'open_graph_reader', '0.3.1'
# Services
......@@ -140,6 +140,7 @@ gem 'acts-as-taggable-on', '3.4.2'
gem 'addressable', '2.3.6', :require => 'addressable/uri'
gem 'faraday', '0.9.0'
gem 'faraday_middleware', '0.9.1'
gem 'faraday-cookie_jar', '0.0.6'
gem 'typhoeus', '0.6.9'
# Views
......
......@@ -141,6 +141,8 @@ GEM
http_parser.rb (~> 0.6)
nokogiri (~> 1.6)
diff-lcs (1.2.5)
domain_name (0.5.22)
unf (>= 0.0.5, < 1.0.0)
eco (1.0.0)
coffee-script
eco-source
......@@ -165,6 +167,9 @@ GEM
railties (>= 3.0.0)
faraday (0.9.0)
multipart-post (>= 1.2, < 3)
faraday-cookie_jar (0.0.6)
faraday (>= 0.7.4)
http-cookie (~> 1.0.0)
faraday_middleware (0.9.1)
faraday (>= 0.7.4, < 0.10)
ffi (1.9.6)
......@@ -268,6 +273,8 @@ GEM
hike (1.2.3)
hiredis (0.5.2)
hitimes (1.2.2)
http-cookie (1.0.2)
domain_name (~> 0.5)
http_accept_language (2.0.2)
http_parser.rb (0.6.0)
i18n (0.6.11)
......@@ -361,9 +368,9 @@ GEM
omniauth-oauth (~> 1.0)
omniauth-wordpress (0.2.1)
omniauth-oauth2 (~> 1.1.0)
opengraph_parser (0.2.3)
addressable
nokogiri
open_graph_reader (0.3.1)
faraday (~> 0.9.0)
nokogiri (~> 1.6)
opennebula (4.10.1)
json
nokogiri
......@@ -618,6 +625,7 @@ DEPENDENCIES
entypo-rails (= 2.2.2)
factory_girl_rails (= 4.5.0)
faraday (= 0.9.0)
faraday-cookie_jar (= 0.0.6)
faraday_middleware (= 0.9.1)
fixture_builder (= 0.3.6)
fog (= 1.25.0)
......@@ -650,7 +658,7 @@ DEPENDENCIES
omniauth-tumblr (= 1.1)
omniauth-twitter (= 1.0.1)
omniauth-wordpress (= 0.2.1)
opengraph_parser (= 0.2.3)
open_graph_reader (= 0.3.1)
pry
pry-byebug
pry-debundle
......
......@@ -30,16 +30,17 @@ class OpenGraphCache < ActiveRecord::Base
end
def fetch_and_save_opengraph_data!
response = OpenGraph.new(self.url)
object = OpenGraphReader.fetch!(self.url)
return if response.blank? || response.type.blank?
return unless object
self.title = response.title.truncate(255)
self.ob_type = response.type
self.image = response.images[0]
self.url = response.url
self.description = response.description
self.title = object.og.title.truncate(255)
self.ob_type = object.og.type
self.image = object.og.image.url
self.url = object.og.url
self.description = object.og.description
self.save
rescue OpenGraphReader::NoOpenGraphDataError
end
end
......@@ -18,6 +18,7 @@ options = {
}
Faraday.default_connection = Faraday::Connection.new(options) do |b|
b.use FaradayMiddleware::FollowRedirects
b.use FaradayMiddleware::FollowRedirects, limit: 8
b.use :cookie_jar
b.adapter Faraday.default_adapter
end
OpenGraphReader.configure do |config|
config.synthesize_title = true
config.synthesize_image_url = true
end
......@@ -228,7 +228,7 @@ module Diaspora
# Extracts all the urls from the raw message and return them in the form of a string
# Different URLs are seperated with a space
def urls
@urls ||= Twitter::Extractor.extract_urls(@raw_message)
@urls ||= Twitter::Extractor.extract_urls(plain_text_without_markdown)
end
def raw
......
......@@ -178,5 +178,10 @@ describe Diaspora::MessageRenderer do
text = "[Perdu](http://perdu.com/) and [DuckDuckGo](https://duckduckgo.com/) can help you"
expect(message(text).urls).to eql ["http://perdu.com/", "https://duckduckgo.com/"]
end
it "extracts urls from continous markdown correctly" do
text = "[![Image](https://www.antifainfoblatt.de/sites/default/files/public/styles/front_full/public/jockpalfreeman.png?itok=OPjHKpmt)](https://www.antifainfoblatt.de/artikel/%E2%80%9Eschlie%C3%9Flich-waren-es-zu-viele%E2%80%9C)"
expect(message(text).urls).to eq ["https://www.antifainfoblatt.de/sites/default/files/public/styles/front_full/public/jockpalfreeman.png?itok=OPjHKpmt", "https://www.antifainfoblatt.de/artikel/%E2%80%9Eschlie%C3%9Flich-waren-es-zu-viele%E2%80%9C"]
end
end
end
......@@ -3,7 +3,7 @@ describe Workers::GatherOpenGraphData do
before do
@ogsite_title = 'Homepage'
@ogsite_type = 'website'
@ogsite_image = '/img/something.png'
@ogsite_image = 'http://www.we-support-open-graph.com/img/something.png'
@ogsite_url = 'http://www.we-support-open-graph.com'
@ogsite_description = 'Homepage'
......@@ -31,9 +31,12 @@ describe Workers::GatherOpenGraphData do
@status_message = FactoryGirl.create(:status_message)
stub_request(:get, @ogsite_url).to_return(:status => 200, :body => @ogsite_body)
stub_request(:get, @no_open_graph_url).to_return(:status => 200, :body => '<html><body>hello there</body></html>')
stub_request(:get, @oglong_url).to_return(:status => 200, :body => @oglong_body)
stub_request(:head, @ogsite_url).to_return(status: 200, body: "", headers: {'Content-Type' => 'text/html; utf-8'})
stub_request(:get, @ogsite_url).to_return(status: 200, body: @ogsite_body, headers: {'Content-Type' => 'text/html; utf-8'})
stub_request(:head, @no_open_graph_url).to_return(status: 200, body: "", headers: {'Content-Type' => 'text/html; utf-8'})
stub_request(:get, @no_open_graph_url).to_return(:status => 200, :body => '<html><head><title>Hi</title><body>hello there</body></html>', headers: {'Content-Type' => 'text/html; utf-8'})
stub_request(:head, @oglong_url).to_return(status: 200, body: "", headers: {'Content-Type' => 'text/html; utf-8'})
stub_request(:get, @oglong_url).to_return(status: 200, body: @oglong_body, headers: {'Content-Type' => 'text/html; utf-8'})
end
......@@ -59,7 +62,7 @@ describe Workers::GatherOpenGraphData do
expect(ogc.title).to eq(@ogsite_title)
expect(ogc.ob_type).to eq(@ogsite_type)
expect(ogc.image).to eq(@ogsite_url + @ogsite_image)
expect(ogc.image).to eq(@ogsite_image)
expect(ogc.url).to eq(@ogsite_url)
expect(ogc.description).to eq(@ogsite_description)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment