Commit b7d67f5e authored by David Morley

Crawl the heading (h) tags in the page body and send them through a real language detector

parent c6826ae2
......@@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file",
"This file is @generated automatically"
],
"content-hash": "b4dea96de8556414616e04d0f09aa8fa",
"content-hash": "38845798152ede56127d2b651f4fdf00",
"packages": [
{
"name": "commerceguys/enum",
......@@ -100,6 +100,53 @@
],
"time": "2018-03-29T19:57:20+00:00"
},
{
"name": "detectlanguage/detectlanguage",
"version": "2.2.0",
"source": {
"type": "git",
"url": "https://github.com/detectlanguage/detectlanguage-php.git",
"reference": "a410dc1cfb31a9e332bd779c1cb27b1af2ab9b18"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/detectlanguage/detectlanguage-php/zipball/a410dc1cfb31a9e332bd779c1cb27b1af2ab9b18",
"reference": "a410dc1cfb31a9e332bd779c1cb27b1af2ab9b18",
"shasum": ""
},
"require": {
"ext-json": "*",
"php": ">=5.3.0"
},
"require-dev": {
"phpunit/phpunit": "3.7.*"
},
"type": "library",
"autoload": {
"psr-0": {
"DetectLanguage": "lib/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Laurynas Butkus"
}
],
"description": "Language Detection API PHP Client",
"homepage": "https://github.com/detectlanguage/detectlanguage-php",
"keywords": [
"api",
"client",
"detect",
"detection",
"language"
],
"time": "2018-03-28T20:56:36+00:00"
},
{
"name": "gabordemooij/redbean",
"version": "v5.1",
......@@ -495,52 +542,6 @@
],
"time": "2017-03-12T19:16:23+00:00"
},
{
"name": "patrickschur/language-detection",
"version": "v3.3.0",
"source": {
"type": "git",
"url": "https://github.com/patrickschur/language-detection.git",
"reference": "21a2e7a1b9bf6bff578ac11c6dcf3d3668aeccdf"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/patrickschur/language-detection/zipball/21a2e7a1b9bf6bff578ac11c6dcf3d3668aeccdf",
"reference": "21a2e7a1b9bf6bff578ac11c6dcf3d3668aeccdf",
"shasum": ""
},
"require": {
"ext-mbstring": "*",
"php": "^7"
},
"require-dev": {
"phpunit/phpunit": "^6"
},
"type": "library",
"autoload": {
"psr-4": {
"LanguageDetection\\": "src/LanguageDetection"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Patrick Schur",
"email": "patrick_schur@outlook.de"
}
],
"description": "A language detection library for PHP. Detects the language from a given text string.",
"homepage": "https://github.com/patrickschur/language-detection",
"keywords": [
"detect",
"detection",
"language"
],
"time": "2018-02-01T17:12:47+00:00"
},
{
"name": "rinvex/country",
"version": "v3.1.0",
......
......@@ -43,4 +43,7 @@ return [
//Geolite2-city database file in mmdb format - full file path (pull.sh will update this monthly)
'geoip2db' => '',
//detectlanguage.com API key
'dlkey' => '',
];
......@@ -15,7 +15,7 @@ if (!in_array(PHP_SAPI, ['cgi-fcgi', 'cli'])) {
}
use GeoIp2\Database\Reader;
use LanguageDetection\Language;
use \DetectLanguage\DetectLanguage;
use Poduptime\PodStatus;
use RedBeanPHP\R;
......@@ -34,6 +34,8 @@ require_once __DIR__ . '/../boot.php';
$sqldebug && R::fancyDebug(true);
DetectLanguage::setApiKey(c('dlkey'));
try {
// Setup GeoIP Database
$reader = new Reader(c('geoip2db'));
......@@ -251,11 +253,21 @@ foreach ($pods as $pod) {
extract(_curl("https://{$domain}/"));
$outputbody = $curl_body;
($outputbody ? $d->loadHTML($outputbody) : $d->loadHTML('<html></html>'));
$body = $d->getElementsByTagName('html')->item(0);
if ($body) {
$ld = new Language;
$detectedlanguage = key($ld->detect($body->nodeValue)->bestResults()->close());
// Gather the text of the page's heading elements into one space-separated
// string ($hs); that string is what gets passed to DetectLanguage::simpleDetect().
$hs = '';
// NOTE(review): the bound $type<6 covers h1..h5 only, so <h6> headings are
// never collected — confirm whether that is intended.
for($type=1; $type<6; $type++)
{
// All elements of the current heading level in the loaded document.
$h_es = $d->getElementsByTagName('h'.$type);
foreach($h_es as $h)
{
// Skip any heading whose text contains "JavaScript" (case-sensitive match);
// presumably these are "please enable JavaScript" notices that would skew
// detection — TODO confirm.
if (strpos($h->textContent, 'JavaScript') === false) {
$hs .= $h->textContent . ' ';
}
}
}
if ($hs) {
$detectedlanguage = DetectLanguage::simpleDetect($hs);
} else {
$score -= 1;
$detectedlanguage = null;
......@@ -263,7 +275,7 @@ foreach ($pods as $pod) {
_debug('Detected Language', $detectedlanguage);
if (!$jsonssl || !$body) {
if (!$jsonssl || !$hs) {
_debug('Connection', 'Can not connect to pod');
......
......@@ -12,6 +12,8 @@ use RedBeanPHP\R;
require_once __DIR__ . '/boot.php';
$iso = new Matriphe\ISO639\ISO639;
try {
$pods = R::getAll('
SELECT domain, dnssec, podmin_statement, sslexpire, masterversion, shortversion, softwarename, daysmonitored, monthsmonitored, score, signup, name, country, countryname, city, state, detectedlanguage, uptime_alltime, active_users_halfyear, active_users_monthly, service_facebook, service_twitter, service_tumblr, service_wordpress, service_xmpp, latency, date_updated, ipv6, total_users, local_posts, comment_counts, userrating, status
......@@ -77,7 +79,7 @@ foreach ($pods as $pod) {
}
echo '<td>' . $pod['city'] . '</td>';
echo '<td>' . $pod['state'] . '</td>';
echo '<td>' . ($pod['detectedlanguage'] ? strtoupper($pod['detectedlanguage']) : '') . '</td>';
echo '<td data-toggle="tooltip" title="' . ($pod['detectedlanguage'] ? $iso->languageByCode1($pod['detectedlanguage']) : '') . '">' . ($pod['detectedlanguage'] ? strtoupper($pod['detectedlanguage']) : '') . '</td>';
echo '<td class="text-truncate">';
$pod['service_facebook'] && print '<div class="smlogo smlogo-facebook" data-toggle="tooltip" title="Publish to Facebook"></div>';
$pod['service_twitter'] && print '<div class="smlogo smlogo-twitter" data-toggle="tooltip" title="Publish to Twitter"></div>';
......
......@@ -132,7 +132,7 @@ EOF;
foreach ($languages as $language) {
printf(
'<label><input class="ml-2" type="radio" name="language" value="%1$s" /> %2$s</label><br>',
$language,
strtoupper($language),
$iso->languageByCode1($language)
);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment