Extract website language detection to separate function.

Initialise static DetectLanguage config on boot.
parent 6176a979
......@@ -6,6 +6,7 @@
declare(strict_types=1);
use DetectLanguage\DetectLanguage;
use RedBeanPHP\R;
/**
......@@ -39,6 +40,10 @@ define('PODUPTIME', microtime(true));
require_once __DIR__ . '/vendor/autoload.php';
// Initialise language detection.
// The DetectLanguage client is configured through static setters, so doing it
// once here during boot makes the settings available to every later caller.
// NOTE(review): c('dlkey') presumably reads the API key from the project
// configuration — confirm against the definition of c().
DetectLanguage::setApiKey(c('dlkey'));
// NOTE(review): presumably switches the client to HTTPS for its API requests —
// confirm against the DetectLanguage library documentation.
DetectLanguage::setSecure(true);
// Set up global DB connection.
R::setup(
sprintf(
......
......@@ -14,6 +14,7 @@ if (!in_array(PHP_SAPI, ['cgi-fcgi', 'cli'])) {
}
}
use Carbon\Carbon;
use DetectLanguage\DetectLanguage;
use GeoIp2\Database\Reader;
use Poduptime\PodStatus;
......@@ -34,10 +35,6 @@ require_once __DIR__ . '/../boot.php';
$sqldebug && R::fancyDebug(true);
DetectLanguage::setApiKey(c('dlkey'));
$hour = date('H');
try {
// Setup GeoIP Database
$reader = new Reader(c('geoip2db'));
......@@ -250,35 +247,20 @@ foreach ($pods as $pod) {
$status = PodStatus::UP;
}
$d = new DOMDocument;
libxml_use_internal_errors(true);
extract(_curl("https://{$domain}/"));
$outputbody = $curl_body;
($outputbody ? $d->loadHTML($outputbody) : $d->loadHTML('<html></html>'));
// Default to the already saved language.
$detectedlanguage = $language;
$hs = '';
for ($type = 1; $type < 6; $type++) {
$h_es = $d->getElementsByTagName('h' . $type);
foreach ($h_es as $h) {
if (strpos($h->textContent, 'JavaScript') === false) {
$hs .= $h->textContent . ' ';
}
}
}
if ($hs && $hour == 12) {
$detectedlanguage = DetectLanguage::simpleDetect($hs);
} elseif ($hs) {
$detectedlanguage = $language;
} else {
$score -= 1;
$language_snippet = getWebsiteLanguageSnippetFromUrl("https://{$domain}/");
if (!$language_snippet) {
$detectedlanguage = null;
--$score;
} elseif (Carbon::now()->hour === 12) {
$detectedlanguage = detectWebsiteLanguageFromSnippet($language_snippet);
}
_debug('Detected Language', $detectedlanguage);
if (!$jsonssl || !$hs) {
if (!$jsonssl || !$language_snippet) {
_debug('Connection', 'Can not connect to pod');
try {
......@@ -296,7 +278,7 @@ foreach ($pods as $pod) {
die('Error in SQL query: ' . $e->getMessage());
}
$score -= 1;
--$score;
$status = PodStatus::DOWN;
}
......@@ -488,3 +470,66 @@ function _curl(string $url): array
return $data;
}
/**
 * Get a language snippet from a given URL.
 *
 * Fetches the page via _curl(), parses the body as HTML and concatenates the
 * text content of all <h1> to <h5> elements (grouped by heading level, each
 * followed by a single space). Headings mentioning "javascript" are skipped.
 *
 * @param string $url URL of the page to fetch.
 *
 * @return null|string Null when the response body is empty; otherwise the
 *                     concatenated heading text, which is an empty string when
 *                     the page contains no usable heading.
 */
function getWebsiteLanguageSnippetFromUrl(string $url): ?string
{
    $curl = _curl($url);
    if (!$curl['body']) {
        return null;
    }

    // Real-world HTML is frequently malformed; buffer libxml's parse errors
    // internally instead of letting them surface as PHP warnings.
    libxml_use_internal_errors(true);
    $d = new DOMDocument;
    $d->loadHTML($curl['body']);
    // The buffered errors are never inspected. Drop them right away so the
    // buffer does not keep growing when this function is called for many pages
    // within a single run.
    libxml_clear_errors();

    $hs = '';
    for ($type = 1; $type < 6; $type++) {
        foreach ($d->getElementsByTagName('h' . $type) as $h) {
            // Ignore possibly generic "JavaScript required" texts.
            if (stripos($h->textContent, 'javascript') === false) {
                $hs .= $h->textContent . ' ';
            }
        }
    }

    return $hs;
}
/**
 * Detect the language of the given text snippet.
 *
 * Snippets without any detectable content are rejected locally so that no
 * request is sent to the language detection service for them.
 *
 * @param string $snippet Text to run language detection on.
 *
 * @return null|string Null when the snippet is empty, consists only of
 *                     whitespace or is the string "0"; otherwise whatever
 *                     DetectLanguage::simpleDetect() returns for the snippet.
 */
function detectWebsiteLanguageFromSnippet(string $snippet): ?string
{
    // A snippet built from empty headings consists only of separator spaces.
    // It is truthy, but there is nothing in it to detect.
    $text = trim($snippet);

    // '0' is kept as a "no content" value because the previous truthiness
    // check (!$snippet) rejected it as well.
    if ($text === '' || $text === '0') {
        return null;
    }

    return DetectLanguage::simpleDetect($snippet);
}
/**
 * Detect the website language of the given URL.
 *
 * Convenience wrapper: fetches the language snippet for the URL and runs
 * language detection on it.
 *
 * @param string $url URL of the website to inspect.
 *
 * @return null|string Null when no snippet could be fetched for the URL;
 *                     otherwise the result of
 *                     detectWebsiteLanguageFromSnippet() for that snippet.
 */
function detectWebsiteLanguageFromUrl(string $url): ?string
{
    $snippet = getWebsiteLanguageSnippetFromUrl($url);

    // The snippet getter returns null for an empty response body. Passing that
    // null on to the non-nullable string parameter of the detection function
    // would raise a TypeError, so bail out here instead.
    if ($snippet === null) {
        return null;
    }

    return detectWebsiteLanguageFromSnippet($snippet);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment