diff --git a/db/pull.php b/db/pull.php index 6a158bd8c04a8b5c99ef3bdc1e7cf61c3d9517ed..ba9a5748be1b9cd586620fee0c09fb9bbfe489cd 100644 --- a/db/pull.php +++ b/db/pull.php @@ -254,7 +254,7 @@ foreach ($pods as $pod) { if (!$language_snippet) { $detectedlanguage = null; --$score; - } elseif (Carbon::now()->hour === 12 || $debug) { + } elseif ($debug || Carbon::now()->hour === 12) { $detectedlanguage = detectWebsiteLanguageFromSnippet($language_snippet); } @@ -491,26 +491,25 @@ function getWebsiteLanguageSnippetFromUrl(string $url): ?string $d = new DOMDocument; $d->loadHTML($curl['body']); - $hs = ''; + $snippet = $d->getElementsByTagName('title')->item(0)->textContent ?? ''; + for ($type = 1; $type < 6; $type++) { foreach ($d->getElementsByTagName('h' . $type) as $h) { // Ignore possibly generic "JavaScript required" texts. if (stripos($h->textContent, 'javascript') === false) { - $hs .= $h->textContent . ' '; + $snippet .= ' ' . $h->textContent; } } } - $hs .= $d->getElementsByTagName('title')->item(0)->textContent . ' '; - - $metas = $d->getElementsByTagName('meta'); - foreach ($metas as $meta) { - if (strtolower($meta->getAttribute('name')) == 'description') { - $hs .= $meta->getAttribute('value'); + // Get descriptions of meta tags. + foreach ($d->getElementsByTagName('meta') as $meta) { + if (strtolower($meta->getAttribute('name')) === 'description') { + $snippet .= ' ' . $meta->getAttribute('value'); } } - return $hs; + return $snippet; } /**