3 namespace wcf\system\spider
;
5 use wcf\system\event\EventHandler
;
6 use wcf\system\SingletonFactory
;
7 use wcf\system\spider\event\SpiderCollecting
;
10 * Handles spider related operations.
13 * @copyright 2001-2024 WoltLab GmbH
14 * @license GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
17 final class SpiderHandler
extends SingletonFactory
// Spiders obtained from the SpiderCollecting event in init(); looked up by identifier in getSpider().
22 private array $spiders = [];
// Delimited regular expression assembled in init() from the spider identifiers.
24 private string $regex = '';
/**
 * Collects the available spiders through a SpiderCollecting event and
 * compiles their identifiers into a single regular expression stored in
 * $this->regex.
 *
 * Identifiers are grouped by their first character; each group contributes
 * one fragment built from the quoted character and an alternation of the
 * quoted identifier remainders.
 */
27 protected function init()
// Fire the collecting event so listeners can contribute spiders.
30 $event = new SpiderCollecting();
31 EventHandler
::getInstance()->fire($event);
// Keep whatever the event collected; the array keys are used as identifiers below.
33 $this->spiders
= $event->getSpiders();
// Bucket the identifiers by first character, storing only the remainder of each one.
// NOTE(review): $identifier[0] assumes every key is a non-empty string - confirm against SpiderCollecting.
36 foreach ($this->spiders
as $identifier => $spider) {
37 if (!isset($firstCharacter[$identifier[0]])) {
38 $firstCharacter[$identifier[0]] = [];
40 $firstCharacter[$identifier[0]][] = \
substr($identifier, 1);
// Emit one regex fragment per first character.
44 foreach ($firstCharacter as $char => $spiders) {
45 if ($this->regex
!== '') {
// NOTE(review): the statement guarded by the condition above and the sprintf format string are
// not visible here - presumably an alternation separator and a two-placeholder pattern; verify.
48 $this->regex
.= \
sprintf(
50 \
preg_quote($char, '/'),
51 \
implode('|', \array_map
(static function ($identifier) {
52 return \
preg_quote($identifier, '/');
// No fragment was appended, so fall back to a pattern that cannot match.
57 if ($this->regex
=== '') {
58 // This regex will never match anything.
59 $this->regex
= '(?!)';
// Wrap the pattern in '/' delimiters, the same delimiter passed to preg_quote() above.
61 $this->regex
= '/' . $this->regex
. '/';
/**
 * Looks up a collected spider by its identifier.
 *
 * Yields null when no spider is stored under the given identifier.
 */
public function getSpider(string $identifier): ?Spider
{
    // Guard clause: isset() is false for missing keys and for null entries alike.
    if (isset($this->spiders[$identifier])) {
        return $this->spiders[$identifier];
    }

    return null;
}
73 * Finds the spider identifier for the given user agent.
75 public function getIdentifier(string $userAgent): ?
string
77 $userAgent = \
mb_strtolower($userAgent);
78 if (\
preg_match($this->regex
, $userAgent, $matches)) {