60247ec85c2fdf80361c176006d4f1ab380716fe
[GitHub/WoltLab/WCF.git] / wcfsetup / install / files / lib / system / spider / SpiderHandler.class.php
1 <?php
2
3 namespace wcf\system\spider;
4
5 use wcf\system\event\EventHandler;
6 use wcf\system\SingletonFactory;
7 use wcf\system\spider\event\SpiderCollecting;
8
9 /**
10 * Handles spider related operations.
11 *
12 * @author Olaf Braun
13 * @copyright 2001-2024 WoltLab GmbH
14 * @license GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
15 * @since 6.1
16 */
final class SpiderHandler extends SingletonFactory
{
    /**
     * Collected spiders, indexed by their identifier.
     *
     * @var Spider[]
     */
    private array $spiders = [];

    /**
     * Delimited PCRE pattern that matches any of the collected spider identifiers.
     */
    private string $regex = '';

    /**
     * Collects the spiders via the `SpiderCollecting` event and compiles the
     * pattern that is used to look up identifiers in user agents.
     */
    #[\Override]
    protected function init()
    {
        parent::init();

        $event = new SpiderCollecting();
        EventHandler::getInstance()->fire($event);

        $this->spiders = $event->getSpiders();
        $this->regex = self::buildRegex(\array_keys($this->spiders));
    }

    /**
     * Builds a delimited regular expression that matches any of the given identifiers.
     *
     * The identifiers are grouped by their first byte so that the resulting pattern has
     * the shape `(?:a(?:bc|de))|(?:x(?:yz))` instead of one flat alternation. The
     * relative order of identifiers that share the same first byte is preserved.
     *
     * @param array<int|string> $identifiers
     */
    private static function buildRegex(array $identifiers): string
    {
        $suffixesByFirstCharacter = [];
        foreach ($identifiers as $identifier) {
            // PHP converts numeric string array keys (e.g. "360") to integers. Offset
            // access and `substr()` need the string form to split the identifier correctly.
            $identifier = (string)$identifier;

            // An empty identifier would add an alternative that matches the empty string
            // and thus every user agent, therefore it is left out of the pattern.
            if ($identifier === '') {
                continue;
            }

            // The split is byte-wise on purpose: the pattern is compiled without the `u`
            // modifier, so prefix and suffix are simply concatenated again by the engine.
            $suffixesByFirstCharacter[$identifier[0]][] = \substr($identifier, 1);
        }

        $groups = [];
        foreach ($suffixesByFirstCharacter as $char => $suffixes) {
            $groups[] = \sprintf(
                '(?:%s(?:%s))',
                // A digit used as array key is an integer at this point.
                \preg_quote((string)$char, '/'),
                \implode('|', \array_map(
                    static fn (string $suffix): string => \preg_quote($suffix, '/'),
                    $suffixes
                ))
            );
        }

        if ($groups === []) {
            // This regex will never match anything.
            return '/(?!)/';
        }

        return '/' . \implode('|', $groups) . '/';
    }

    /**
     * Returns the spider with the given identifier or `null` if there is no such spider.
     */
    public function getSpider(string $identifier): ?Spider
    {
        return $this->spiders[$identifier] ?? null;
    }

    /**
     * Finds the spider identifier for the given user agent.
     *
     * The user agent is converted to lower case before it is matched against the
     * identifiers. NOTE(review): this presumably requires the identifiers themselves
     * to be lower case; verify against the listeners that provide them.
     *
     * @return string|null the first identifier found in the user agent, `null` if none matches
     */
    public function getIdentifier(string $userAgent): ?string
    {
        $userAgent = \mb_strtolower($userAgent);

        // `preg_match()` yields `false` on failure, which is treated like "no match".
        if (\preg_match($this->regex, $userAgent, $matches) === 1) {
            return $matches[0];
        }

        return null;
    }
}