Add event to register custom spiders
author Cyperghost <olaf_schmitz_1@t-online.de>
Tue, 27 Feb 2024 11:49:12 +0000 (12:49 +0100)
committer Cyperghost <olaf_schmitz_1@t-online.de>
Tue, 27 Feb 2024 11:49:12 +0000 (12:49 +0100)
wcfsetup/install/files/lib/system/spider/SpiderHandler.class.php
wcfsetup/install/files/lib/system/spider/event/SpiderCollecting.class.php [new file with mode: 0644]

index d077969458f3d44a3a9f500e5a4a19a020e1dfdf..d4347f304b028281d2aaf6386b7ae65184113674 100644 (file)
@@ -2,7 +2,9 @@
 
 namespace wcf\system\spider;
 
+use wcf\system\event\EventHandler;
 use wcf\system\SingletonFactory;
+use wcf\system\spider\event\SpiderCollecting;
 
 /**
  * @author      Olaf Braun
@@ -11,16 +13,70 @@ use wcf\system\SingletonFactory;
  */
 final class SpiderHandler extends SingletonFactory
 {
+    /**
+     * Spiders collected in `init()`, indexed by their identifier. The
+     * identifier key is what `getSpider()` looks up.
+     *
+     * @var Spider[]
+     */
+    private array $spiders = [];
+
+    private string $regex = '';
+
+    /**
+     * Collects all spiders through the `SpiderCollecting` event and compiles
+     * the regular expression that `getIdentifier()` matches user agents against.
+     *
+     * The identifiers are grouped by their first character, so that the
+     * resulting expression has the shape `(?:a(?:bc|de))|(?:b(?:ot))`.
+     *
+     * NOTE(review): the alternatives of a group are tried in registration
+     * order, therefore an identifier that is a prefix of a later registered
+     * identifier takes precedence over the longer one.
+     */
+    #[\Override]
+    protected function init()
+    {
+        parent::init();
+        $event = new SpiderCollecting();
+        $event->register(new Spider('ABCdatos', 'ABCdatos BotLink', 'http://www.robotstxt.org/db/abcdatos.html'));
+
+        EventHandler::getInstance()->fire($event);
+
+        $this->spiders = $event->getSpiders();
+
+        // Map each first character to the list of identifier remainders.
+        $firstCharacter = [];
+        foreach (\array_keys($this->spiders) as $identifier) {
+            // PHP converts integer-like array keys (e.g. '42') to int,
+            // string offsets and `substr()` require the string form.
+            $identifier = (string)$identifier;
+            if ($identifier === '') {
+                // An empty identifier would produce an empty alternative
+                // that matches every user agent.
+                continue;
+            }
+
+            $firstCharacter[$identifier[0]][] = \substr($identifier, 1);
+        }
+
+        $groups = [];
+        foreach ($firstCharacter as $char => $suffixes) {
+            $groups[] = \sprintf(
+                '(?:%s(?:%s))',
+                // Digits used as array key are returned as int.
+                \preg_quote((string)$char, '/'),
+                \implode('|', \array_map(
+                    static fn (string $suffix): string => \preg_quote($suffix, '/'),
+                    $suffixes
+                ))
+            );
+        }
+
+        // `(?!)` is an empty negative lookahead that never matches anything,
+        // it is used whenever there is no identifier to search for.
+        $this->regex = '/' . ($groups === [] ? '(?!)' : \implode('|', $groups)) . '/';
+    }
+
+    /**
+     * Returns the spider with the given identifier.
+     *
+     * The identifier is used as-is as the array key into the collected
+     * spiders, `null` is returned if there is no entry for that key.
+     */
     public function getSpider(string $identifier): ?Spider
     {
-        //TODO
-        return null;
+        return $this->spiders[$identifier] ?? null;
     }
 
+    /**
+     * Finds the spider identifier for the given user agent.
+     *
+     * The user agent is converted to lowercase and matched against the
+     * regular expression that was compiled in `init()`. The text matched by
+     * the expression is returned as the identifier, `null` is returned if
+     * the expression does not match.
+     */
     public function getIdentifier(string $userAgent): ?string
     {
-        $userAgent = \strtolower($userAgent);
-        //TODO
+        $userAgent = \mb_strtolower($userAgent);
+        if (\preg_match($this->regex, $userAgent, $matches)) {
+            return $matches[0];
+        }
 
         return null;
     }
diff --git a/wcfsetup/install/files/lib/system/spider/event/SpiderCollecting.class.php b/wcfsetup/install/files/lib/system/spider/event/SpiderCollecting.class.php
new file mode 100644 (file)
index 0000000..3c17c5f
--- /dev/null
@@ -0,0 +1,39 @@
+<?php
+
+namespace wcf\system\spider\event;
+
+use wcf\system\event\IEvent;
+use wcf\system\spider\Spider;
+
+/**
+ * Collects `Spider` objects and indexes them by their lowercased identifier.
+ *
+ * @author      Olaf Braun
+ * @copyright   2001-2024 WoltLab GmbH
+ * @license     GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
+ */
+final class SpiderCollecting implements IEvent
+{
+    /**
+     * Collected spiders, indexed by their lowercased identifier.
+     *
+     * @var Spider[]
+     */
+    private array $spiders = [];
+
+    /**
+     * Adds the given spider to the collection.
+     *
+     * @throws \InvalidArgumentException if the lowercased identifier is already taken
+     */
+    public function register(Spider $spider): void
+    {
+        $key = \mb_strtolower($spider->identifier);
+
+        // The stored values are always `Spider` objects, thus `isset()` is
+        // sufficient to detect an occupied key.
+        if (isset($this->spiders[$key])) {
+            throw new \InvalidArgumentException('Spider with identifier ' . $key . ' already exists');
+        }
+
+        $this->spiders[$key] = $spider;
+    }
+
+    /**
+     * Returns every spider that has been registered so far.
+     *
+     * @return Spider[]
+     */
+    public function getSpiders(): array
+    {
+        return $this->spiders;
+    }
+}