Take article age into account for related article list
authorTim Düsterhus <duesterhus@woltlab.com>
Wed, 3 Jun 2020 11:04:58 +0000 (13:04 +0200)
committerTim Düsterhus <duesterhus@woltlab.com>
Wed, 3 Jun 2020 11:17:47 +0000 (13:17 +0200)
Instead of just ordering the related articles by the number of matching
tags the articles are now also ordered by their age as a tie-breaker,
referring more recent articles. Previously the order was undefined,
usually preferring older articles, as their rows appear earlier within
the `tag_to_object` table.

It was verified that the new query does not result in a significantly
worse query plan than the old one. The only difference is the addition
of 2 `eq_ref` joins, which is the best possible join type. However we
could get rid of a separate query to pull the articleIDs, because they
are readily available by the pure virtue of having to join the article
table to get the time.

Another possible solution would be ordering by `ABS(article.time - ?)`
with the currently viewed article's time in place of the question mark.
This would prefer articles written within a similar timeframe, without
resulting in a worse query plan within my tests.

Resolves #3285

wcfsetup/install/files/lib/page/AbstractArticlePage.class.php

index fdcb05187b4679fa16caba85e131c8d60e2ae196..32befc7cde032c0b6dbb3f9a107a0a98000ccd16 100644 (file)
@@ -137,29 +137,24 @@ abstract class AbstractArticlePage extends AbstractPage {
                if (MODULE_TAGGING && ARTICLE_RELATED_ARTICLES) {
                        if (!empty($this->tags)) {
                                $conditionBuilder = new PreparedStatementConditionBuilder();
-                               $conditionBuilder->add('objectTypeID = ?', [TagEngine::getInstance()->getObjectTypeID('com.woltlab.wcf.article')]);
-                               $conditionBuilder->add('tagID IN (?)', [array_keys($this->tags)]);
-                               $conditionBuilder->add('objectID <> ?', [$this->articleContentID]);
-                               $sql = "SELECT          objectID, COUNT(*) AS count
-                                       FROM            wcf" . WCF_N . "_tag_to_object
+                               $conditionBuilder->add('tag_to_object.objectTypeID = ?', [TagEngine::getInstance()->getObjectTypeID('com.woltlab.wcf.article')]);
+                               $conditionBuilder->add('tag_to_object.tagID IN (?)', [array_keys($this->tags)]);
+                               $conditionBuilder->add('tag_to_object.objectID <> ?', [$this->articleContentID]);
+                               $sql = "SELECT          article.articleID, COUNT(*) AS count
+                                       FROM            wcf" . WCF_N . "_tag_to_object tag_to_object
+                                       INNER JOIN      wcf" . WCF_N . "_article_content article_content
+                                       ON              tag_to_object.objectID = article_content.articleContentID
+                                       INNER JOIN      wcf" . WCF_N . "_article article
+                                       ON              article_content.articleID = article.articleID
                                        " . $conditionBuilder . "
-                                       GROUP BY        objectID
+                                       GROUP BY        tag_to_object.objectID
                                        HAVING          COUNT(*) >= " . round(count($this->tags) * ARTICLE_RELATED_ARTICLES_MATCH_THRESHOLD / 100) . "
-                                       ORDER BY        count DESC";
+                                       ORDER BY        count DESC, article.time DESC";
                                $statement = WCF::getDB()->prepareStatement($sql, ARTICLE_RELATED_ARTICLES);
                                $statement->execute($conditionBuilder->getParameters());
-                               $articleContentIDs = $statement->fetchAll(\PDO::FETCH_COLUMN);
+                               $articleIDs = $statement->fetchAll(\PDO::FETCH_COLUMN);
                                
-                               if (!empty($articleContentIDs)) {
-                                       $conditionBuilder = new PreparedStatementConditionBuilder();
-                                       $conditionBuilder->add('articleContentID IN (?)', [$articleContentIDs]);
-                                       $sql = "SELECT          articleID
-                                               FROM            wcf" . WCF_N . "_article_content
-                                               " . $conditionBuilder;
-                                       $statement = WCF::getDB()->prepareStatement($sql);
-                                       $statement->execute($conditionBuilder->getParameters());
-                                       $articleIDs = $statement->fetchAll(\PDO::FETCH_COLUMN);
-                                       
+                               if (!empty($articleIDs)) {
                                        $this->relatedArticles = new AccessibleArticleList();
                                        $this->relatedArticles->getConditionBuilder()->add('article.articleID IN (?)', [$articleIDs]);
                                        $this->relatedArticles->sqlOrderBy = 'article.time';