From 8c6a22a5d6027823bcf838c06607d71c0f0dc7e5 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Tim=20D=C3=BCsterhus?= Date: Wed, 27 Jan 2021 10:59:30 +0100 Subject: [PATCH] Stop accessing wcf1_tag_to_object.languageID in TagCloudCacheBuilder::getTags() This change comes with one primary benefit: This stops accessing the redundant `languageID` column that is functionally dependent on the tagID (see #3803). On MariaDB 10.1 at a first glance this results in a *much* better query plan (note the lower row count for the first query): MariaDB [*snip*]> EXPLAIN -> SELECT tag.tagid, -> Count(object.objectid) AS counter -> FROM wcf1_tag_to_object object -> INNER JOIN wcf1_tag tag -> ON tag.tagid = object.tagid -> WHERE object.objecttypeid IN ( 92 ) -> AND tag.languageid IN ( 1 ) -> GROUP BY tag.tagid -> ORDER BY counter DESC, -> tag.tagid DESC -> LIMIT 500; +------+-------------+--------+------+-----------------------------------+------------+---------+---------------------------+------+-----------------------------------------------------------+ | id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra | +------+-------------+--------+------+-----------------------------------+------------+---------+---------------------------+------+-----------------------------------------------------------+ | 1 | SIMPLE | tag | ref | PRIMARY,languageID | languageID | 4 | const | 5299 | Using where; Using index; Using temporary; Using filesort | | 1 | SIMPLE | object | ref | objectTypeID,objectTypeID_2,tagID | tagID | 8 | *snip*.tag.tagID,const | 3 | Using index | +------+-------------+--------+------+-----------------------------------+------------+---------+---------------------------+------+-----------------------------------------------------------+ 2 rows in set (0.00 sec) MariaDB [*snip*]> MariaDB [*snip*]> EXPLAIN -> SELECT object.tagid, -> Count(*) AS counter -> FROM wcf1_tag_to_object object -> WHERE object.objecttypeid IN ( 92 ) -> AND object.languageid IN ( 1 ) -> GROUP BY object.tagid -> ORDER BY counter DESC, -> object.tagid DESC -> LIMIT 500; +------+-------------+--------+------+-----------------------------------------------------------------+--------------+---------+-------------+-------+-----------------------------------------------------------+ | id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra | +------+-------------+--------+------+-----------------------------------------------------------------+--------------+---------+-------------+-------+-----------------------------------------------------------+ | 1 | SIMPLE | object | ref | objectTypeID,objectTypeID_2,cbbba36334575d806c002a8756c8a107_fk | objectTypeID | 8 | const,const | 56293 | Using where; Using index; Using temporary; Using filesort | +------+-------------+--------+------+-----------------------------------------------------------------+--------------+---------+-------------+-------+-----------------------------------------------------------+ 1 row in set (0.00 sec) When running the query this unfortunately is a bit slower 0.05 (new) vs 0.03 (old) seconds. This can also be confirmed with a (larger) MySQL 8 installation and using EXPLAIN ANALYZE: > EXPLAIN ANALYZE -> SELECT tag.tagid, -> Count(object.objectid) AS counter -> FROM wcf1_tag_to_object object -> INNER JOIN wcf1_tag tag -> ON tag.tagid = object.tagid -> WHERE object.objecttypeid IN ( 203, 337 ) -> AND tag.languageid IN ( 1 ) -> GROUP BY tag.tagid -> ORDER BY counter DESC -> LIMIT 500 \G *************************** 1. row *************************** EXPLAIN: -> Limit: 500 row(s) (actual time=166.506..166.540 rows=500 loops=1) -> Sort: counter DESC, limit input to 500 row(s) per chunk (actual time=166.505..166.520 rows=500 loops=1) -> Table scan on (actual time=0.001..0.509 rows=16716 loops=1) -> Aggregate using temporary table (actual time=163.863..165.054 rows=16716 loops=1) -> Nested loop inner join (cost=14797.71 rows=87401) (actual time=0.056..112.533 rows=201296 loops=1) -> Index lookup on tag using languageID (languageID=1) (cost=1132.88 rows=9229) (actual time=0.037..3.809 rows=18651 loops=1) -> Filter: (object.objectTypeID in (203,337)) (cost=0.27 rows=9) (actual time=0.002..0.005 rows=11 loops=18651) -> Index lookup on object using tagID (tagID=tag.tagID) (cost=0.27 rows=12) (actual time=0.002..0.004 rows=11 loops=18651) 1 row in set (0.16 sec) > EXPLAIN ANALYZE -> SELECT object.tagid, -> Count(*) AS counter -> FROM wcf1_tag_to_object object -> WHERE object.objecttypeid IN ( 203, 337 ) -> AND object.languageid IN ( 1 ) -> GROUP BY object.tagid -> ORDER BY counter DESC -> LIMIT 500 \G *************************** 1. row *************************** EXPLAIN: -> Limit: 500 row(s) (actual time=135.420..135.461 rows=500 loops=1) -> Sort: counter DESC, limit input to 500 row(s) per chunk (actual time=135.419..135.437 rows=500 loops=1) -> Table scan on (actual time=0.000..0.560 rows=16716 loops=1) -> Aggregate using temporary table (actual time=132.599..133.936 rows=16716 loops=1) -> Filter: ((object.languageID = 1) and (object.objectTypeID in (203,337))) (cost=31642.06 rows=157965) (actual time=0.080..72.889 rows=201296 loops=1) -> Index range scan on object using objectTypeID (cost=31642.06 rows=157965) (actual time=0.076..53.904 rows=201296 loops=1) 1 row in set (0.14 sec) Nonetheless this appears to be worth it. Especially if we can remove the `languageID` column in the future. I also attempted to get rid of the second query, by simply putting a `tag.*` into the column list of the first query. Unfortunately MariaDB 10.1 (which is our minimum requirement) is too dumb to determine that all the columns in the `tag` table are functionally dependent on `tag.tagID`. MySQL 8 is able to handle that correctly. I have verified that both queries result in the same results (except for the undefined ordering when the counter is identical). --- .../system/cache/builder/TagCloudCacheBuilder.class.php | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/wcfsetup/install/files/lib/system/cache/builder/TagCloudCacheBuilder.class.php b/wcfsetup/install/files/lib/system/cache/builder/TagCloudCacheBuilder.class.php index eec3d1f038..6e11be7ad2 100644 --- a/wcfsetup/install/files/lib/system/cache/builder/TagCloudCacheBuilder.class.php +++ b/wcfsetup/install/files/lib/system/cache/builder/TagCloudCacheBuilder.class.php @@ -88,11 +88,13 @@ class TagCloudCacheBuilder extends AbstractCacheBuilder // get tag ids $conditionBuilder = new PreparedStatementConditionBuilder(); $conditionBuilder->add('object.objectTypeID IN (?)', [$this->objectTypeIDs]); - $conditionBuilder->add('object.languageID IN (?)', [$this->languageIDs]); - $sql = "SELECT COUNT(*) AS counter, object.tagID + $conditionBuilder->add('tag.languageID IN (?)', [$this->languageIDs]); + $sql = "SELECT tag.tagID, COUNT(*) AS counter FROM wcf" . WCF_N . "_tag_to_object object + INNER JOIN wcf" . WCF_N . "_tag tag + ON tag.tagID = object.tagID " . $conditionBuilder . " - GROUP BY object.tagID + GROUP BY tag.tagID ORDER BY counter DESC"; $statement = WCF::getDB()->prepareStatement($sql, 500); $statement->execute($conditionBuilder->getParameters()); -- 2.20.1