drm/amdkfd: Add static user-mode queues support
authorYair Shachar <yair.shachar@amd.com>
Wed, 20 May 2015 10:43:04 +0000 (13:43 +0300)
committerOded Gabbay <oded.gabbay@gmail.com>
Wed, 3 Jun 2015 08:32:28 +0000 (11:32 +0300)
This patch adds support for static user-mode queues in QCM.
Queues which are designated as static can NOT be preempted by
the CP microcode when it is executing its scheduling algorithm.

This is needed for supporting the debugger feature, because we
can't allow the CP to preempt queues which are currently being debugged.

The number of queues that can be designated as static is limited by the
number of HQDs (Hardware Queue Descriptors).

Signed-off-by: Yair Shachar <yair.shachar@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
drivers/gpu/drm/amd/amdkfd/kfd_device.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c

index 52cab0f53ebc6e03c194208d65c10abd39754d2e..f1f86db27f73ef981b4a0a76aa96990802933d44 100644 (file)
@@ -33,6 +33,8 @@
 static const struct kfd_device_info kaveri_device_info = {
        .asic_family = CHIP_KAVERI,
        .max_pasid_bits = 16,
+       /* max num of queues for KV.TODO should be a dynamic value */
+       .max_no_of_hqd  = 24,
        .ih_ring_entry_size = 4 * sizeof(uint32_t),
        .event_interrupt_class = &event_interrupt_class_cik,
        .mqd_size_aligned = MQD_SIZE_ALIGNED
index 4e215bd4d41f0ae872c2568f1746b9c9ec68e6c8..a5dc8228b6b15e480e43ca026208955f13524c57 100644 (file)
@@ -45,7 +45,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
                                        struct qcm_process_device *qpd);
 
 static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
-static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock);
+static int destroy_queues_cpsch(struct device_queue_manager *dqm,
+                               bool preempt_static_queues, bool lock);
 
 static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
                                        struct queue *q,
@@ -775,7 +776,7 @@ static int stop_cpsch(struct device_queue_manager *dqm)
 
        BUG_ON(!dqm);
 
-       destroy_queues_cpsch(dqm, true);
+       destroy_queues_cpsch(dqm, true, true);
 
        list_for_each_entry(node, &dqm->queues, list) {
                pdd = qpd_to_pdd(node->qpd);
@@ -829,7 +830,8 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
        pr_debug("kfd: In %s\n", __func__);
 
        mutex_lock(&dqm->lock);
-       destroy_queues_cpsch(dqm, false);
+       /* here we actually preempt the DIQ */
+       destroy_queues_cpsch(dqm, true, false);
        list_del(&kq->list);
        dqm->queue_count--;
        qpd->is_debug = false;
@@ -935,13 +937,15 @@ static int destroy_sdma_queues(struct device_queue_manager *dqm,
                                unsigned int sdma_engine)
 {
        return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
-                       KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false,
+                       KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, 0, false,
                        sdma_engine);
 }
 
-static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
+static int destroy_queues_cpsch(struct device_queue_manager *dqm,
+                               bool preempt_static_queues, bool lock)
 {
        int retval;
+       enum kfd_preempt_type_filter preempt_type;
 
        BUG_ON(!dqm);
 
@@ -960,8 +964,12 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
                destroy_sdma_queues(dqm, 1);
        }
 
+       preempt_type = preempt_static_queues ?
+                       KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES :
+                       KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES;
+
        retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
-                       KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0);
+                       preempt_type, 0, false, 0);
        if (retval != 0)
                goto out;
 
@@ -989,7 +997,7 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
        if (lock)
                mutex_lock(&dqm->lock);
 
-       retval = destroy_queues_cpsch(dqm, false);
+       retval = destroy_queues_cpsch(dqm, false, false);
        if (retval != 0) {
                pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queues preemption");
                goto out;
@@ -1024,13 +1032,27 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 {
        int retval;
        struct mqd_manager *mqd;
+       bool preempt_all_queues;
 
        BUG_ON(!dqm || !qpd || !q);
 
+       preempt_all_queues = false;
+
        retval = 0;
 
        /* remove queue from list to prevent rescheduling after preemption */
        mutex_lock(&dqm->lock);
+
+       if (qpd->is_debug) {
+               /*
+                * error, currently we do not allow to destroy a queue
+                * of a currently debugged process
+                */
+               retval = -EBUSY;
+               goto failed_try_destroy_debugged_queue;
+
+       }
+
        mqd = dqm->ops.get_mqd_manager(dqm,
                        get_mqd_type_from_queue_type(q->properties.type));
        if (!mqd) {
@@ -1062,6 +1084,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
        return 0;
 
 failed:
+failed_try_destroy_debugged_queue:
+
        mutex_unlock(&dqm->lock);
        return retval;
 }
index 57278e2d72e0ae6b76c4fc519144016ee024dcdc..ec4036a09f3e78f7343a4ba87d88dc1aa0d71b21 100644 (file)
@@ -88,9 +88,11 @@ struct device_queue_manager_ops {
                                struct queue *q,
                                struct qcm_process_device *qpd,
                                int *allocate_vmid);
+
        int     (*destroy_queue)(struct device_queue_manager *dqm,
                                struct qcm_process_device *qpd,
                                struct queue *q);
+
        int     (*update_queue)(struct device_queue_manager *dqm,
                                struct queue *q);
 
@@ -100,8 +102,10 @@ struct device_queue_manager_ops {
 
        int     (*register_process)(struct device_queue_manager *dqm,
                                        struct qcm_process_device *qpd);
+
        int     (*unregister_process)(struct device_queue_manager *dqm,
                                        struct qcm_process_device *qpd);
+
        int     (*initialize)(struct device_queue_manager *dqm);
        int     (*start)(struct device_queue_manager *dqm);
        int     (*stop)(struct device_queue_manager *dqm);
@@ -109,9 +113,11 @@ struct device_queue_manager_ops {
        int     (*create_kernel_queue)(struct device_queue_manager *dqm,
                                        struct kernel_queue *kq,
                                        struct qcm_process_device *qpd);
+
        void    (*destroy_kernel_queue)(struct device_queue_manager *dqm,
                                        struct kernel_queue *kq,
                                        struct qcm_process_device *qpd);
+
        bool    (*set_cache_memory_policy)(struct device_queue_manager *dqm,
                                           struct qcm_process_device *qpd,
                                           enum cache_policy default_policy,
index e2533d875f4311e2c6fa426b4ea606c981b1675b..99b6d28a11c3e9030c734a58170ec4bb35bac56f 100644 (file)
@@ -163,7 +163,7 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
        num_queues = 0;
        list_for_each_entry(cur, &qpd->queues_list, list)
                num_queues++;
-       packet->bitfields10.num_queues = num_queues;
+       packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : num_queues;
 
        packet->sh_mem_config = qpd->sh_mem_config;
        packet->sh_mem_bases = qpd->sh_mem_bases;
@@ -177,9 +177,10 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
 }
 
 static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
-                               struct queue *q)
+                               struct queue *q, bool is_static)
 {
        struct pm4_map_queues *packet;
+       bool use_static = is_static;
 
        BUG_ON(!pm || !buffer || !q);
 
@@ -209,6 +210,7 @@ static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
        case KFD_QUEUE_TYPE_SDMA:
                packet->bitfields2.engine_sel =
                                engine_sel__mes_map_queues__sdma0;
+               use_static = false; /* no static queues under SDMA */
                break;
        default:
                BUG();
@@ -218,6 +220,9 @@ static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
        packet->mes_map_queues_ordinals[0].bitfields3.doorbell_offset =
                        q->properties.doorbell_off;
 
+       packet->mes_map_queues_ordinals[0].bitfields3.is_static =
+                       (use_static == true) ? 1 : 0;
+
        packet->mes_map_queues_ordinals[0].mqd_addr_lo =
                        lower_32_bits(q->gart_mqd_addr);
 
@@ -271,9 +276,11 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
                        pm_release_ib(pm);
                        return -ENOMEM;
                }
+
                retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd);
                if (retval != 0)
                        return retval;
+
                proccesses_mapped++;
                inc_wptr(&rl_wptr, sizeof(struct pm4_map_process),
                                alloc_size_bytes);
@@ -281,23 +288,36 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
                list_for_each_entry(kq, &qpd->priv_queue_list, list) {
                        if (kq->queue->properties.is_active != true)
                                continue;
+
+                       pr_debug("kfd: static_queue, mapping kernel q %d, is debug status %d\n",
+                               kq->queue->queue, qpd->is_debug);
+
                        retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr],
-                                                       kq->queue);
+                                               kq->queue, qpd->is_debug);
                        if (retval != 0)
                                return retval;
-                       inc_wptr(&rl_wptr, sizeof(struct pm4_map_queues),
-                                       alloc_size_bytes);
+
+                       inc_wptr(&rl_wptr,
+                               sizeof(struct pm4_map_queues),
+                               alloc_size_bytes);
                }
 
                list_for_each_entry(q, &qpd->queues_list, list) {
                        if (q->properties.is_active != true)
                                continue;
-                       retval = pm_create_map_queue(pm,
-                                               &rl_buffer[rl_wptr], q);
+
+                       pr_debug("kfd: static_queue, mapping user queue %d, is debug status %d\n",
+                               q->queue, qpd->is_debug);
+
+                       retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr],
+                                               q,  qpd->is_debug);
+
                        if (retval != 0)
                                return retval;
-                       inc_wptr(&rl_wptr, sizeof(struct pm4_map_queues),
-                                       alloc_size_bytes);
+
+                       inc_wptr(&rl_wptr,
+                               sizeof(struct pm4_map_queues),
+                               alloc_size_bytes);
                }
        }
 
@@ -488,7 +508,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
 
        packet = (struct pm4_unmap_queues *)buffer;
        memset(buffer, 0, sizeof(struct pm4_unmap_queues));
-
+       pr_debug("kfd: static_queue: unmapping queues: mode is %d , reset is %d , type is %d\n",
+               mode, reset, type);
        packet->header.u32all = build_pm4_header(IT_UNMAP_QUEUES,
                                        sizeof(struct pm4_unmap_queues));
        switch (type) {
@@ -529,6 +550,11 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
                packet->bitfields2.queue_sel =
                                queue_sel__mes_unmap_queues__perform_request_on_all_active_queues;
                break;
+       case KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES:
+               /* in this case, we do not preempt static queues */
+               packet->bitfields2.queue_sel =
+                               queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only;
+               break;
        default:
                BUG();
                break;
index 071ad5724bd2cd12d42963fe23a7a9ad7cda24c3..5b393f3e34a9f0a0082c1eb6edd8370cb638e223 100644 (file)
@@ -237,7 +237,8 @@ struct pm4_map_queues {
        struct {
                union {
                        struct {
-                               uint32_t reserved5:2;
+                               uint32_t is_static:1;
+                               uint32_t reserved5:1;
                                uint32_t doorbell_offset:21;
                                uint32_t reserved6:3;
                                uint32_t queue:6;
@@ -328,7 +329,8 @@ enum unmap_queues_action_enum {
 enum unmap_queues_queue_sel_enum {
        queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0,
        queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1,
-       queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2
+       queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2,
+       queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only = 3
 };
 
 enum unmap_queues_engine_sel_enum {
index 816c7a1f3294f7dd4939684878583757a8bf504a..eda7281c4a07bd5ae95f0863e3e7ee2971be4420 100644 (file)
@@ -128,6 +128,7 @@ struct kfd_device_info {
        unsigned int asic_family;
        const struct kfd_event_interrupt_class *event_interrupt_class;
        unsigned int max_pasid_bits;
+       unsigned int max_no_of_hqd;
        size_t ih_ring_entry_size;
        uint8_t num_of_watch_points;
        uint16_t mqd_size_aligned;
@@ -231,6 +232,7 @@ struct device *kfd_chardev(void);
 enum kfd_preempt_type_filter {
        KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE,
        KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES,
+       KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES,
        KFD_PREEMPT_TYPE_FILTER_BY_PASID
 };
 
index 530b82c4e78b1eca87d6ebfe72e7ecf763fc8a8c..85b7fecd1c5d4d6f34e0fa5e4f7a715b226875b2 100644 (file)
@@ -158,6 +158,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
        struct queue *q;
        struct process_queue_node *pqn;
        struct kernel_queue *kq;
+       int num_queues = 0;
+       struct queue *cur;
 
        BUG_ON(!pqm || !dev || !properties || !qid);
 
@@ -172,6 +174,20 @@ int pqm_create_queue(struct process_queue_manager *pqm,
                return -1;
        }
 
+       /*
+        * for debug process, verify that it is within the static queues limit
+        * currently limit is set to half of the total avail HQD slots
+        * If we are just about to create DIQ, the is_debug flag is not set yet
+        * Hence we also check the type as well
+        */
+       if ((pdd->qpd.is_debug) ||
+               (type == KFD_QUEUE_TYPE_DIQ)) {
+               list_for_each_entry(cur, &pdd->qpd.queues_list, list)
+                       num_queues++;
+               if (num_queues >= dev->device_info->max_no_of_hqd/2)
+                       return (-ENOSPC);
+       }
+
        retval = find_available_queue_slot(pqm, qid);
        if (retval != 0)
                return retval;