drm/amdkfd: Add module parameter of send_sigterm
author Oded Gabbay <oded.gabbay@gmail.com>
Wed, 24 Dec 2014 11:30:52 +0000 (13:30 +0200)
committer Oded Gabbay <oded.gabbay@gmail.com>
Tue, 19 May 2015 10:02:28 +0000 (13:02 +0300)
This patch adds a new kernel module parameter to amdkfd,
called send_sigterm.

This parameter specifies whether amdkfd should send the
SIGTERM signal to an HSA process, when the following conditions
occur:

1. The GPU triggers an exception regarding a kernel that was
   issued by this process.

2. The HSA process isn't waiting on an event that handles
   this exception.

The default behavior is not to send SIGTERM, but only to print
an error message to dmesg.

Reviewed-by: Ben Goz <ben.goz@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
drivers/gpu/drm/amd/amdkfd/kfd_events.c
drivers/gpu/drm/amd/amdkfd/kfd_module.c
drivers/gpu/drm/amd/amdkfd/kfd_priv.h

index 5c3a81e667a80ec403ae7734403dec12b423c24c..3cb37d220f6033cd128ae8cac52c651089e290ce 100644 (file)
@@ -872,10 +872,16 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
 
        /* Send SIGTERM no event of type "type" has been found*/
        if (send_signal) {
-               dev_warn(kfd_device,
-                       "Sending SIGTERM to HSA Process with PID %d ",
+               if (send_sigterm) {
+                       dev_warn(kfd_device,
+                               "Sending SIGTERM to HSA Process with PID %d ",
+                                       p->lead_thread->pid);
+                       send_sig(SIGTERM, p->lead_thread, 0);
+               } else {
+                       dev_err(kfd_device,
+                               "HSA Process (PID %d) got unhandled exception",
                                p->lead_thread->pid);
-               send_sig(SIGTERM, p->lead_thread, 0);
+               }
        }
 }
 
index 4e0a68f13a77bc4153256161224c0ab4fc3cf5bc..e4fc96ec407305702168b84eccc46a0fa6fdc9fc 100644 (file)
@@ -54,6 +54,11 @@ module_param(max_num_of_queues_per_device, int, 0444);
 MODULE_PARM_DESC(max_num_of_queues_per_device,
        "Maximum number of supported queues per device (1 = Minimum, 4096 = default)");
 
+int send_sigterm;
+module_param(send_sigterm, int, 0444);
+MODULE_PARM_DESC(send_sigterm,
+       "Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)");
+
 bool kgd2kfd_init(unsigned interface_version, const struct kgd2kfd_calls **g2f)
 {
        /*
index 9383494b429ed9df391df8227a197ea587ce3d87..b6f838f56589664297232a4e2700a0c39cca5bcc 100644 (file)
@@ -74,6 +74,12 @@ extern int max_num_of_queues_per_device;
 /* Kernel module parameter to specify the scheduling policy */
 extern int sched_policy;
 
+/*
+ * Kernel module parameter to specify whether to send sigterm to HSA process on
+ * unhandled exception
+ */
+extern int send_sigterm;
+
 /**
  * enum kfd_sched_policy
  *