Merge tag 'v3.10.90' into update
diff --git a/kernel/fork.c b/kernel/fork.c
index 987b28a1f01b6c6ce5d554eb22d9c89e1cde1e7b..d15fc57a65106b34aaadcf12e9205a37399d44c5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
 #define CREATE_TRACE_POINTS
 #include <trace/events/task.h>
 
+#ifdef CONFIG_MT_PRIO_TRACER
+# include <linux/prio_tracer.h>
+#endif
+
 /*
  * Protected counters by write_lock_irq(&tasklist_lock)
  */
@@ -198,6 +202,9 @@ struct kmem_cache *vm_area_cachep;
 /* SLAB cache for mm_struct structures (tsk->mm) */
 static struct kmem_cache *mm_cachep;
 
+/* Notifier list called when a task struct is freed */
+static ATOMIC_NOTIFIER_HEAD(task_free_notifier);
+
 static void account_kernel_stack(struct thread_info *ti, int account)
 {
        struct zone *zone = page_zone(virt_to_page(ti));
@@ -231,6 +238,18 @@ static inline void put_signal_struct(struct signal_struct *sig)
                free_signal_struct(sig);
 }
 
+int task_free_register(struct notifier_block *n)
+{
+       return atomic_notifier_chain_register(&task_free_notifier, n);
+}
+EXPORT_SYMBOL(task_free_register);
+
+int task_free_unregister(struct notifier_block *n)
+{
+       return atomic_notifier_chain_unregister(&task_free_notifier, n);
+}
+EXPORT_SYMBOL(task_free_unregister);
+
 void __put_task_struct(struct task_struct *tsk)
 {
        WARN_ON(!tsk->exit_state);
@@ -242,6 +261,7 @@ void __put_task_struct(struct task_struct *tsk)
        delayacct_tsk_free(tsk);
        put_signal_struct(tsk->signal);
 
+       atomic_notifier_call_chain(&task_free_notifier, 0, tsk);
        if (!profile_handoff_task(tsk))
                free_task(tsk);
 }
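
A minimal sketch of how a module might consume the task_free_register()/task_free_unregister() hooks exported above; the callback and its names are illustrative only and are not part of this patch (the prototypes are assumed to be visible via a header):

    #include <linux/notifier.h>
    #include <linux/sched.h>

    /* Invoked from __put_task_struct() via task_free_notifier, i.e. when the
     * last reference to a task_struct is dropped. */
    static int example_task_free_cb(struct notifier_block *nb,
                                    unsigned long action, void *data)
    {
            struct task_struct *tsk = data;

            pr_debug("task %d (%s) is being freed\n", tsk->pid, tsk->comm);
            return NOTIFY_OK;
    }

    static struct notifier_block example_task_free_nb = {
            .notifier_call = example_task_free_cb,
    };

    /* module init:  task_free_register(&example_task_free_nb);   */
    /* module exit:  task_free_unregister(&example_task_free_nb); */
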
@@ -299,18 +319,31 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
        int err;
 
        tsk = alloc_task_struct_node(node);
-       if (!tsk)
+       if (!tsk) {
+               printk("[%d:%s] fork fail at alloc_tsk_node, please check kmem_cache_alloc_node()\n", current->pid, current->comm);
                return NULL;
-
+       }
        ti = alloc_thread_info_node(tsk, node);
-       if (!ti)
+       if (!ti) {
+               printk("[%d:%s] fork fail at alloc_t_info_node, please check alloc_pages_node()\n", current->pid, current->comm);
                goto free_tsk;
+       }
 
        err = arch_dup_task_struct(tsk, orig);
-       if (err)
+       if (err) {
+               printk("[%d:%s] fork fail at arch_dup_task_struct, err:%d\n", current->pid, current->comm, err);
                goto free_ti;
-
+       }
        tsk->stack = ti;
+#ifdef CONFIG_SECCOMP
+       /*
+        * We must handle setting up seccomp filters once we're under
+        * the sighand lock in case orig has changed between now and
+        * then. Until then, filter must be NULL to avoid messing up
+        * the usage counts on the error path calling free_task.
+        */
+       tsk->seccomp.filter = NULL;
+#endif
 
        setup_thread_stack(tsk, orig);
        clear_user_return_notifier(tsk);
@@ -544,6 +577,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
        mm->cached_hole_size = ~0UL;
        mm_init_aio(mm);
        mm_init_owner(mm, p);
+       clear_tlb_flush_pending(mm);
 
        if (likely(!mm_alloc_pgd(mm))) {
                mm->def_flags = 0;
@@ -696,7 +730,8 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
 
        mm = get_task_mm(task);
        if (mm && mm != current->mm &&
-                       !ptrace_may_access(task, mode)) {
+                       !ptrace_may_access(task, mode) &&
+                       !capable(CAP_SYS_RESOURCE)) {
                mmput(mm);
                mm = ERR_PTR(-EACCES);
        }
@@ -871,6 +906,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
        int retval;
 
        tsk->min_flt = tsk->maj_flt = 0;
+#ifdef CONFIG_ZRAM
+       tsk->fm_flt = tsk->swap_in = tsk->swap_out = 0;
+#endif
        tsk->nvcsw = tsk->nivcsw = 0;
 #ifdef CONFIG_DETECT_HUNG_TASK
        tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
@@ -1044,6 +1082,11 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
        sig->nr_threads = 1;
        atomic_set(&sig->live, 1);
        atomic_set(&sig->sigcnt, 1);
+
+       /* list_add(thread_node, thread_head) without INIT_LIST_HEAD() */
+       sig->thread_head = (struct list_head)LIST_HEAD_INIT(tsk->thread_node);
+       tsk->thread_node = (struct list_head)LIST_HEAD_INIT(sig->thread_head);
+
        init_waitqueue_head(&sig->wait_chldexit);
        sig->curr_target = tsk;
        init_sigpending(&sig->shared_pending);
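
The compound-literal assignments above are, as their comment says, an open-coded list_add() that avoids a separate INIT_LIST_HEAD(). An equivalent, more conventional spelling (shown for illustration only, not part of the patch) would be:

    /* Start with an empty head, then link the group leader in, giving a
     * two-node circular list: sig->thread_head <-> tsk->thread_node. */
    INIT_LIST_HEAD(&sig->thread_head);
    list_add_tail(&tsk->thread_node, &sig->thread_head);
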
@@ -1085,6 +1128,39 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
        p->flags = new_flags;
 }
 
+static void copy_seccomp(struct task_struct *p)
+{
+#ifdef CONFIG_SECCOMP
+       /*
+        * Must be called with sighand->lock held, which is common to
+        * all threads in the group. Holding cred_guard_mutex is not
+        * needed because this new task is not yet running and cannot
+        * be racing exec.
+        */
+       assert_spin_locked(&current->sighand->siglock);
+
+       /* Ref-count the new filter user, and assign it. */
+       get_seccomp_filter(current);
+       p->seccomp = current->seccomp;
+
+       /*
+        * Explicitly enable no_new_privs here in case it got set
+        * between the task_struct being duplicated and holding the
+        * sighand lock. The seccomp state and nnp must be in sync.
+        */
+       if (task_no_new_privs(current))
+               task_set_no_new_privs(p);
+
+       /*
+        * If the parent gained a seccomp mode after the thread flags were
+        * copied and before we took the sighand lock, we have to manually
+        * enable the seccomp thread flag here.
+        */
+       if (p->seccomp.mode != SECCOMP_MODE_DISABLED)
+               set_tsk_thread_flag(p, TIF_SECCOMP);
+#endif
+}
+
 SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
 {
        current->clear_child_tid = tidptr;
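
For context, get_seccomp_filter() used in copy_seccomp() above only pins the parent's BPF filter so the child can share it. Roughly, paraphrasing kernel/seccomp.c of this kernel generation (an assumption for context, not quoted from this patch):

    void get_seccomp_filter(struct task_struct *tsk)
    {
            struct seccomp_filter *orig = tsk->seccomp.filter;

            if (!orig)
                    return;
            /* Reference count is bounded by the number of total processes. */
            atomic_inc(&orig->usage);
    }
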
@@ -1121,6 +1197,32 @@ static void posix_cpu_timers_init(struct task_struct *tsk)
        INIT_LIST_HEAD(&tsk->cpu_timers[2]);
 }
 
+#ifdef CONFIG_MTK_SCHED_CMP_TGS
+static void mt_init_thread_group(struct task_struct *p)
+{
+#ifdef CONFIG_MT_SCHED_INFO
+       struct task_struct *tg = p->group_leader;
+#endif
+
+       p->thread_group_info[0].cfs_nr_running = 0;
+       p->thread_group_info[0].nr_running = 0;
+       p->thread_group_info[0].load_avg_ratio = 0;
+       p->thread_group_info[1].cfs_nr_running = 0;
+       p->thread_group_info[1].nr_running = 0;
+       p->thread_group_info[1].load_avg_ratio = 0;
+
+#ifdef CONFIG_MT_SCHED_INFO
+       mt_sched_printf("fork %d:%s %d:%s %lu %lu %lu, %lu %lu %lu",
+          tg->pid, tg->comm, p->pid, p->comm,
+          tg->thread_group_info[0].nr_running,
+          tg->thread_group_info[0].cfs_nr_running,
+          tg->thread_group_info[0].load_avg_ratio,
+          tg->thread_group_info[1].cfs_nr_running,
+          tg->thread_group_info[1].nr_running,
+          tg->thread_group_info[1].load_avg_ratio);
+#endif
+}
+#endif
+
 /*
  * This creates a new process as a copy of the old one,
  * but does not actually start it yet.
@@ -1139,9 +1241,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        int retval;
        struct task_struct *p;
 
-       if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
+       if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) {
+               printk("[%d:%s] fork fail at cpp 1, clone_flags:0x%x\n", current->pid, current->comm, (unsigned int)clone_flags);
                return ERR_PTR(-EINVAL);
-
+       }
        if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
                return ERR_PTR(-EINVAL);
 
@@ -1149,17 +1252,19 @@ static struct task_struct *copy_process(unsigned long clone_flags,
         * Thread groups must share signals as well, and detached threads
         * can only be started up within the thread group.
         */
-       if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
+       if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) {
+               printk("[%d:%s] fork fail at cpp 2, clone_flags:0x%x\n", current->pid, current->comm, (unsigned int)clone_flags);
                return ERR_PTR(-EINVAL);
-
+       }
        /*
         * Shared signal handlers imply shared VM. By way of the above,
         * thread groups also imply shared VM. Blocking this case allows
         * for various simplifications in other code.
         */
-       if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
+       if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) {
+               printk("[%d:%s] fork fail at cpp 3, clone_flags:0x%x\n", current->pid, current->comm, (unsigned int)clone_flags);
                return ERR_PTR(-EINVAL);
-
+       }
        /*
         * Siblings of global init remain as zombies on exit since they are
         * not reaped by their parent (swapper). To solve this and to avoid
@@ -1167,14 +1272,16 @@ static struct task_struct *copy_process(unsigned long clone_flags,
         * from creating siblings.
         */
        if ((clone_flags & CLONE_PARENT) &&
-                               current->signal->flags & SIGNAL_UNKILLABLE)
+                               current->signal->flags & SIGNAL_UNKILLABLE) {
+               printk("[%d:%s] fork fail at cpp 4, clone_flags:0x%x\n", current->pid, current->comm, (unsigned int)clone_flags);
                return ERR_PTR(-EINVAL);
-
+       }
        /*
-        * If the new process will be in a different pid namespace
-        * don't allow the creation of threads.
+        * If the new process will be in a different pid namespace don't
+        * allow it to share a thread group or signal handlers with the
+        * forking task.
         */
-       if ((clone_flags & (CLONE_VM|CLONE_NEWPID)) &&
+       if ((clone_flags & (CLONE_SIGHAND | CLONE_NEWPID)) &&
            (task_active_pid_ns(current) != current->nsproxy->pid_ns))
                return ERR_PTR(-EINVAL);
 
@@ -1184,13 +1291,17 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
        retval = -ENOMEM;
        p = dup_task_struct(current);
-       if (!p)
+       if (!p) {
+               printk("[%d:%s] fork fail at dup_task_struct, p=%p\n", current->pid, current->comm, p);
                goto fork_out;
+       }
 
        ftrace_graph_init_task(p);
-       get_seccomp_filter(p);
 
        rt_mutex_init_task(p);
+#ifdef CONFIG_MTK_SCHED_CMP_TGS
+       raw_spin_lock_init(&p->thread_group_info_lock);
+#endif
 
 #ifdef CONFIG_PROVE_LOCKING
        DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
@@ -1317,7 +1428,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                goto bad_fork_cleanup_policy;
        retval = audit_alloc(p);
        if (retval)
-               goto bad_fork_cleanup_policy;
+               goto bad_fork_cleanup_perf;
        /* copy all the process information */
        retval = copy_semundo(clone_flags, p);
        if (retval)
@@ -1414,6 +1525,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
         */
        p->group_leader = p;
        INIT_LIST_HEAD(&p->thread_group);
+#ifdef CONFIG_MTK_SCHED_CMP_TGS
+       mt_init_thread_group(p);
+#endif
        p->task_works = NULL;
 
        /* Need tasklist lock for parent etc handling! */
@@ -1430,6 +1544,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
        spin_lock(&current->sighand->siglock);
 
+       /*
+        * Copy seccomp details explicitly here, in case they were changed
+        * before holding sighand lock.
+        */
+       copy_seccomp(p);
+
        /*
         * Process group and session signals need to be delivered to just the
         * parent before the fork or both the parent and the child after the
@@ -1446,14 +1566,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                goto bad_fork_free_pid;
        }
 
-       if (clone_flags & CLONE_THREAD) {
-               current->signal->nr_threads++;
-               atomic_inc(&current->signal->live);
-               atomic_inc(&current->signal->sigcnt);
-               p->group_leader = current->group_leader;
-               list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
-       }
-
        if (likely(p->pid)) {
                ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
 
@@ -1470,6 +1582,15 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                        list_add_tail(&p->sibling, &p->real_parent->children);
                        list_add_tail_rcu(&p->tasks, &init_task.tasks);
                        __this_cpu_inc(process_counts);
+               } else {
+                       current->signal->nr_threads++;
+                       atomic_inc(&current->signal->live);
+                       atomic_inc(&current->signal->sigcnt);
+                       p->group_leader = current->group_leader;
+                       list_add_tail_rcu(&p->thread_group,
+                                         &p->group_leader->thread_group);
+                       list_add_tail_rcu(&p->thread_node,
+                                         &p->signal->thread_head);
                }
                attach_pid(p, PIDTYPE_PID, pid);
                nr_threads++;
@@ -1477,7 +1598,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
        total_forks++;
        spin_unlock(&current->sighand->siglock);
+       syscall_tracepoint_update(p);
        write_unlock_irq(&tasklist_lock);
+
        proc_fork_connector(p);
        cgroup_post_fork(p);
        if (clone_flags & CLONE_THREAD)
@@ -1512,8 +1635,9 @@ bad_fork_cleanup_semundo:
        exit_sem(p);
 bad_fork_cleanup_audit:
        audit_free(p);
-bad_fork_cleanup_policy:
+bad_fork_cleanup_perf:
        perf_event_free_task(p);
+bad_fork_cleanup_policy:
 #ifdef CONFIG_NUMA
        mpol_put(p->mempolicy);
 bad_fork_cleanup_cgroup:
@@ -1529,6 +1653,7 @@ bad_fork_cleanup_count:
 bad_fork_free:
        free_task(p);
 fork_out:
+       printk("[%d:%s] fork fail retval:0x%x\n", current->pid, current->comm, retval);
        return ERR_PTR(retval);
 }
 
@@ -1560,6 +1685,10 @@ struct task_struct * __cpuinit fork_idle(int cpu)
  * It copies the process, and if successful kick-starts
  * it and waits for it to finish using the VM if required.
  */
+#ifdef CONFIG_SCHEDSTATS
+/* MT scheduler profiling */
+extern void save_mtproc_info(struct task_struct *p, unsigned long long ts);
+#endif
 long do_fork(unsigned long clone_flags,
              unsigned long stack_start,
              unsigned long stack_size,
@@ -1575,8 +1704,10 @@ long do_fork(unsigned long clone_flags,
         * actually start allocating stuff
         */
        if (clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) {
-               if (clone_flags & (CLONE_THREAD|CLONE_PARENT))
+               if (clone_flags & (CLONE_THREAD|CLONE_PARENT)) {
+                       printk("[%d:%s] fork fail at clone_thread, flags:0x%x\n", current->pid, current->comm, (unsigned int)clone_flags);
                        return -EINVAL;
+               }
        }
 
        /*
@@ -1605,10 +1736,12 @@ long do_fork(unsigned long clone_flags,
         */
        if (!IS_ERR(p)) {
                struct completion vfork;
+               struct pid *pid;
 
                trace_sched_process_fork(current, p);
 
-               nr = task_pid_vnr(p);
+               pid = get_task_pid(p, PIDTYPE_PID);
+               nr = pid_vnr(pid);
 
                if (clone_flags & CLONE_PARENT_SETTID)
                        put_user(nr, parent_tidptr);
@@ -1619,18 +1752,30 @@ long do_fork(unsigned long clone_flags,
                        get_task_struct(p);
                }
 
+#ifdef CONFIG_SCHEDSTATS
+               /* MT scheduler profiling */
+               save_mtproc_info(p, sched_clock());
+               printk(KERN_DEBUG "[%d:%s] fork [%d:%s]\n", current->pid, current->comm, p->pid, p->comm);
+#endif
                wake_up_new_task(p);
 
                /* forking complete and child started to run, tell ptracer */
                if (unlikely(trace))
-                       ptrace_event(trace, nr);
+                       ptrace_event_pid(trace, pid);
 
                if (clone_flags & CLONE_VFORK) {
                        if (!wait_for_vfork_done(p, &vfork))
-                               ptrace_event(PTRACE_EVENT_VFORK_DONE, nr);
+                               ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);
                }
+
+               put_pid(pid);
+#ifdef CONFIG_MT_PRIO_TRACER
+               create_prio_tracer(task_pid_nr(p));
+               update_prio_tracer(task_pid_nr(p), p->prio, p->policy, PTS_KRNL);
+#endif
        } else {
                nr = PTR_ERR(p);
+               printk("[%d:%s] fork fail:[%p, %d]\n", current->pid, current->comm, p, (int)nr);
        }
        return nr;
 }
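
The struct pid pinned with get_task_pid() above stays valid across wake_up_new_task() even if the child exits immediately, and ptrace_event_pid() can then report the id in the tracer's pid namespace rather than the forking task's. A rough sketch of that helper, paraphrased from include/linux/ptrace.h of this era (treat the body as an assumption, it is not part of this diff):

    static inline void ptrace_event_pid(int event, struct pid *pid)
    {
            unsigned long message = 0;
            struct pid_namespace *ns;

            /* Translate into the tracer's pid namespace at event time. */
            rcu_read_lock();
            ns = task_active_pid_ns(rcu_dereference(current->parent));
            if (ns)
                    message = pid_nr_ns(pid, ns);
            rcu_read_unlock();

            ptrace_event(event, message);
    }
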
@@ -1675,6 +1820,12 @@ SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags,
                 int __user *, parent_tidptr,
                 int __user *, child_tidptr,
                 int, tls_val)
+#elif defined(CONFIG_CLONE_BACKWARDS3)
+SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp,
+               int, stack_size,
+               int __user *, parent_tidptr,
+               int __user *, child_tidptr,
+               int, tls_val)
 #else
 SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
                 int __user *, parent_tidptr,
@@ -1739,13 +1890,21 @@ static int check_unshare_flags(unsigned long unshare_flags)
                                CLONE_NEWUSER|CLONE_NEWPID))
                return -EINVAL;
        /*
-        * Not implemented, but pretend it works if there is nothing to
-        * unshare. Note that unsharing CLONE_THREAD or CLONE_SIGHAND
-        * needs to unshare vm.
+        * Not implemented, but pretend it works if there is nothing
+        * to unshare.  Note that unsharing the address space or the
+        * signal handlers also need to unshare the signal queues (aka
+        * CLONE_THREAD).
         */
        if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) {
-               /* FIXME: get_task_mm() increments ->mm_users */
-               if (atomic_read(&current->mm->mm_users) > 1)
+               if (!thread_group_empty(current))
+                       return -EINVAL;
+       }
+       if (unshare_flags & (CLONE_SIGHAND | CLONE_VM)) {
+               if (atomic_read(&current->sighand->count) > 1)
+                       return -EINVAL;
+       }
+       if (unshare_flags & CLONE_VM) {
+               if (!current_is_single_threaded())
                        return -EINVAL;
        }
 
@@ -1818,16 +1977,16 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
         */
        if (unshare_flags & CLONE_NEWPID)
                unshare_flags |= CLONE_THREAD;
-       /*
-        * If unsharing a thread from a thread group, must also unshare vm.
-        */
-       if (unshare_flags & CLONE_THREAD)
-               unshare_flags |= CLONE_VM;
        /*
         * If unsharing vm, must also unshare signal handlers.
         */
        if (unshare_flags & CLONE_VM)
                unshare_flags |= CLONE_SIGHAND;
+       /*
+        * If unsharing signal handlers, must also unshare the signal queues.
+        */
+       if (unshare_flags & CLONE_SIGHAND)
+               unshare_flags |= CLONE_THREAD;
        /*
         * If unsharing namespace, must also unshare filesystem information.
         */