arch/arm64/kernel/topology.c  [GitHub: mt8127/android_kernel_alcatel_ttab.git]
1 /*
2 * arch/arm64/kernel/topology.c
3 *
4 * Copyright (C) 2011 Linaro Limited.
5 * Written by: Vincent Guittot
6 *
7 * based on arch/sh/kernel/topology.c
8 *
9 * This file is subject to the terms and conditions of the GNU General Public
10 * License. See the file "COPYING" in the main directory of this archive
11 * for more details.
12 */
13
14 #include <linux/cpu.h>
15 #include <linux/cpumask.h>
16 #include <linux/export.h>
17 #include <linux/init.h>
18 #include <linux/percpu.h>
19 #include <linux/node.h>
20 #include <linux/nodemask.h>
21 #include <linux/of.h>
22 #include <linux/sched.h>
23 #include <linux/slab.h>
24
25 #include <asm/cputype.h>
26 #include <asm/smp_plat.h>
27 #include <asm/topology.h>
28
29 /*
30 * cpu power scale management
31 */
32
33 /*
34 * cpu power table
35 * This per cpu data structure describes the relative capacity of each core.
36 * On a heterogeneous system, cores don't have the same computation capacity
37 * and we reflect that difference in the cpu_power field so the scheduler can
38 * take this difference into account during load balance. A per cpu structure
39 * is preferred because each CPU updates its own cpu_power field during the
40 * load balance except for idle cores. One idle core is selected to run the
41 * rebalance_domains for all idle cores and the cpu_power can be updated
42 * during this sequence.
43 */
44
45 /* when CONFIG_ARCH_SCALE_INVARIANT_CPU_CAPACITY is in use, a new measure of
46 * compute capacity is available. This is limited to a maximum of 1024 and
47 * scaled between 0 and 1023 according to frequency.
48 * Cores with different base CPU powers are scaled in line with this.
49 * The capacity of each core therefore represents its ratio to the maximum
50 * compute capacity achievable by any core in this system.
51 *
52 * e.g.1 If all cores in the system have a base CPU power of 1024 according to
53 * efficiency calculations and are DVFS scalable between 500MHz and 1GHz, the
54 * cores currently at 1GHz will have CPU power of 1024 whilst the cores
55 * currently at 500MHz will have CPU power of 512.
56 *
57 * e.g.2
58 * If core 0 has a base CPU power of 2048 and runs at 500MHz & 1GHz whilst
59 * core 1 has a base CPU power of 1024 and runs at 100MHz and 200MHz, then
60 * the following possibilities are available:
61 *
62 * cpu power\freq | 1GHz:100MHz | 1GHz : 200MHz | 500MHz:100MHz | 500MHz:200MHz |
63 * ----------|-------------|---------------|---------------|---------------|
64 * core 0 | 1024 | 1024 | 512 | 512 |
65 * core 1 | 256 | 512 | 256 | 512 |
66 *
67 * This information may be useful to the scheduler when load balancing,
68 * so that the compute capacity of the core a task ran on can be baked into
69 * task load histories.
70 */
71 static DEFINE_PER_CPU(unsigned long, cpu_scale);
72 #ifdef CONFIG_ARCH_SCALE_INVARIANT_CPU_CAPACITY
73 static DEFINE_PER_CPU(unsigned long, base_cpu_capacity);
74 static DEFINE_PER_CPU(unsigned long, invariant_cpu_capacity);
75 static DEFINE_PER_CPU(unsigned long, prescaled_cpu_capacity);
76 #endif /* CONFIG_ARCH_SCALE_INVARIANT_CPU_CAPACITY */
77
78 static int frequency_invariant_power_enabled = 1;
79
80 /* val > 0 enables frequency-invariant power, val <= 0 disables it */
81 void arch_set_invariant_power_enabled(int val)
82 {
83 if (val > 0)
84 frequency_invariant_power_enabled = 1;
85 else
86 frequency_invariant_power_enabled = 0;
87 }
88
89 int arch_get_invariant_power_enabled(void)
90 {
91 return frequency_invariant_power_enabled;
92 }
93
94 unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu)
95 {
96 return per_cpu(cpu_scale, cpu);
97 }
98
99 #ifdef CONFIG_ARCH_SCALE_INVARIANT_CPU_CAPACITY
100 unsigned long arch_get_cpu_capacity(int cpu)
101 {
102 return per_cpu(invariant_cpu_capacity, cpu);
103 }
104 unsigned long arch_get_max_cpu_capacity(int cpu)
105 {
106 return per_cpu(base_cpu_capacity, cpu);
107 }
108 #endif /* CONFIG_ARCH_SCALE_INVARIANT_CPU_CAPACITY */
109
110 static void set_power_scale(unsigned int cpu, unsigned long power)
111 {
112 per_cpu(cpu_scale, cpu) = power;
113 }
114
115 #ifdef CONFIG_OF
116 struct cpu_efficiency {
117 const char *compatible;
118 unsigned long efficiency;
119 };
120
121 /*
122 * Table of relative efficiency of each processor.
123 * The efficiency value must fit in 20 bits and the final
124 * cpu_scale value must be in the range
125 * 0 < cpu_scale < 3*SCHED_POWER_SCALE/2
126 * in order to return at most 1 when DIV_ROUND_CLOSEST
127 * is used to compute the capacity of a CPU.
128 * Processors that are not listed in the table
129 * use the default SCHED_POWER_SCALE value for cpu_scale.
130 */
131 struct cpu_efficiency table_efficiency[] = {
132 {"arm,cortex-a57", 3891},
133 {"arm,cortex-a53", 2048},
134 {NULL, },
135 };
136
137 struct cpu_capacity {
138 unsigned long hwid;
139 unsigned long capacity;
140 };
141
142 struct cpu_capacity *cpu_capacity;
143
144 unsigned long middle_capacity = 1;
145 /*
146 * Iterate over all CPU descriptors in the DT and compute each one's
147 * efficiency (as per table_efficiency). Also calculate a middle efficiency
148 * as close as possible to (max{eff_i} + min{eff_i}) / 2.
149 * This is later used to scale the cpu_power field such that an
150 * 'average' CPU is of middle power. Also see the comments near
151 * table_efficiency[] and update_cpu_power().
152 */
153 static void __init parse_dt_topology(void)
154 {
155 struct cpu_efficiency *cpu_eff;
156 struct device_node *cn = NULL;
157 unsigned long min_capacity = (unsigned long)(-1);
158 unsigned long max_capacity = 0;
159 unsigned long capacity = 0;
160 int alloc_size, cpu = 0;
161
162 alloc_size = nr_cpu_ids * sizeof(struct cpu_capacity);
163 cpu_capacity = kzalloc(alloc_size, GFP_NOWAIT);
164
165 while ((cn = of_find_node_by_type(cn, "cpu"))) {
166 const u32 *rate, *reg;
167 int len;
168
169 if (cpu >= num_possible_cpus())
170 break;
171
172 for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++)
173 if (of_device_is_compatible(cn, cpu_eff->compatible))
174 break;
175
176 if (cpu_eff->compatible == NULL)
177 continue;
178
179 rate = of_get_property(cn, "clock-frequency", &len);
180 if (!rate || len != 4) {
181 pr_err("%s missing clock-frequency property\n",
182 cn->full_name);
183 continue;
184 }
185
186 reg = of_get_property(cn, "reg", &len);
187 if (!reg || len != 4) {
188 pr_err("%s missing reg property\n", cn->full_name);
189 continue;
190 }
191
192 capacity = ((be32_to_cpup(rate)) >> 20) * cpu_eff->efficiency;
193
194 /* Save min capacity of the system */
195 if (capacity < min_capacity)
196 min_capacity = capacity;
197
198 /* Save max capacity of the system */
199 if (capacity > max_capacity)
200 max_capacity = capacity;
201
202 cpu_capacity[cpu].capacity = capacity;
203 cpu_capacity[cpu++].hwid = be32_to_cpup(reg);
204 }
205
206 if (cpu < num_possible_cpus())
207 cpu_capacity[cpu].hwid = (unsigned long)(-1);
208
209 /* If min and max capacities are equal, we bypass the update of the
210 * cpu_scale because all CPUs have the same capacity. Otherwise, we
211 * compute a middle_capacity factor that will ensure that the capacity
212 * of an 'average' CPU of the system will be as close as possible to
213 * SCHED_POWER_SCALE, which is the default value, but with the
214 * constraint explained near table_efficiency[].
215 */
216 if (min_capacity == max_capacity)
217 cpu_capacity[0].hwid = (unsigned long)(-1);
218 else if (4*max_capacity < (3*(max_capacity + min_capacity)))
219 middle_capacity = (min_capacity + max_capacity)
220 >> (SCHED_POWER_SHIFT+1);
221 else
222 middle_capacity = ((max_capacity / 3)
223 >> (SCHED_POWER_SHIFT-1)) + 1;
224
225 }
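/*
 * Worked example (illustrative only; the clock-frequency values below are
 * assumed, not taken from this platform's device tree):
 *
 *   Cortex-A53 at 1.3 GHz: capacity = (1300000000 >> 20) * 2048 = 1239 * 2048 = 2537472
 *   Cortex-A57 at 1.8 GHz: capacity = (1800000000 >> 20) * 3891 = 1716 * 3891 = 6676956
 *
 * Since 4*max_capacity < 3*(max_capacity + min_capacity), the first branch
 * applies and, with SCHED_POWER_SHIFT == 10:
 *
 *   middle_capacity = (2537472 + 6676956) >> 11 = 4499
 *
 * update_cpu_power() then yields cpu_scale = 2537472 / 4499 = 564 for the A53
 * and 6676956 / 4499 = 1484 for the A57, so an 'average' CPU ends up close to
 * SCHED_POWER_SCALE (1024), as intended.
 */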
226
227 /*
228 * Look for a custom capacity for a CPU in the cpu_capacity table during
229 * boot. Updating all CPUs is O(n^2) on a heterogeneous system, but the
230 * function returns immediately on a homogeneous (SMP) system.
231 */
232 void update_cpu_power(unsigned int cpu, unsigned long hwid)
233 {
234 unsigned int idx = 0;
235
236 /* look for the cpu's hwid in the cpu capacity table */
237 for (idx = 0; idx < num_possible_cpus(); idx++) {
238 if (cpu_capacity[idx].hwid == hwid)
239 break;
240
241 if (cpu_capacity[idx].hwid == -1)
242 return;
243 }
244
245 if (idx == num_possible_cpus())
246 return;
247
248 set_power_scale(cpu, cpu_capacity[idx].capacity / middle_capacity);
249
250 printk(KERN_INFO "CPU%u: update cpu_power %lu\n",
251 cpu, arch_scale_freq_power(NULL, cpu));
252 }
253
254 #else
255 static inline void parse_dt_topology(void) {}
256 static inline void update_cpu_power(unsigned int cpuid, unsigned int mpidr) {}
257 #endif
258
259 /*
260 * cpu topology table
261 */
262 struct cputopo_arm cpu_topology[NR_CPUS];
263 EXPORT_SYMBOL_GPL(cpu_topology);
264
265 #if defined (CONFIG_MTK_SCHED_CMP_PACK_SMALL_TASK) || defined (CONFIG_HMP_PACK_SMALL_TASK)
266 int arch_sd_share_power_line(void)
267 {
268 return 0*SD_SHARE_POWERLINE; /* SD_SHARE_POWERLINE is never set on this platform */
269 }
270 #endif /* CONFIG_MTK_SCHED_CMP_PACK_SMALL_TASK || CONFIG_HMP_PACK_SMALL_TASK */
271
272 const struct cpumask *cpu_coregroup_mask(int cpu)
273 {
274 return &cpu_topology[cpu].core_sibling;
275 }
276
277 void update_siblings_masks(unsigned int cpuid)
278 {
279 struct cputopo_arm *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
280 int cpu;
281
282 /* update core and thread sibling masks */
283 for_each_possible_cpu(cpu) {
284 cpu_topo = &cpu_topology[cpu];
285
286 if (cpuid_topo->socket_id != cpu_topo->socket_id)
287 continue;
288
289 cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
290 if (cpu != cpuid)
291 cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
292
293 if (cpuid_topo->core_id != cpu_topo->core_id)
294 continue;
295
296 cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
297 if (cpu != cpuid)
298 cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
299 }
300 smp_wmb();
301 }
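/*
 * Example of the resulting masks (hypothetical 4+4 layout, CPUs 0-3 in
 * socket 0 and CPUs 4-7 in socket 1, one thread per core):
 *
 *   cpu_topology[0].core_sibling   = CPUs 0-3
 *   cpu_topology[4].core_sibling   = CPUs 4-7
 *   cpu_topology[n].thread_sibling = CPU n only (core_id differs per CPU)
 *
 * cpu_coregroup_mask() above simply returns the per-CPU core_sibling mask.
 */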
302
303 #ifdef CONFIG_MTK_CPU_TOPOLOGY
304
305 enum {
306 ARCH_UNKNOWN = 0,
307 ARCH_SINGLE_CLUSTER,
308 ARCH_MULTI_CLUSTER,
309 ARCH_BIG_LITTLE,
310 };
311
312 struct cpu_cluster {
313 int cluster_id;
314 cpumask_t siblings;
315 void *next;
316 };
317
318 struct cpu_compatible {
319 const char *name;
320 const unsigned int cpuidr;
321 struct cpu_cluster *cluster;
322 int clscnt;
323 };
324
325 struct cpu_arch_info {
326 struct cpu_compatible *compat_big;
327 struct cpu_compatible *compat_ltt;
328 bool arch_ready;
329 int arch_type;
330 int nr_clusters;
331 };
332
333 /* NOTE: entries must be kept in strictly descending order of cpu capacity */
334 struct cpu_compatible cpu_compat_table[] = {
335 { "arm,cortex-a57", ARM_CPU_PART_CORTEX_A57, NULL, 0 },
336 { "arm,cortex-a53", ARM_CPU_PART_CORTEX_A53, NULL, 0 },
337 { NULL, 0, NULL, 0 }
338 };
339
340 static struct cpu_compatible* compat_cputopo[NR_CPUS];
341
342 static struct cpu_arch_info default_cpu_arch = {
343 NULL,
344 NULL,
345 0,
346 ARCH_UNKNOWN,
347 0,
348 };
349 static struct cpu_arch_info *glb_cpu_arch = &default_cpu_arch;
350
351 static int __arch_type(void)
352 {
353 int i, num_compat = 0;
354
355 if (!glb_cpu_arch->arch_ready)
356 return ARCH_UNKNOWN;
357
358 // return the cached setting if queried more than once.
359 if (glb_cpu_arch->arch_type != ARCH_UNKNOWN)
360 return glb_cpu_arch->arch_type;
361
362 for (i = 0; i < ARRAY_SIZE(cpu_compat_table); i++) {
363 struct cpu_compatible *mc = &cpu_compat_table[i];
364 if (mc->clscnt != 0)
365 num_compat++;
366 }
367
368 if (num_compat > 1)
369 glb_cpu_arch->arch_type = ARCH_BIG_LITTLE;
370 else if (glb_cpu_arch->nr_clusters > 1)
371 glb_cpu_arch->arch_type = ARCH_MULTI_CLUSTER;
372 else if (num_compat == 1 && glb_cpu_arch->nr_clusters == 1)
373 glb_cpu_arch->arch_type = ARCH_SINGLE_CLUSTER;
374
375 return glb_cpu_arch->arch_type;
376 }
377
378 static DEFINE_SPINLOCK(__cpu_cluster_lock);
379 static void __setup_cpu_cluster(const unsigned int cpu,
380 struct cpu_compatible * const cpt,
381 const u32 mpidr)
382 {
383 struct cpu_cluster *prev_cls, *cls;
384 u32 cls_id = -1;
385
386 if (mpidr & MPIDR_MT_BITMASK)
387 cls_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);
388 else
389 cls_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
390
391 spin_lock(&__cpu_cluster_lock);
392
393 cls = cpt->cluster;
394 prev_cls = cls;
395 while (cls) {
396 if (cls->cluster_id == cls_id)
397 break;
398 prev_cls = cls;
399 cls = (struct cpu_cluster *)cls->next;
400 }
401
402 if (!cls) {
403 cls = kzalloc(sizeof(struct cpu_cluster), GFP_ATOMIC);
404 BUG_ON(!cls);
405 cls->cluster_id = cls_id;
406 cpt->clscnt++;
407 glb_cpu_arch->nr_clusters++;
408 /* link it */
409 if (!cpt->cluster)
410 cpt->cluster = cls;
411 else
412 prev_cls->next = cls;
413 }
414 BUG_ON(cls->cluster_id != cls_id);
415
416 cpumask_set_cpu(cpu, &cls->siblings);
417 smp_wmb();
418
419 spin_unlock(&__cpu_cluster_lock);
420 }
421
422 static void setup_cputopo(const unsigned int cpu,
423 struct cpu_compatible * const cpt,
424 const u32 mpidr)
425
426 {
427 if (compat_cputopo[cpu])
428 return;
429
430 compat_cputopo[cpu] = cpt;
431
432 if (!glb_cpu_arch->compat_big || glb_cpu_arch->compat_big > cpt)
433 glb_cpu_arch->compat_big = cpt;
434
435 if (!glb_cpu_arch->compat_ltt || glb_cpu_arch->compat_ltt < cpt)
436 glb_cpu_arch->compat_ltt = cpt;
437
438 __setup_cpu_cluster(cpu, cpt, mpidr);
439 }
440
441 static void setup_cputopo_def(const unsigned int cpu)
442 {
443 struct cpu_compatible *idx = NULL;
444 unsigned int cpuidr = 0, mpidr;
445
446 BUG_ON(cpu != smp_processor_id());
447 cpuidr = read_cpuid_part_number();
448 mpidr = read_cpuid_mpidr();
449 for (idx = cpu_compat_table; idx->name; idx++) {
450 if (idx->cpuidr == cpuidr)
451 break;
452 }
453 BUG_ON(!idx || !idx->name);
454 setup_cputopo(cpu, idx, mpidr);
455 }
456
457 static void reset_cputopo(void)
458 {
459 struct cpu_compatible *idx;
460
461 memset(glb_cpu_arch, 0, sizeof(struct cpu_arch_info));
462 glb_cpu_arch->arch_type = ARCH_UNKNOWN;
463
464 memset(&compat_cputopo, 0, sizeof(compat_cputopo));
465
466 spin_lock(&__cpu_cluster_lock);
467 for (idx = cpu_compat_table; idx->name; idx++) {
468 struct cpu_cluster *curr, *next;
469
470 if (idx->clscnt == 0)
471 continue;
472 BUG_ON(!idx->cluster);
473
474 curr = idx->cluster;
475 next = (struct cpu_cluster *)curr->next;
476 kfree(curr);
477
478 while (next) {
479 curr = next;
480 next = (struct cpu_cluster *)curr->next;
481 kfree(curr);
482 }
483 idx->cluster = NULL;
484 idx->clscnt = 0;
485 }
486 spin_unlock(&__cpu_cluster_lock);
487 }
488
489 /* Verify cpu topology correctness against the device tree.
490 * This function must be called on the CPU identified by cpuid!
491 */
492 static void verify_cputopo(const unsigned int cpuid, const u32 mpidr)
493 {
494 struct cputopo_arm *cpuid_topo = &cpu_topology[cpuid];
495 struct cpu_compatible *cpt;
496 struct cpu_cluster *cls;
497
498 if (!glb_cpu_arch->arch_ready) {
499 int i;
500
501 setup_cputopo_def(cpuid);
502 for (i = 0; i < nr_cpu_ids; i++)
503 if (!compat_cputopo[i])
504 break;
505 if (i == nr_cpu_ids)
506 glb_cpu_arch->arch_ready = true;
507
508 return;
509 }
510
511 cpt = compat_cputopo[cpuid];
512 BUG_ON(!cpt);
513 cls = cpt->cluster;
514 while (cls) {
515 if (cpu_isset(cpuid, cls->siblings))
516 break;
517 cls = cls->next;
518 }
519 BUG_ON(!cls);
520 WARN(cls->cluster_id != cpuid_topo->socket_id,
521 "[%s] cpu id: %d, cluster id (%d) != socket id (%d)\n",
522 __func__, cpuid, cls->cluster_id, cpuid_topo->socket_id);
523 }
524
525 /*
526 * Build the CPU topology from the DT; arch_ready is set only when every CPU is recognized.
527 */
528 void arch_build_cpu_topology_domain(void)
529 {
530 struct device_node *cn = NULL;
531 unsigned int cpu = 0;
532 u32 mpidr;
533
534 memset(&compat_cputopo, 0, sizeof(compat_cputopo));
535 // default by device tree parsing
536 while ((cn = of_find_node_by_type(cn, "cpu"))) {
537 struct cpu_compatible *idx;
538 const u32 *reg;
539 int len;
540
541 if (unlikely(cpu >= nr_cpu_ids)) {
542 pr_err("[CPUTOPO][%s] device tree cpu%d exceeds the number of possible CPUs\n",
543 __func__, cpu);
544 break;
545 }
546
547 for (idx = cpu_compat_table; idx->name; idx++)
548 if (of_device_is_compatible(cn, idx->name))
549 break;
550
551 if (!idx || !idx->name) {
552 int cplen;
553 const char *cp;
554 cp = (char *) of_get_property(cn, "compatible", &cplen);
555 pr_err("[CPUTOPO][%s] device tree cpu%d (%s) is not compatible!!\n",
556 __func__, cpu, cp);
557 break;
558 }
559
560 reg = of_get_property(cn, "reg", &len);
561 if (!reg || len != 4) {
562 pr_err("[CPUTOPO][%s] missing reg property\n", cn->full_name);
563 break;
564 }
565 mpidr = be32_to_cpup(reg);
566 setup_cputopo(cpu, idx, mpidr);
567 cpu++;
568 }
569 glb_cpu_arch->arch_ready = (cpu == nr_cpu_ids);
570
571 if (!glb_cpu_arch->arch_ready) {
572 pr_warn("[CPUTOPO][%s] build cpu topology failed, to be handled by mpidr/cpuidr regs!\n", __func__);
573 reset_cputopo();
574 setup_cputopo_def(smp_processor_id());
575 }
576 }
577
578 int arch_cpu_is_big(unsigned int cpu)
579 {
580 int type;
581
582 if (unlikely(cpu >= nr_cpu_ids))
583 BUG();
584
585 type = __arch_type();
586 switch(type) {
587 case ARCH_BIG_LITTLE:
588 return (compat_cputopo[cpu] == glb_cpu_arch->compat_big);
589 default:
590 /* treat as little */
591 return 0;
592 }
593 }
594
595 int arch_cpu_is_little(unsigned int cpu)
596 {
597 int type;
598
599 if (unlikely(cpu >= nr_cpu_ids))
600 BUG();
601
602 type = __arch_type();
603 switch(type) {
604 case ARCH_BIG_LITTLE:
605 return (compat_cputopo[cpu] == glb_cpu_arch->compat_ltt);
606 default:
607 /* treat as little */
608 return 1;
609 }
610 }
611
612 int arch_is_multi_cluster(void)
613 {
614 return (__arch_type() == ARCH_MULTI_CLUSTER || __arch_type() == ARCH_BIG_LITTLE);
615 }
616
617 int arch_is_big_little(void)
618 {
619 return (__arch_type() == ARCH_BIG_LITTLE);
620 }
621
622 int arch_get_nr_clusters(void)
623 {
624 return glb_cpu_arch->nr_clusters;
625 }
626
627 int arch_get_cluster_id(unsigned int cpu)
628 {
629 struct cputopo_arm *arm_cputopo = &cpu_topology[cpu];
630 struct cpu_compatible *cpt;
631 struct cpu_cluster *cls;
632
633 BUG_ON(cpu >= nr_cpu_ids);
634 if (!glb_cpu_arch->arch_ready) {
635 WARN_ONCE(!glb_cpu_arch->arch_ready, "[CPUTOPO][%s] cpu(%d), socket_id(%d) topology is not ready!\n",
636 __func__, cpu, arm_cputopo->socket_id);
637 if (unlikely(arm_cputopo->socket_id < 0))
638 return 0;
639 return arm_cputopo->socket_id;
640 }
641
642 cpt = compat_cputopo[cpu];
643 BUG_ON(!cpt);
644 cls = cpt->cluster;
645 while (cls) {
646 if (cpu_isset(cpu, cls->siblings))
647 break;
648 cls = cls->next;
649 }
650 BUG_ON(!cls);
651 WARN_ONCE(cls->cluster_id != arm_cputopo->socket_id, "[CPUTOPO][%s] cpu(%d): cluster_id(%d) != socket_id(%d) !\n",
652 __func__, cpu, cls->cluster_id, arm_cputopo->socket_id);
653
654 return cls->cluster_id;
655 }
656
657 static struct cpu_cluster *__get_cluster_slowpath(int cluster_id)
658 {
659 int i = 0;
660 struct cpu_compatible *cpt;
661 struct cpu_cluster *cls;
662
663 for (i = 0; i < nr_cpu_ids; i++) {
664 cpt = compat_cputopo[i];
665 BUG_ON(!cpt);
666 cls = cpt->cluster;
667 while (cls) {
668 if (cls->cluster_id == cluster_id)
669 return cls;
670 cls = cls->next;
671 }
672 }
673 return NULL;
674 }
675
676 void arch_get_cluster_cpus(struct cpumask *cpus, int cluster_id)
677 {
678 struct cpu_cluster *cls = NULL;
679
680 cpumask_clear(cpus);
681
682 if (likely(glb_cpu_arch->compat_ltt)) {
683 cls = glb_cpu_arch->compat_ltt->cluster;
684 while (cls) {
685 if (cls->cluster_id == cluster_id)
686 goto found;
687 cls = cls->next;
688 }
689 }
690 if (likely(glb_cpu_arch->compat_big)) {
691 cls = glb_cpu_arch->compat_big->cluster;
692 while (cls) {
693 if (cls->cluster_id == cluster_id)
694 goto found;
695 cls = cls->next;
696 }
697 }
698
699 cls = __get_cluster_slowpath(cluster_id);
700 BUG_ON(!cls); // debug only.. remove later...
701 if (!cls)
702 return;
703
704 found:
705 cpumask_copy(cpus, &cls->siblings);
706 }
707
708 /*
709 * arch_get_big_little_cpus - get big/LITTLE cores in cpumask
710 * @big: the cpumask pointer of big cores
711 * @little: the cpumask pointer of little cores
712 *
713 * Treat all cores as little cores if this is not a big.LITTLE architecture.
714 */
715 void arch_get_big_little_cpus(struct cpumask *big, struct cpumask *little)
716 {
717 int type;
718 struct cpu_cluster *cls = NULL;
719 struct cpumask tmpmask;
720 unsigned int cpu;
721
722 if (unlikely(!glb_cpu_arch->arch_ready))
723 BUG();
724
725 type = __arch_type();
726 spin_lock(&__cpu_cluster_lock);
727 switch (type) {
728 case ARCH_BIG_LITTLE:
/* start from empty masks; the multi-cluster branches below OR sibling masks into big/little */
cpumask_clear(big);
cpumask_clear(little);
729 if (likely(1 == glb_cpu_arch->compat_big->clscnt)) {
730 cls = glb_cpu_arch->compat_big->cluster;
731 cpumask_copy(big, &cls->siblings);
732 } else {
733 cls = glb_cpu_arch->compat_big->cluster;
734 while (cls) {
735 cpumask_or(&tmpmask, big, &cls->siblings);
736 cpumask_copy(big, &tmpmask);
737 cls = cls->next;
738 }
739 }
740 if (likely(1 == glb_cpu_arch->compat_ltt->clscnt)) {
741 cls = glb_cpu_arch->compat_ltt->cluster;
742 cpumask_copy(little, &cls->siblings);
743 } else {
744 cls = glb_cpu_arch->compat_ltt->cluster;
745 while (cls) {
746 cpumask_or(&tmpmask, little, &cls->siblings);
747 cpumask_copy(little, &tmpmask);
748 cls = cls->next;
749 }
750 }
751 break;
752 default:
753 /* treat as little */
754 cpumask_clear(big);
755 cpumask_clear(little);
756 for_each_possible_cpu(cpu)
757 cpumask_set_cpu(cpu, little);
758 }
759 spin_unlock(&__cpu_cluster_lock);
760 }
761 #else /* !CONFIG_MTK_CPU_TOPOLOGY */
762 int arch_cpu_is_big(unsigned int cpu) { return 0; }
763 int arch_cpu_is_little(unsigned int cpu) { return 1; }
764 int arch_is_big_little(void) { return 0; }
765
766 int arch_get_nr_clusters(void)
767 {
768 int max_id = 0;
769 unsigned int cpu;
770
771 // assume socket ids are monotonically increasing without gaps.
772 for_each_possible_cpu(cpu) {
773 struct cputopo_arm *arm_cputopo = &cpu_topology[cpu];
774 if (arm_cputopo->socket_id > max_id)
775 max_id = arm_cputopo->socket_id;
776 }
777 return max_id+1;
778 }
779
780 int arch_is_multi_cluster(void)
781 {
782 return (arch_get_nr_clusters() > 1 ? 1 : 0);
783 }
784
785 int arch_get_cluster_id(unsigned int cpu)
786 {
787 struct cputopo_arm *arm_cputopo = &cpu_topology[cpu];
788 return arm_cputopo->socket_id < 0 ? 0 : arm_cputopo->socket_id;
789 }
790
791 void arch_get_cluster_cpus(struct cpumask *cpus, int cluster_id)
792 {
793 unsigned int cpu, found_id = -1;
794
795 for_each_possible_cpu(cpu) {
796 struct cputopo_arm *arm_cputopo = &cpu_topology[cpu];
797 if (arm_cputopo->socket_id == cluster_id) {
798 found_id = cluster_id;
799 break;
800 }
801 }
802 if (-1 == found_id || cluster_to_logical_mask(found_id, cpus)) {
803 cpumask_clear(cpus);
804 for_each_possible_cpu(cpu)
805 cpumask_set_cpu(cpu, cpus);
806 }
807 }
808 void arch_get_big_little_cpus(struct cpumask *big, struct cpumask *little)
809 {
810 unsigned int cpu;
811 cpumask_clear(big);
812 cpumask_clear(little);
813 for_each_possible_cpu(cpu)
814 cpumask_set_cpu(cpu, little);
815 }
816 #endif /* CONFIG_MTK_CPU_TOPOLOGY */
817
818 /*
819 * store_cpu_topology is called at boot, when only one cpu is running,
820 * and later with the cpu_hotplug.lock mutex held once several cpus have
821 * booted, which prevents simultaneous write access to the cpu_topology array.
822 */
823 void store_cpu_topology(unsigned int cpuid)
824 {
825 struct cputopo_arm *cpuid_topo = &cpu_topology[cpuid];
826 unsigned int mpidr;
827
828 /* If the cpu topology has been already set, just return */
829 if (cpuid_topo->core_id != -1)
830 return;
831
832 mpidr = read_cpuid_mpidr();
833
834 /* create cpu topology mapping */
835 if ((mpidr & MPIDR_SMP_BITMASK) == MPIDR_SMP_VALUE) {
836 /*
837 * This is a multiprocessor system
838 * multiprocessor format & multiprocessor mode field are set
839 */
840
841 if (mpidr & MPIDR_MT_BITMASK) {
842 /* core performance interdependency */
843 cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0);
844 cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
845 cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);
846 } else {
847 /* largely independent cores */
848 cpuid_topo->thread_id = -1;
849 cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0);
850 cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
851 }
852 } else {
853 /*
854 * This is a uniprocessor system:
855 * either we are in multiprocessor format but on a uniprocessor system,
856 * or in the old uniprocessor format.
857 */
858 cpuid_topo->thread_id = -1;
859 cpuid_topo->core_id = 0;
860 cpuid_topo->socket_id = -1;
861 }
862
863 #ifdef CONFIG_MTK_CPU_TOPOLOGY
864 verify_cputopo(cpuid, (u32)mpidr);
865 #endif
866
867 update_siblings_masks(cpuid);
868
869 update_cpu_power(cpuid, mpidr & MPIDR_HWID_BITMASK);
870
871 printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n",
872 cpuid, cpu_topology[cpuid].thread_id,
873 cpu_topology[cpuid].core_id,
874 cpu_topology[cpuid].socket_id, mpidr);
875 }
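/*
 * Decoding example (hypothetical MPIDR value, MT bit clear, assuming the
 * usual MPIDR_SMP_VALUE of (0x2 << 30)):
 *
 *   mpidr = 0x80000101
 *     -> (mpidr & MPIDR_SMP_BITMASK) == MPIDR_SMP_VALUE, so the MP path is taken
 *     -> thread_id = -1 (no MT)
 *     -> core_id   = MPIDR_AFFINITY_LEVEL(mpidr, 0) = 0x01
 *     -> socket_id = MPIDR_AFFINITY_LEVEL(mpidr, 1) = 0x01
 *
 * i.e. this CPU is reported as core 1 of cluster/socket 1.
 */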
876
877 /*
878 * cluster_to_logical_mask - return cpu logical mask of CPUs in a cluster
879 * @socket_id: cluster HW identifier
880 * @cluster_mask: the cpumask location to be initialized, modified by the
881 * function only if return value == 0
882 *
883 * Return:
884 *
885 * 0 on success
886 * -EINVAL if cluster_mask is NULL or there is no record matching socket_id
887 */
888 int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask)
889 {
890 int cpu;
891
892 if (!cluster_mask)
893 return -EINVAL;
894
895 for_each_online_cpu(cpu)
896 if (socket_id == topology_physical_package_id(cpu)) {
897 cpumask_copy(cluster_mask, topology_core_cpumask(cpu));
898 return 0;
899 }
900
901 return -EINVAL;
902 }
903
904 #ifdef CONFIG_SCHED_HMP
905 static const char * const little_cores[] = {
906 "arm,cortex-a53",
907 NULL,
908 };
909
910 static bool is_little_cpu(struct device_node *cn)
911 {
912 const char * const *lc;
913 for (lc = little_cores; *lc; lc++)
914 if (of_device_is_compatible(cn, *lc)) {
915 return true;
916 }
917 return false;
918 }
919
920 void __init arch_get_fast_and_slow_cpus(struct cpumask *fast,
921 struct cpumask *slow)
922 {
923 struct device_node *cn = NULL;
924 int cpu;
925
926 cpumask_clear(fast);
927 cpumask_clear(slow);
928
929 /*
930 * Use the config options if they are given. This helps testing
931 * HMP scheduling on systems without a big.LITTLE architecture.
932 */
933 if (strlen(CONFIG_HMP_FAST_CPU_MASK) && strlen(CONFIG_HMP_SLOW_CPU_MASK)) {
934 if (cpulist_parse(CONFIG_HMP_FAST_CPU_MASK, fast))
935 WARN(1, "Failed to parse HMP fast cpu mask!\n");
936 if (cpulist_parse(CONFIG_HMP_SLOW_CPU_MASK, slow))
937 WARN(1, "Failed to parse HMP slow cpu mask!\n");
938 return;
939 }
940
941 /*
942 * Else, parse device tree for little cores.
943 */
944 while ((cn = of_find_node_by_type(cn, "cpu"))) {
945
946 const u32 *mpidr;
947 int len;
948
949 mpidr = of_get_property(cn, "reg", &len);
950 if (!mpidr || len != 4) {
951 pr_err("* %s missing reg property\n", cn->full_name);
952 continue;
953 }
954
955 cpu = get_logical_index(be32_to_cpup(mpidr));
956 if (cpu == -EINVAL) {
957 pr_err("couldn't get logical index for mpidr %x\n",
958 be32_to_cpup(mpidr));
959 break;
960 }
961
962 if (is_little_cpu(cn))
963 cpumask_set_cpu(cpu, slow);
964 else
965 cpumask_set_cpu(cpu, fast);
966 }
967
968 if (!cpumask_empty(fast) && !cpumask_empty(slow))
969 return;
970
971 /*
972 * We didn't find both big and little cores so let's call all cores
973 * fast as this will keep the system running, with all cores being
974 * treated equal.
975 */
976 cpumask_setall(fast);
977 cpumask_clear(slow);
978 }
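/*
 * Example configuration (hypothetical values for a 4+4 big.LITTLE board):
 *
 *   CONFIG_HMP_FAST_CPU_MASK="4-7"
 *   CONFIG_HMP_SLOW_CPU_MASK="0-3"
 *
 * cpulist_parse() accepts the usual cpulist syntax ("0-3", "0,2,4-7", ...).
 * If both strings are empty, the masks are derived from the device tree via
 * little_cores[] as above.
 */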
979
980 struct cpumask hmp_fast_cpu_mask;
981 struct cpumask hmp_slow_cpu_mask;
982
983 void __init arch_get_hmp_domains(struct list_head *hmp_domains_list)
984 {
985 struct hmp_domain *domain;
986
987 arch_get_fast_and_slow_cpus(&hmp_fast_cpu_mask, &hmp_slow_cpu_mask);
988
989 /*
990 * Initialize hmp_domains
991 * Must be ordered with respect to compute capacity.
992 * Fastest domain at head of list.
993 */
994 if(!cpumask_empty(&hmp_slow_cpu_mask)) {
995 domain = (struct hmp_domain *)
996 kmalloc(sizeof(struct hmp_domain), GFP_KERNEL);
997 cpumask_copy(&domain->possible_cpus, &hmp_slow_cpu_mask);
998 cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus);
999 list_add(&domain->hmp_domains, hmp_domains_list);
1000 }
1001 domain = (struct hmp_domain *)
1002 kmalloc(sizeof(struct hmp_domain), GFP_KERNEL);
1003 cpumask_copy(&domain->possible_cpus, &hmp_fast_cpu_mask);
1004 cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus);
1005 list_add(&domain->hmp_domains, hmp_domains_list);
1006 }
1007 #endif /* CONFIG_SCHED_HMP */
1008
1009 /*
1010 * init_cpu_topology is called at boot when only one cpu is running
1011 * which prevents simultaneous write access to the cpu_topology array
1012 */
1013 void __init init_cpu_topology(void)
1014 {
1015 unsigned int cpu;
1016
1017 /* init core mask and power */
1018 for_each_possible_cpu(cpu) {
1019 struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]);
1020
1021 cpu_topo->thread_id = -1;
1022 cpu_topo->core_id = -1;
1023 cpu_topo->socket_id = -1;
1024 cpumask_clear(&cpu_topo->core_sibling);
1025 cpumask_clear(&cpu_topo->thread_sibling);
1026
1027 set_power_scale(cpu, SCHED_POWER_SCALE);
1028 }
1029 smp_wmb();
1030
1031 parse_dt_topology();
1032 }
1033
1034
1035 #ifdef CONFIG_ARCH_SCALE_INVARIANT_CPU_CAPACITY
1036 #include <linux/cpufreq.h>
1037 #define ARCH_SCALE_INVA_CPU_CAP_PERCLS 1
1038
1039 struct cpufreq_extents {
1040 u32 max;
1041 u32 flags;
1042 u32 const_max;
1043 u32 throttling;
1044 };
1045 /* Flag set when the governor in use only allows one frequency.
1046 * Disables scaling.
1047 */
1048 #define CPUPOWER_FREQINVAR_SINGLEFREQ 0x01
1049 static struct cpufreq_extents freq_scale[CONFIG_NR_CPUS];
1050
1051 static unsigned long get_max_cpu_power(void)
1052 {
1053 unsigned long max_cpu_power = 0;
1054 int cpu;
1055 for_each_online_cpu(cpu) {
1056 if (per_cpu(cpu_scale, cpu) > max_cpu_power)
1057 max_cpu_power = per_cpu(cpu_scale, cpu);
1058 }
1059 return max_cpu_power;
1060 }
1061
1062 int arch_get_cpu_throttling(int cpu)
1063 {
1064 return freq_scale[cpu].throttling;
1065 }
1066
1067 /* Called when the CPU Frequency is changed.
1068 * Once for each CPU.
1069 */
1070 static int cpufreq_callback(struct notifier_block *nb,
1071 unsigned long val, void *data)
1072 {
1073 struct cpufreq_freqs *freq = data;
1074 int cpu = freq->cpu;
1075 struct cpufreq_extents *extents;
1076 unsigned int curr_freq;
1077 #ifdef ARCH_SCALE_INVA_CPU_CAP_PERCLS
1078 int i = 0;
1079 #endif
1080
1081 if (freq->flags & CPUFREQ_CONST_LOOPS)
1082 return NOTIFY_OK;
1083
1084 if (val != CPUFREQ_POSTCHANGE)
1085 return NOTIFY_OK;
1086
1087 /* if dynamic load scale is disabled, set the load scale to 1.0 */
1088 if (!frequency_invariant_power_enabled) {
1089 per_cpu(invariant_cpu_capacity, cpu) = per_cpu(base_cpu_capacity, cpu);
1090 return NOTIFY_OK;
1091 }
1092
1093 extents = &freq_scale[cpu];
1094 if (extents->max < extents->const_max) {
1095 extents->throttling = 1;
1096 } else {
1097 extents->throttling = 0;
1098 }
1099 /* If our governor was recognised as a single-freq governor,
1100 * use curr = max to be sure multiplier is 1.0
1101 */
1102 if (extents->flags & CPUPOWER_FREQINVAR_SINGLEFREQ)
1103 curr_freq = extents->max >> CPUPOWER_FREQSCALE_SHIFT;
1104 else
1105 curr_freq = freq->new >> CPUPOWER_FREQSCALE_SHIFT;
1106
1107 #ifdef ARCH_SCALE_INVA_CPU_CAP_PERCLS
1108 for_each_cpu(i, topology_core_cpumask(cpu)) {
1109 per_cpu(invariant_cpu_capacity, i) = DIV_ROUND_UP(
1110 (curr_freq * per_cpu(prescaled_cpu_capacity, i)), CPUPOWER_FREQSCALE_DEFAULT);
1111 }
1112 #else
1113 per_cpu(invariant_cpu_capacity, cpu) = DIV_ROUND_UP(
1114 (curr_freq * per_cpu(prescaled_cpu_capacity, cpu)), CPUPOWER_FREQSCALE_DEFAULT);
1115 #endif
1116 return NOTIFY_OK;
1117 }
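/*
 * Worked example (illustrative only; assumes CPUPOWER_FREQSCALE_SHIFT == 10,
 * CPUPOWER_FREQSCALE_DEFAULT == 1024, cpufreq frequencies in kHz, and
 * prescaling against extents->max, i.e. no CONFIG_SCHED_HMP_ENHANCEMENT):
 *
 *   policy->max = 1300000 kHz -> extents->max = 1300000 >> 10 = 1269
 *   base capacity = 1024      -> prescaled_cpu_capacity = (1024 << 10) / 1269 = 826
 *   freq->new = 650000 kHz    -> curr_freq = 650000 >> 10 = 634
 *     -> invariant_cpu_capacity = DIV_ROUND_UP(634 * 826, 1024) = 512
 *
 * i.e. running at half of the policy maximum roughly halves the reported
 * capacity of the CPU.
 */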
1118
1119 /* Called when the CPUFreq governor is changed.
1120 * Only called for the CPUs whose policy is actually changed by
1121 * userspace.
1122 */
1123 static int cpufreq_policy_callback(struct notifier_block *nb,
1124 unsigned long event, void *data)
1125 {
1126 struct cpufreq_policy *policy = data;
1127 struct cpufreq_extents *extents;
1128 int cpu, singleFreq = 0, cpu_capacity;
1129 static const char performance_governor[] = "performance";
1130 static const char powersave_governor[] = "powersave";
1131 unsigned long max_cpu_power;
1132 #ifdef ARCH_SCALE_INVA_CPU_CAP_PERCLS
1133 int i = 0;
1134 #endif
1135
1136 if (event == CPUFREQ_START)
1137 return 0;
1138
1139 if (event != CPUFREQ_INCOMPATIBLE)
1140 return 0;
1141
1142 /* CPUFreq governors do not accurately report the range of
1143 * CPU Frequencies they will choose from.
1144 * We recognise performance and powersave governors as
1145 * single-frequency only.
1146 */
1147 if (!strncmp(policy->governor->name, performance_governor,
1148 strlen(performance_governor)) ||
1149 !strncmp(policy->governor->name, powersave_governor,
1150 strlen(powersave_governor)))
1151 singleFreq = 1;
1152
1153 max_cpu_power = get_max_cpu_power();
1154 /* Make sure that all CPUs impacted by this policy are
1155 * updated since we will only get a notification when the
1156 * user explicitly changes the policy on a CPU.
1157 */
1158 for_each_cpu(cpu, policy->cpus) {
1159 /* scale cpu_power to max(1024) */
1160 cpu_capacity = (per_cpu(cpu_scale, cpu) << CPUPOWER_FREQSCALE_SHIFT)
1161 / max_cpu_power;
1162 extents = &freq_scale[cpu];
1163 extents->max = policy->max >> CPUPOWER_FREQSCALE_SHIFT;
1164 extents->const_max = policy->cpuinfo.max_freq >> CPUPOWER_FREQSCALE_SHIFT;
1165 if (!frequency_invariant_power_enabled) {
1166 /* when disabled, invariant_cpu_scale = cpu_scale */
1167 per_cpu(base_cpu_capacity, cpu) = CPUPOWER_FREQSCALE_DEFAULT;
1168 per_cpu(invariant_cpu_capacity, cpu) = CPUPOWER_FREQSCALE_DEFAULT;
1169 /* unused when disabled */
1170 per_cpu(prescaled_cpu_capacity, cpu) = CPUPOWER_FREQSCALE_DEFAULT;
1171 } else {
1172 if (singleFreq)
1173 extents->flags |= CPUPOWER_FREQINVAR_SINGLEFREQ;
1174 else
1175 extents->flags &= ~CPUPOWER_FREQINVAR_SINGLEFREQ;
1176 per_cpu(base_cpu_capacity, cpu) = cpu_capacity;
1177 #ifdef CONFIG_SCHED_HMP_ENHANCEMENT
1178 per_cpu(prescaled_cpu_capacity, cpu) =
1179 ((cpu_capacity << CPUPOWER_FREQSCALE_SHIFT) / extents->const_max);
1180 #else
1181 per_cpu(prescaled_cpu_capacity, cpu) =
1182 ((cpu_capacity << CPUPOWER_FREQSCALE_SHIFT) / extents->max);
1183 #endif
1184
1185 #ifdef ARCH_SCALE_INVA_CPU_CAP_PERCLS
1186 for_each_cpu(i, topology_core_cpumask(cpu)) {
1187 per_cpu(invariant_cpu_capacity, i) = DIV_ROUND_UP(
1188 ((policy->cur>>CPUPOWER_FREQSCALE_SHIFT) *
1189 per_cpu(prescaled_cpu_capacity, i)), CPUPOWER_FREQSCALE_DEFAULT);
1190 }
1191 #else
1192 per_cpu(invariant_cpu_capacity, cpu) = DIV_ROUND_UP(
1193 ((policy->cur>>CPUPOWER_FREQSCALE_SHIFT) *
1194 per_cpu(prescaled_cpu_capacity, cpu)), CPUPOWER_FREQSCALE_DEFAULT);
1195 #endif
1196 }
1197 }
1198 return 0;
1199 }
1200
1201 static struct notifier_block cpufreq_notifier = {
1202 .notifier_call = cpufreq_callback,
1203 };
1204 static struct notifier_block cpufreq_policy_notifier = {
1205 .notifier_call = cpufreq_policy_callback,
1206 };
1207
1208 static int __init register_topology_cpufreq_notifier(void)
1209 {
1210 int ret;
1211
1212 /* init safe defaults since there are no policies at registration */
1213 for (ret = 0; ret < CONFIG_NR_CPUS; ret++) {
1214 /* safe defaults */
1215 freq_scale[ret].max = CPUPOWER_FREQSCALE_DEFAULT;
1216 per_cpu(base_cpu_capacity, ret) = CPUPOWER_FREQSCALE_DEFAULT;
1217 per_cpu(invariant_cpu_capacity, ret) = CPUPOWER_FREQSCALE_DEFAULT;
1218 per_cpu(prescaled_cpu_capacity, ret) = CPUPOWER_FREQSCALE_DEFAULT;
1219 }
1220
1221 pr_info("topology: registering cpufreq notifiers for scale-invariant CPU Power\n");
1222 ret = cpufreq_register_notifier(&cpufreq_policy_notifier,
1223 CPUFREQ_POLICY_NOTIFIER);
1224
1225 if (ret != -EINVAL)
1226 ret = cpufreq_register_notifier(&cpufreq_notifier,
1227 CPUFREQ_TRANSITION_NOTIFIER);
1228
1229 return ret;
1230 }
1231
1232 core_initcall(register_topology_cpufreq_notifier);
1233 #endif /* CONFIG_ARCH_SCALE_INVARIANT_CPU_CAPACITY */