mm/slub.c (GitHub/exynos8895/android_kernel_samsung_universal8895.git)
1/*
2 * SLUB: A slab allocator that limits cache line use instead of queuing
3 * objects in per cpu and per node lists.
4 *
5 * The allocator synchronizes using per slab locks or atomic operations
6 * and only uses a centralized lock to manage a pool of partial slabs.
7 *
8 * (C) 2007 SGI, Christoph Lameter
9 * (C) 2011 Linux Foundation, Christoph Lameter
10 */
11
12#include <linux/mm.h>
13#include <linux/swap.h> /* struct reclaim_state */
14#include <linux/module.h>
15#include <linux/bit_spinlock.h>
16#include <linux/interrupt.h>
17#include <linux/bitops.h>
18#include <linux/slab.h>
19#include "slab.h"
20#include <linux/proc_fs.h>
21#include <linux/notifier.h>
22#include <linux/seq_file.h>
23#include <linux/kasan.h>
24#include <linux/kmemcheck.h>
25#include <linux/cpu.h>
26#include <linux/cpuset.h>
27#include <linux/mempolicy.h>
28#include <linux/ctype.h>
29#include <linux/debugobjects.h>
30#include <linux/kallsyms.h>
31#include <linux/memory.h>
32#include <linux/math64.h>
33#include <linux/fault-inject.h>
34#include <linux/stacktrace.h>
35#include <linux/prefetch.h>
36#include <linux/memcontrol.h>
37#ifdef CONFIG_SEC_DEBUG_AUTO_SUMMARY
38#include <linux/sec_debug.h>
39#endif
40
41#ifdef CONFIG_RKP_KDP
42#include <linux/security.h>
43
44spinlock_t ro_pages_lock = __SPIN_LOCK_UNLOCKED();
45
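/*
 * check_cred_cache(s, r) returns r from the enclosing debug helper when the
 * cache is one of the RKP-protected, read-only object pools (CRED_JAR_RO,
 * TSEC_JAR or VFSMNT_JAR). Those slabs are managed through rkp_call() and
 * must not be written by the normal SLUB metadata checks below.
 */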
46#define check_cred_cache(s,r) \
47do { \
48 if ((s->name) && (!strcmp(s->name,CRED_JAR_RO) || !strcmp(s->name,TSEC_JAR) || !strcmp(s->name,VFSMNT_JAR) )) \
49 return r; \
50} while (0)
51#else
52#define check_cred_cache(s,r)
53#endif /* CONFIG_RKP_KDP */
54#include <trace/events/kmem.h>
55
56#include "internal.h"
57
58/*
59 * Lock order:
60 * 1. slab_mutex (Global Mutex)
61 * 2. node->list_lock
62 * 3. slab_lock(page) (Only on some arches and for debugging)
63 *
64 * slab_mutex
65 *
66 * The role of the slab_mutex is to protect the list of all the slabs
67 * and to synchronize major metadata changes to slab cache structures.
68 *
69 * The slab_lock is only used for debugging and on arches that do not
70 * have the ability to do a cmpxchg_double. It only protects the second
71 * double word in the page struct. Meaning
72 * A. page->freelist -> List of object free in a page
73 * B. page->counters -> Counters of objects
74 * C. page->frozen -> frozen state
75 *
76 * If a slab is frozen then it is exempt from list management. It is not
77 * on any list. The processor that froze the slab is the one who can
78 * perform list operations on the page. Other processors may put objects
79 * onto the freelist but the processor that froze the slab is the only
80 * one that can retrieve the objects from the page's freelist.
81 *
82 * The list_lock protects the partial and full list on each node and
83 * the partial slab counter. If taken then no new slabs may be added or
84 * removed from the lists, nor may the number of partial slabs be modified.
85 * (Note that the total number of slabs is an atomic value that may be
86 * modified without taking the list lock).
87 *
88 * The list_lock is a centralized lock and thus we avoid taking it as
89 * much as possible. As long as SLUB does not have to handle partial
90 * slabs, operations can continue without any centralized lock. F.e.
91 * allocating a long series of objects that fill up slabs does not require
92 * the list lock.
93 * Interrupts are disabled during allocation and deallocation in order to
94 * make the slab allocator safe to use in the context of an irq. In addition
95 * interrupts are disabled to ensure that the processor does not change
96 * while handling per_cpu slabs, due to kernel preemption.
97 *
98 * SLUB assigns one slab for allocation to each processor.
99 * Allocations only occur from these slabs called cpu slabs.
100 *
101 * Slabs with free elements are kept on a partial list and during regular
102 * operations no list for full slabs is used. If an object in a full slab is
103 * freed then the slab will show up again on the partial lists.
104 * We track full slabs for debugging purposes though because otherwise we
105 * cannot scan all objects.
106 *
107 * Slabs are freed when they become empty. Teardown and setup is
108 * minimal so we rely on the page allocators per cpu caches for
109 * fast frees and allocs.
110 *
111 * Overloading of page flags that are otherwise used for LRU management.
112 *
113 * PageActive The slab is frozen and exempt from list processing.
114 * This means that the slab is dedicated to a purpose
115 * such as satisfying allocations for a specific
116 * processor. Objects may be freed in the slab while
117 * it is frozen but slab_free will then skip the usual
118 * list operations. It is up to the processor holding
119 * the slab to integrate the slab into the slab lists
120 * when the slab is no longer needed.
121 *
122 * One use of this flag is to mark slabs that are
123 * used for allocations. Then such a slab becomes a cpu
124 * slab. The cpu slab may be equipped with an additional
125 * freelist that allows lockless access to
126 * free objects in addition to the regular freelist
127 * that requires the slab lock.
128 *
129 * PageError Slab requires special handling due to debug
130 * options set. This moves slab handling out of
131 * the fast path and disables lockless freelists.
132 */
133
134static inline int kmem_cache_debug(struct kmem_cache *s)
135{
136#ifdef CONFIG_SLUB_DEBUG
137 return unlikely(s->flags & SLAB_DEBUG_FLAGS);
138#else
139 return 0;
140#endif
141}
142
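/*
 * With SLAB_RED_ZONE on a debug cache the first s->red_left_pad bytes of each
 * object slot form the left red zone; fixup_red_left() advances a raw slot
 * address past it so callers see the start of the usable object.
 */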
143static inline void *fixup_red_left(struct kmem_cache *s, void *p)
144{
145 if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE)
146 p += s->red_left_pad;
147
148 return p;
149}
150
151static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
152{
153#ifdef CONFIG_SLUB_CPU_PARTIAL
154 return !kmem_cache_debug(s);
155#else
156 return false;
157#endif
158}
159
160/*
161 * Issues still to be resolved:
162 *
163 * - Support PAGE_ALLOC_DEBUG. Should be easy to do.
164 *
165 * - Variable sizing of the per node arrays
166 */
167
168/* Enable to test recovery from slab corruption on boot */
169#undef SLUB_RESILIENCY_TEST
170
171/* Enable to log cmpxchg failures */
172#undef SLUB_DEBUG_CMPXCHG
173
174/*
175 * Minimum number of partial slabs. These will be left on the partial
176 * lists even if they are empty. kmem_cache_shrink may reclaim them.
177 */
178#define MIN_PARTIAL 5
179
180/*
181 * Maximum number of desirable partial slabs.
182 * The existence of more partial slabs makes kmem_cache_shrink
183 * sort the partial list by the number of objects in use.
184 */
185#define MAX_PARTIAL 10
186
187#define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
188 SLAB_POISON | SLAB_STORE_USER)
189
190/*
191 * Debugging flags that require metadata to be stored in the slab. These get
192 * disabled when slub_debug=O is used and a cache's min order increases with
193 * metadata.
194 */
195#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
196
197#define OO_SHIFT 16
198#define OO_MASK ((1 << OO_SHIFT) - 1)
199#define MAX_OBJS_PER_PAGE 32767 /* since page.objects is u15 */
200
201/* Internal SLUB flags */
202#define __OBJECT_POISON 0x80000000UL /* Poison object */
203#define __CMPXCHG_DOUBLE 0x40000000UL /* Use cmpxchg_double */
204
205#ifdef CONFIG_SMP
206static struct notifier_block slab_notifier;
207#endif
208
209/*
210 * Tracking user of a slab.
211 */
212#define TRACK_ADDRS_COUNT 16
213struct track {
214 unsigned long addr; /* Called from address */
215#ifdef CONFIG_STACKTRACE
216 unsigned long addrs[TRACK_ADDRS_COUNT]; /* Called from address */
217#endif
218 int cpu; /* Was running on cpu */
219 int pid; /* Pid context */
220 unsigned long when; /* When did the operation occur */
221};
222
223enum track_item { TRACK_ALLOC, TRACK_FREE };
224
225#ifdef CONFIG_SYSFS
226static int sysfs_slab_add(struct kmem_cache *);
227static int sysfs_slab_alias(struct kmem_cache *, const char *);
228static void memcg_propagate_slab_attrs(struct kmem_cache *s);
229#else
230static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
231static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
232 { return 0; }
233static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
234#endif
235
236static inline void stat(const struct kmem_cache *s, enum stat_item si)
237{
238#ifdef CONFIG_SLUB_STATS
239 /*
240 * The rmw is racy on a preemptible kernel but this is acceptable, so
241 * avoid this_cpu_add()'s irq-disable overhead.
242 */
243 raw_cpu_inc(s->cpu_slab->stat[si]);
244#endif
245}
246
247/********************************************************************
248 * Core slab cache functions
249 *******************************************************************/
250
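/*
 * A free object stores the pointer to the next free object at
 * object + s->offset: inside the object itself when the object may be
 * overwritten on free, otherwise in the metadata area right after it
 * (see the object layout comment further down).
 */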
251static inline void *get_freepointer(struct kmem_cache *s, void *object)
252{
253 return *(void **)(object + s->offset);
254}
255
256static void prefetch_freepointer(const struct kmem_cache *s, void *object)
257{
258 prefetch(object + s->offset);
259}
260
261static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
262{
263 void *p;
264
265#ifdef CONFIG_DEBUG_PAGEALLOC
266 probe_kernel_read(&p, (void **)(object + s->offset), sizeof(p));
267#else
268 p = get_freepointer(s, object);
269#endif
270 return p;
271}
272
273static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
274{
275#ifdef CONFIG_RKP_KDP
276 if (rkp_cred_enable && s->name &&
277 (!strcmp(s->name, CRED_JAR_RO)|| !strcmp(s->name, TSEC_JAR) ||
278 !strcmp(s->name, VFSMNT_JAR))) {
279 rkp_call(RKP_CMDID(0x44),(unsigned long long) object, (unsigned long long) s->offset,
280 (unsigned long long) fp,0,0);
281 }
282 else
283#endif /*CONFIG_RKP_KDP*/
284 *(void **)(object + s->offset) = fp;
285}
286
287/* Loop over all objects in a slab */
288#define for_each_object(__p, __s, __addr, __objects) \
289 for (__p = fixup_red_left(__s, __addr); \
290 __p < (__addr) + (__objects) * (__s)->size; \
291 __p += (__s)->size)
292
293#define for_each_object_idx(__p, __idx, __s, __addr, __objects) \
294 for (__p = fixup_red_left(__s, __addr), __idx = 1; \
295 __idx <= __objects; \
296 __p += (__s)->size, __idx++)
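/*
 * Typical use, as in allocate_slab() below: walk every object slot of a new
 * page and chain up its freelist:
 *
 *	for_each_object_idx(p, idx, s, start, page->objects) {
 *		setup_object(s, page, p);
 *		if (likely(idx < page->objects))
 *			set_freepointer(s, p, p + s->size);
 *		else
 *			set_freepointer(s, p, NULL);
 *	}
 */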
297
298/* Determine object index from a given position */
299static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
300{
301 return (p - addr) / s->size;
302}
303
304static inline size_t slab_ksize(const struct kmem_cache *s)
305{
306#ifdef CONFIG_SLUB_DEBUG
307 /*
308 * Debugging requires use of the padding between object
309 * and whatever may come after it.
310 */
311 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
312 return s->object_size;
313
314#endif
315 if (s->flags & SLAB_KASAN)
316 return s->object_size;
317
318 /*
319 * If we have the need to store the freelist pointer
320 * back there or track user information then we can
321 * only use the space before that information.
322 */
323 if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
324 return s->inuse;
325 /*
326 * Else we can use all the padding etc for the allocation
327 */
328 return s->size;
329}
330
331static inline int order_objects(int order, unsigned long size, int reserved)
332{
333 return ((PAGE_SIZE << order) - reserved) / size;
334}
335
336static inline struct kmem_cache_order_objects oo_make(int order,
337 unsigned long size, int reserved)
338{
339 struct kmem_cache_order_objects x = {
340 (order << OO_SHIFT) + order_objects(order, size, reserved)
341 };
342
343 return x;
344}
345
346static inline int oo_order(struct kmem_cache_order_objects x)
347{
348 return x.x >> OO_SHIFT;
349}
350
351static inline int oo_objects(struct kmem_cache_order_objects x)
352{
353 return x.x & OO_MASK;
354}
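/*
 * Worked example (assuming 4KiB pages): for order 3 and object size 256 with
 * no reserved bytes, order_objects() gives (4096 << 3) / 256 = 128 objects,
 * so oo_make() packs this as x.x = (3 << OO_SHIFT) + 128. oo_order() then
 * recovers 3 and oo_objects() recovers 128.
 */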
355
356/*
357 * Per slab locking using the pagelock
358 */
359static __always_inline void slab_lock(struct page *page)
360{
361 bit_spin_lock(PG_locked, &page->flags);
362}
363
364static __always_inline void slab_unlock(struct page *page)
365{
366 __bit_spin_unlock(PG_locked, &page->flags);
367}
368
369static inline void set_page_slub_counters(struct page *page, unsigned long counters_new)
370{
371 struct page tmp;
372 tmp.counters = counters_new;
373 /*
374 * page->counters can cover frozen/inuse/objects as well
375 * as page->_count. If we assign to ->counters directly
376 * we run the risk of losing updates to page->_count, so
377 * be careful and only assign to the fields we need.
378 */
379 page->frozen = tmp.frozen;
380 page->inuse = tmp.inuse;
381 page->objects = tmp.objects;
382}
383
384/* Interrupts must be disabled (for the fallback code to work right) */
385static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
386 void *freelist_old, unsigned long counters_old,
387 void *freelist_new, unsigned long counters_new,
388 const char *n)
389{
390 VM_BUG_ON(!irqs_disabled());
391#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
392 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
393 if (s->flags & __CMPXCHG_DOUBLE) {
394 if (cmpxchg_double(&page->freelist, &page->counters,
395 freelist_old, counters_old,
396 freelist_new, counters_new))
397 return true;
398 } else
399#endif
400 {
401 slab_lock(page);
402 if (page->freelist == freelist_old &&
403 page->counters == counters_old) {
404 page->freelist = freelist_new;
405 set_page_slub_counters(page, counters_new);
406 slab_unlock(page);
407 return true;
408 }
409 slab_unlock(page);
410 }
411
412 cpu_relax();
413 stat(s, CMPXCHG_DOUBLE_FAIL);
414
415#ifdef SLUB_DEBUG_CMPXCHG
416 pr_info("%s %s: cmpxchg double redo ", n, s->name);
417#endif
418
419 return false;
420}
421
422static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
423 void *freelist_old, unsigned long counters_old,
424 void *freelist_new, unsigned long counters_new,
425 const char *n)
426{
427#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
428 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
429 if (s->flags & __CMPXCHG_DOUBLE) {
430 if (cmpxchg_double(&page->freelist, &page->counters,
431 freelist_old, counters_old,
432 freelist_new, counters_new))
433 return true;
434 } else
435#endif
436 {
437 unsigned long flags;
438
439 local_irq_save(flags);
440 slab_lock(page);
441 if (page->freelist == freelist_old &&
442 page->counters == counters_old) {
443 page->freelist = freelist_new;
444 set_page_slub_counters(page, counters_new);
445 slab_unlock(page);
446 local_irq_restore(flags);
447 return true;
448 }
449 slab_unlock(page);
450 local_irq_restore(flags);
451 }
452
453 cpu_relax();
454 stat(s, CMPXCHG_DOUBLE_FAIL);
455
456#ifdef SLUB_DEBUG_CMPXCHG
457 pr_info("%s %s: cmpxchg double redo ", n, s->name);
458#endif
459
460 return false;
461}
462
463#ifdef CONFIG_SLUB_DEBUG
464/*
465 * Determine a map of object in use on a page.
466 *
467 * Node listlock must be held to guarantee that the page does
468 * not vanish from under us.
469 */
470static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
471{
472 void *p;
473 void *addr = page_address(page);
474
475#ifdef CONFIG_RKP_KDP
476 check_cred_cache(s, );
477#endif /* CONFIG_RKP_KDP */
478 for (p = page->freelist; p; p = get_freepointer(s, p))
479 set_bit(slab_index(p, s, addr), map);
480}
481
482static inline int size_from_object(struct kmem_cache *s)
483{
484 if (s->flags & SLAB_RED_ZONE)
485 return s->size - s->red_left_pad;
486
487 return s->size;
488}
489
490static inline void *restore_red_left(struct kmem_cache *s, void *p)
491{
492 if (s->flags & SLAB_RED_ZONE)
493 p -= s->red_left_pad;
494
495 return p;
496}
497
498/*
499 * Debug settings:
500 */
501#if defined(CONFIG_SLUB_DEBUG_ON)
502static int slub_debug = DEBUG_DEFAULT_FLAGS;
503#else
504static int slub_debug;
505#endif
506
507static char *slub_debug_slabs;
508static int disable_higher_order_debug;
509
510/*
511 * slub is about to manipulate internal object metadata. This memory lies
512 * outside the range of the allocated object, so accessing it would normally
513 * be reported by kasan as a bounds error. metadata_access_enable() is used
514 * to tell kasan that these accesses are OK.
515 */
516static inline void metadata_access_enable(void)
517{
518 kasan_disable_current();
519}
520
521static inline void metadata_access_disable(void)
522{
523 kasan_enable_current();
524}
525
526/*
527 * Object debugging
528 */
529
530/* Verify that a pointer has an address that is valid within a slab page */
531static inline int check_valid_pointer(struct kmem_cache *s,
532 struct page *page, void *object)
533{
534 void *base;
535
536 if (!object)
537 return 1;
538
539 base = page_address(page);
540 object = restore_red_left(s, object);
541 if (object < base || object >= base + page->objects * s->size ||
542 (object - base) % s->size) {
543 return 0;
544 }
545
546 return 1;
547}
548
549static void print_section(char *text, u8 *addr, unsigned int length)
550{
551 metadata_access_enable();
552 print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
553 length, 1);
554 metadata_access_disable();
555}
556
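/*
 * Tracking data lives in the object's metadata area: after the free pointer
 * (s->offset + sizeof(void *)) when the freelist pointer is stored outside
 * the object, otherwise at s->inuse. Two struct track entries follow,
 * TRACK_ALLOC first and TRACK_FREE second.
 */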
557static struct track *get_track(struct kmem_cache *s, void *object,
558 enum track_item alloc)
559{
560 struct track *p;
561
562 if (s->offset)
563 p = object + s->offset + sizeof(void *);
564 else
565 p = object + s->inuse;
566
567 return p + alloc;
568}
569
570static void set_track(struct kmem_cache *s, void *object,
571 enum track_item alloc, unsigned long addr)
572{
573 struct track *p = get_track(s, object, alloc);
574
575#ifdef CONFIG_RKP_KDP
576 check_cred_cache(s, );
577#endif /* CONFIG_RKP_KDP */
578 if (addr) {
579#ifdef CONFIG_STACKTRACE
580 struct stack_trace trace;
581 int i;
582
583 trace.nr_entries = 0;
584 trace.max_entries = TRACK_ADDRS_COUNT;
585 trace.entries = p->addrs;
586 trace.skip = 3;
587 metadata_access_enable();
588 save_stack_trace(&trace);
589 metadata_access_disable();
590
591 /* See rant in lockdep.c */
592 if (trace.nr_entries != 0 &&
593 trace.entries[trace.nr_entries - 1] == ULONG_MAX)
594 trace.nr_entries--;
595
596 for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++)
597 p->addrs[i] = 0;
598#endif
599 p->addr = addr;
600 p->cpu = smp_processor_id();
601 p->pid = current->pid;
602 p->when = jiffies;
603 } else
604 memset(p, 0, sizeof(struct track));
605}
606
607static void init_tracking(struct kmem_cache *s, void *object)
608{
609 if (!(s->flags & SLAB_STORE_USER))
610 return;
611
612 set_track(s, object, TRACK_FREE, 0UL);
613 set_track(s, object, TRACK_ALLOC, 0UL);
614}
615
616static void print_track(const char *s, struct track *t)
617{
618 if (!t->addr)
619 return;
620
621 pr_auto(ASL7, "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
622 s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
623#ifdef CONFIG_STACKTRACE
624 {
625 int i;
626 for (i = 0; i < TRACK_ADDRS_COUNT; i++)
627 if (t->addrs[i])
628 pr_err("\t%pS\n", (void *)t->addrs[i]);
629 else
630 break;
631 }
632#endif
633}
634
635static void print_tracking(struct kmem_cache *s, void *object)
636{
637 if (!(s->flags & SLAB_STORE_USER))
638 return;
639
640 print_track("Allocated", get_track(s, object, TRACK_ALLOC));
641 print_track("Freed", get_track(s, object, TRACK_FREE));
642}
643
644static void print_page_info(struct page *page)
645{
646 pr_err("INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
647 page, page->objects, page->inuse, page->freelist, page->flags);
648
649}
650
651static void slab_bug(struct kmem_cache *s, char *fmt, ...)
652{
653 struct va_format vaf;
654 va_list args;
655
656 va_start(args, fmt);
657 vaf.fmt = fmt;
658 vaf.va = &args;
659 pr_auto(ASL7, "=============================================================================\n");
660 pr_auto(ASL7, "BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
661 pr_auto(ASL7, "-----------------------------------------------------------------------------\n\n");
662
663 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
664 va_end(args);
665}
666
667static void slab_fix(struct kmem_cache *s, char *fmt, ...)
668{
669 struct va_format vaf;
670 va_list args;
671
672 va_start(args, fmt);
673 vaf.fmt = fmt;
674 vaf.va = &args;
675 pr_err("FIX %s: %pV\n", s->name, &vaf);
676 va_end(args);
677}
678
679static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
680{
681 unsigned int off; /* Offset of last byte */
682 u8 *addr = page_address(page);
683
684 print_tracking(s, p);
685
686 print_page_info(page);
687
688 pr_auto(ASL7, "INFO: Object 0x%p @offset=%tu fp=0x%p\n",
689 p, p - addr, get_freepointer(s, p));
690
691 if (s->flags & SLAB_RED_ZONE)
692 print_section("Redzone ", p - s->red_left_pad, s->red_left_pad);
693 else if (p > addr + 16)
694 print_section("Bytes b4 ", p - 16, 16);
695
696 print_section("Object ", p, min_t(unsigned long, s->object_size,
697 PAGE_SIZE));
698 if (s->flags & SLAB_RED_ZONE)
699 print_section("Redzone ", p + s->object_size,
700 s->inuse - s->object_size);
701
702 if (s->offset)
703 off = s->offset + sizeof(void *);
704 else
705 off = s->inuse;
706
707 if (s->flags & SLAB_STORE_USER)
708 off += 2 * sizeof(struct track);
709
710 off += kasan_metadata_size(s);
711
712 if (off != size_from_object(s))
713 /* Beginning of the filler is the free pointer */
714 print_section("Padding ", p + off, size_from_object(s) - off);
715
716 dump_stack();
717}
718
719void object_err(struct kmem_cache *s, struct page *page,
720 u8 *object, char *reason)
721{
722 pr_auto_once(7);
723 slab_bug(s, "%s", reason);
724 print_trailer(s, page, object);
725 pr_auto_disable(7);
726}
727
728static void slab_err(struct kmem_cache *s, struct page *page,
729 const char *fmt, ...)
730{
731 va_list args;
732 char buf[100];
733
734 pr_auto_once(7);
735 va_start(args, fmt);
736 vsnprintf(buf, sizeof(buf), fmt, args);
737 va_end(args);
738 slab_bug(s, "%s", buf);
739 print_page_info(page);
740 dump_stack();
741 pr_auto_disable(7);
742}
743
744static void init_object(struct kmem_cache *s, void *object, u8 val)
745{
746 u8 *p = object;
747
748#ifdef CONFIG_RKP_KDP
749 check_cred_cache(s, );
750#endif /* CONFIG_RKP_KDP */
751
752 if (s->flags & SLAB_RED_ZONE)
753 memset(p - s->red_left_pad, val, s->red_left_pad);
754
755 if (s->flags & __OBJECT_POISON) {
756 memset(p, POISON_FREE, s->object_size - 1);
757 p[s->object_size - 1] = POISON_END;
758 }
759
760 if (s->flags & SLAB_RED_ZONE)
761 memset(p + s->object_size, val, s->inuse - s->object_size);
762}
763
764static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
765 void *from, void *to)
766{
767 slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
768 memset(from, data, to - from);
769}
770
771static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
772 u8 *object, char *what,
773 u8 *start, unsigned int value, unsigned int bytes)
774{
775 u8 *fault;
776 u8 *end;
777
778 metadata_access_enable();
779#ifdef CONFIG_RKP_KDP
780 check_cred_cache(s,1);
781#endif /* CONFIG_RKP_KDP */
782
783 fault = memchr_inv(start, value, bytes);
784 metadata_access_disable();
785 if (!fault)
786 return 1;
787
788 end = start + bytes;
789 while (end > fault && end[-1] == value)
790 end--;
791
792 pr_auto_once(7);
793 slab_bug(s, "%s overwritten", what);
794 pr_auto(ASL7, "INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
795 fault, end - 1, fault[0], value);
796 print_trailer(s, page, object);
797 BUG();
798 pr_auto_disable(7);
799
800 restore_bytes(s, what, value, fault, end);
801 return 0;
802}
803
804/*
805 * Object layout:
806 *
807 * object address
808 * Bytes of the object to be managed.
809 * If the freepointer may overlay the object then the free
810 * pointer is the first word of the object.
811 *
812 * Poisoning uses 0x6b (POISON_FREE) and the last byte is
813 * 0xa5 (POISON_END)
814 *
815 * object + s->object_size
816 * Padding to reach word boundary. This is also used for Redzoning.
817 * Padding is extended by another word if Redzoning is enabled and
818 * object_size == inuse.
819 *
820 * We fill with 0xbb (RED_INACTIVE) for inactive objects and with
821 * 0xcc (RED_ACTIVE) for objects in use.
822 *
823 * object + s->inuse
824 * Meta data starts here.
825 *
826 * A. Free pointer (if we cannot overwrite object on free)
827 * B. Tracking data for SLAB_STORE_USER
828 * C. Padding to reach required alignment boundary or at minimum
829 * one word if debugging is on to be able to detect writes
830 * before the word boundary.
831 *
832 * Padding is done using 0x5a (POISON_INUSE)
833 *
834 * object + s->size
835 * Nothing is used beyond s->size.
836 *
837 * If slabcaches are merged then the object_size and inuse boundaries are mostly
838 * ignored. And therefore no slab options that rely on these boundaries
839 * may be used with merged slabcaches.
840 */
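/*
 * Compact view of one object slot of size s->size under full debugging:
 *
 *	[left red zone    ]  s->red_left_pad bytes (SLAB_RED_ZONE)
 *	[object           ]  s->object_size bytes (poisoned while free)
 *	[right red zone   ]  up to s->inuse
 *	[free pointer     ]  sizeof(void *) if it cannot overlay the object
 *	[track alloc/free ]  2 * sizeof(struct track) (SLAB_STORE_USER)
 *	[padding          ]  POISON_INUSE filler up to s->size
 */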
841
842static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
843{
844 unsigned long off = s->inuse; /* The end of info */
845
846 if (s->offset)
847 /* Freepointer is placed after the object. */
848 off += sizeof(void *);
849
850 if (s->flags & SLAB_STORE_USER)
851 /* We also have user information there */
852 off += 2 * sizeof(struct track);
853
854 off += kasan_metadata_size(s);
855
856 if (size_from_object(s) == off)
857 return 1;
858
859 return check_bytes_and_report(s, page, p, "Object padding",
860 p + off, POISON_INUSE, size_from_object(s) - off);
861}
862
863/* Check the pad bytes at the end of a slab page */
864static int slab_pad_check(struct kmem_cache *s, struct page *page)
865{
866 u8 *start;
867 u8 *fault;
868 u8 *end;
869 int length;
870 int remainder;
871
872 if (!(s->flags & SLAB_POISON))
873 return 1;
874
875#ifdef CONFIG_RKP_KDP
876 check_cred_cache(s,1);
877#endif /* CONFIG_RKP_KDP */
878 start = page_address(page);
879 length = (PAGE_SIZE << compound_order(page)) - s->reserved;
880 end = start + length;
881 remainder = length % s->size;
882 if (!remainder)
883 return 1;
884
885 metadata_access_enable();
886 fault = memchr_inv(end - remainder, POISON_INUSE, remainder);
887 metadata_access_disable();
888 if (!fault)
889 return 1;
890 while (end > fault && end[-1] == POISON_INUSE)
891 end--;
892
893 slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
894 print_section("Padding ", end - remainder, remainder);
895 BUG();
896
897 restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end);
898 return 0;
899}
900
901static int check_object(struct kmem_cache *s, struct page *page,
902 void *object, u8 val)
903{
904 u8 *p = object;
905 u8 *endobject = object + s->object_size;
906
907 if (s->flags & SLAB_RED_ZONE) {
908 if (!check_bytes_and_report(s, page, object, "Redzone",
909 object - s->red_left_pad, val, s->red_left_pad))
910 return 0;
911
912 if (!check_bytes_and_report(s, page, object, "Redzone",
913 endobject, val, s->inuse - s->object_size))
914 return 0;
915 } else {
916 if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
917 check_bytes_and_report(s, page, p, "Alignment padding",
918 endobject, POISON_INUSE,
919 s->inuse - s->object_size);
920 }
921 }
922
923 if (s->flags & SLAB_POISON) {
924 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
925 (!check_bytes_and_report(s, page, p, "Poison", p,
926 POISON_FREE, s->object_size - 1) ||
927 !check_bytes_and_report(s, page, p, "Poison",
928 p + s->object_size - 1, POISON_END, 1)))
929 return 0;
930 /*
931 * check_pad_bytes cleans up on its own.
932 */
933 check_pad_bytes(s, page, p);
934 }
935
936 if (!s->offset && val == SLUB_RED_ACTIVE)
937 /*
938 * Object and freepointer overlap. Cannot check
939 * freepointer while object is allocated.
940 */
941 return 1;
942
943 /* Check free pointer validity */
944 if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
945 object_err(s, page, p, "Freepointer corrupt");
946 BUG();
947 /*
948 * No choice but to zap it and thus lose the remainder
949 * of the free objects in this slab. May cause
950 * another error because the object count is now wrong.
951 */
952 set_freepointer(s, p, NULL);
953 return 0;
954 }
955 return 1;
956}
957
958static int check_slab(struct kmem_cache *s, struct page *page)
959{
960 int maxobj;
961
962 VM_BUG_ON(!irqs_disabled());
963
964 if (!PageSlab(page)) {
965 slab_err(s, page, "Not a valid slab page");
966 return 0;
967 }
968#ifdef CONFIG_RKP_KDP
969 /*
970 * Skip this function for now
971 */
972 if (s->name && (!strcmp(s->name, CRED_JAR_RO) ||
973 !strcmp(s->name, TSEC_JAR) ||
974 !strcmp(s->name, VFSMNT_JAR)))
975 return 1;
976#endif /*CONFIG_RKP_KDP*/
977 maxobj = order_objects(compound_order(page), s->size, s->reserved);
978 if (page->objects > maxobj) {
979 slab_err(s, page, "objects %u > max %u",
980 page->objects, maxobj);
981 return 0;
982 }
983 if (page->inuse > page->objects) {
984 slab_err(s, page, "inuse %u > max %u",
985 page->inuse, page->objects);
986 return 0;
987 }
988 /* Slab_pad_check fixes things up after itself */
989 slab_pad_check(s, page);
990 return 1;
991}
992
993/*
994 * Determine if a certain object on a page is on the freelist. Must hold the
995 * slab lock to guarantee that the chains are in a consistent state.
996 */
997static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
998{
999 int nr = 0;
1000 void *fp;
1001 void *object = NULL;
1002 int max_objects;
1003
1004 fp = page->freelist;
1005#ifdef CONFIG_RKP_KDP
1006 check_cred_cache(s,0);
1007#endif /* CONFIG_RKP_KDP */
1008 while (fp && nr <= page->objects) {
1009 if (fp == search)
1010 return 1;
1011 if (!check_valid_pointer(s, page, fp)) {
1012 if (object) {
1013 object_err(s, page, object,
1014 "Freechain corrupt");
1015 BUG();
1016 set_freepointer(s, object, NULL);
1017 } else {
1018 slab_err(s, page, "Freepointer corrupt");
1019 BUG();
1020 page->freelist = NULL;
1021 page->inuse = page->objects;
1022 slab_fix(s, "Freelist cleared");
1023 return 0;
1024 }
1025 break;
1026 }
1027 object = fp;
1028 fp = get_freepointer(s, object);
1029 nr++;
1030 }
1031
1032 max_objects = order_objects(compound_order(page), s->size, s->reserved);
1033 if (max_objects > MAX_OBJS_PER_PAGE)
1034 max_objects = MAX_OBJS_PER_PAGE;
1035
1036 if (page->objects != max_objects) {
1037 slab_err(s, page, "Wrong number of objects. Found %d but should be %d",
1038 page->objects, max_objects);
1039 BUG();
1040 page->objects = max_objects;
1041 slab_fix(s, "Number of objects adjusted.");
1042 }
1043 if (page->inuse != page->objects - nr) {
1044 slab_err(s, page, "Wrong object count. Counter is %d but counted were %d",
1045 page->inuse, page->objects - nr);
1046 BUG();
1047 page->inuse = page->objects - nr;
1048 slab_fix(s, "Object count adjusted.");
1049 }
1050 return search == NULL;
1051}
1052
1053static void trace(struct kmem_cache *s, struct page *page, void *object,
1054 int alloc)
1055{
1056 if (s->flags & SLAB_TRACE) {
1057 pr_info("TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
1058 s->name,
1059 alloc ? "alloc" : "free",
1060 object, page->inuse,
1061 page->freelist);
1062
1063 if (!alloc)
1064 print_section("Object ", (void *)object,
1065 s->object_size);
1066
1067 dump_stack();
1068 }
1069}
1070
1071/*
1072 * Tracking of fully allocated slabs for debugging purposes.
1073 */
1074static void add_full(struct kmem_cache *s,
1075 struct kmem_cache_node *n, struct page *page)
1076{
1077#ifdef CONFIG_RKP_KDP
1078 check_cred_cache(s, );
1079#endif /* CONFIG_RKP_KDP */
1080 if (!(s->flags & SLAB_STORE_USER))
1081 return;
1082
1083 lockdep_assert_held(&n->list_lock);
1084 list_add(&page->lru, &n->full);
1085}
1086
1087static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page)
1088{
1089#ifdef CONFIG_RKP_KDP
1090 check_cred_cache(s, );
1091#endif /* CONFIG_RKP_KDP */
1092 if (!(s->flags & SLAB_STORE_USER))
1093 return;
1094
1095 lockdep_assert_held(&n->list_lock);
1096 list_del(&page->lru);
1097}
1098
1099/* Tracking of the number of slabs for debugging purposes */
1100static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1101{
1102 struct kmem_cache_node *n = get_node(s, node);
1103
1104 return atomic_long_read(&n->nr_slabs);
1105}
1106
1107static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1108{
1109 return atomic_long_read(&n->nr_slabs);
1110}
1111
1112static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
1113{
1114 struct kmem_cache_node *n = get_node(s, node);
1115
1116 /*
1117 * May be called early in order to allocate a slab for the
1118 * kmem_cache_node structure. Solve the chicken-egg
1119 * dilemma by deferring the increment of the count during
1120 * bootstrap (see early_kmem_cache_node_alloc).
1121 */
1122 if (likely(n)) {
1123 atomic_long_inc(&n->nr_slabs);
1124 atomic_long_add(objects, &n->total_objects);
1125 }
1126}
1127static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
1128{
1129 struct kmem_cache_node *n = get_node(s, node);
1130
1131 atomic_long_dec(&n->nr_slabs);
1132 atomic_long_sub(objects, &n->total_objects);
1133}
1134
1135/* Object debug checks for alloc/free paths */
1136static void setup_object_debug(struct kmem_cache *s, struct page *page,
1137 void *object)
1138{
1139 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
1140 return;
1141
1142 init_object(s, object, SLUB_RED_INACTIVE);
1143 init_tracking(s, object);
1144}
1145
1146static noinline int alloc_debug_processing(struct kmem_cache *s,
1147 struct page *page,
1148 void *object, unsigned long addr)
1149{
1150#ifdef CONFIG_RKP_KDP
1151 check_cred_cache(s,0);
1152#endif /* CONFIG_RKP_KDP */
1153 if (!check_slab(s, page))
1154 goto bad;
1155
1156 if (!check_valid_pointer(s, page, object)) {
1157 object_err(s, page, object, "Freelist Pointer check fails");
1158 goto bad;
1159 }
1160
1161 if (!check_object(s, page, object, SLUB_RED_INACTIVE))
1162 goto bad;
1163
1164 /* Success perform special debug activities for allocs */
1165 if (s->flags & SLAB_STORE_USER)
1166 set_track(s, object, TRACK_ALLOC, addr);
1167 trace(s, page, object, 1);
1168 init_object(s, object, SLUB_RED_ACTIVE);
1169 return 1;
1170
1171bad:
1172 BUG();
1173 if (PageSlab(page)) {
1174 /*
1175 * If this is a slab page then let's do the best we can
1176 * to avoid issues in the future. Marking all objects
1177 * as used avoids touching the remaining objects.
1178 */
1179 slab_fix(s, "Marking all objects used");
1180 page->inuse = page->objects;
1181 page->freelist = NULL;
1182 }
1183 return 0;
1184}
1185
1186/* Supports checking bulk free of a constructed freelist */
1187static noinline struct kmem_cache_node *free_debug_processing(
1188 struct kmem_cache *s, struct page *page,
1189 void *head, void *tail, int bulk_cnt,
1190 unsigned long addr, unsigned long *flags)
1191{
1192 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1193 void *object = head;
1194 int cnt = 0;
1195
1196#ifdef CONFIG_RKP_KDP
1197 check_cred_cache(s,NULL);
1198#endif /* CONFIG_RKP_KDP */
1199 spin_lock_irqsave(&n->list_lock, *flags);
1200 slab_lock(page);
1201
1202 if (!check_slab(s, page))
1203 goto fail;
1204
1205next_object:
1206 cnt++;
1207
1208 if (!check_valid_pointer(s, page, object)) {
1209 slab_err(s, page, "Invalid object pointer 0x%p", object);
1210 goto fail;
1211 }
1212
1213 if (on_freelist(s, page, object)) {
1214 object_err(s, page, object, "Object already free");
1215 goto fail;
1216 }
1217
1218 if (!check_object(s, page, object, SLUB_RED_ACTIVE))
1219 goto out;
1220
1221 if (unlikely(s != page->slab_cache)) {
1222 if (!PageSlab(page)) {
1223 slab_err(s, page, "Attempt to free object(0x%p) outside of slab",
1224 object);
1225 } else if (!page->slab_cache) {
1226 pr_err("SLUB <none>: no slab for object 0x%p.\n",
1227 object);
1228 dump_stack();
1229 } else
1230 object_err(s, page, object,
1231 "page slab pointer corrupt.");
1232 goto fail;
1233 }
1234
1235 if (s->flags & SLAB_STORE_USER)
1236 set_track(s, object, TRACK_FREE, addr);
1237 trace(s, page, object, 0);
1238 /* Freepointer not overwritten by init_object(), SLAB_POISON moved it */
1239 init_object(s, object, SLUB_RED_INACTIVE);
1240
1241 /* Reached end of constructed freelist yet? */
1242 if (object != tail) {
1243 object = get_freepointer(s, object);
1244 goto next_object;
1245 }
1246out:
1247 if (cnt != bulk_cnt) {
1248 slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n",
1249 bulk_cnt, cnt);
1250 BUG();
1251 }
1252
1253 slab_unlock(page);
1254 /*
1255 * Keep node_lock to preserve integrity
1256 * until the object is actually freed
1257 */
1258 return n;
1259
1260fail:
1261 BUG();
1262 slab_unlock(page);
1263 spin_unlock_irqrestore(&n->list_lock, *flags);
1264 slab_fix(s, "Object at 0x%p not freed", object);
1265 return NULL;
1266}
1267
1268static int __init setup_slub_debug(char *str)
1269{
1270 slub_debug = DEBUG_DEFAULT_FLAGS;
1271 if (*str++ != '=' || !*str)
1272 /*
1273 * No options specified. Switch on full debugging.
1274 */
1275 goto out;
1276
1277 if (*str == ',')
1278 /*
1279 * No options but restriction on slabs. This means full
1280 * debugging for slabs matching a pattern.
1281 */
1282 goto check_slabs;
1283
1284 slub_debug = 0;
1285 if (*str == '-')
1286 /*
1287 * Switch off all debugging measures.
1288 */
1289 goto out;
1290
1291 /*
1292 * Determine which debug features should be switched on
1293 */
1294 for (; *str && *str != ','; str++) {
1295 switch (tolower(*str)) {
1296 case 'f':
1297 slub_debug |= SLAB_DEBUG_FREE;
1298 break;
1299 case 'z':
1300 slub_debug |= SLAB_RED_ZONE;
1301 break;
1302 case 'p':
1303 slub_debug |= SLAB_POISON;
1304 break;
1305 case 'u':
1306 slub_debug |= SLAB_STORE_USER;
1307 break;
1308 case 't':
1309 slub_debug |= SLAB_TRACE;
1310 break;
1311 case 'a':
1312 slub_debug |= SLAB_FAILSLAB;
1313 break;
1314 case 'o':
1315 /*
1316 * Avoid enabling debugging on caches if its minimum
1317 * order would increase as a result.
1318 */
1319 disable_higher_order_debug = 1;
1320 break;
1321 default:
1322 pr_err("slub_debug option '%c' unknown. skipped\n",
1323 *str);
1324 }
1325 }
1326
1327check_slabs:
1328 if (*str == ',')
1329 slub_debug_slabs = str + 1;
1330out:
1331 return 1;
1332}
1333
1334__setup("slub_debug", setup_slub_debug);
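/*
 * Example boot parameters: "slub_debug" enables all DEBUG_DEFAULT_FLAGS,
 * "slub_debug=FZP" selects sanity checks, red zoning and poisoning for every
 * cache, and "slub_debug=ZU,dentry" restricts red zoning plus user tracking
 * to caches whose name starts with "dentry".
 */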
1335
1336unsigned long kmem_cache_flags(unsigned long object_size,
1337 unsigned long flags, const char *name,
1338 void (*ctor)(void *))
1339{
1340#ifdef CONFIG_RKP_KDP
1341 if (name && (!strcmp(name, CRED_JAR_RO) || !strcmp(name, TSEC_JAR) || !strcmp(name, VFSMNT_JAR)))
1342 return flags;
1343#endif
1344
1345 /*
1346 * Enable debugging if selected on the kernel commandline.
1347 */
1348 if (slub_debug && (!slub_debug_slabs || (name &&
1349 !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))))) {
1350 flags |= slub_debug;
1351
1352 if (name &&
1353 (!strncmp(name, "zspage", strlen("zspage")) ||
1354 !strncmp(name, "zs_handle", strlen("zs_handle")) ||
1355 !strncmp(name, "zswap_entry", strlen("zswap_entry")) ||
1356 !strncmp(name, "avtab_node", strlen("avtab_node"))))
1357 flags &= ~SLAB_STORE_USER;
1358 }
1359
1360 return flags;
1361}
1362#else /* !CONFIG_SLUB_DEBUG */
1363static inline void setup_object_debug(struct kmem_cache *s,
1364 struct page *page, void *object) {}
1365
1366static inline int alloc_debug_processing(struct kmem_cache *s,
1367 struct page *page, void *object, unsigned long addr) { return 0; }
1368
1369static inline struct kmem_cache_node *free_debug_processing(
1370 struct kmem_cache *s, struct page *page,
1371 void *head, void *tail, int bulk_cnt,
1372 unsigned long addr, unsigned long *flags) { return NULL; }
1373
1374static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
1375 { return 1; }
1376static inline int check_object(struct kmem_cache *s, struct page *page,
1377 void *object, u8 val) { return 1; }
1378static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1379 struct page *page) {}
1380static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
1381 struct page *page) {}
1382unsigned long kmem_cache_flags(unsigned long object_size,
1383 unsigned long flags, const char *name,
1384 void (*ctor)(void *))
1385{
1386 return flags;
1387}
1388#define slub_debug 0
1389
1390#define disable_higher_order_debug 0
1391
1392static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1393 { return 0; }
1394static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1395 { return 0; }
1396static inline void inc_slabs_node(struct kmem_cache *s, int node,
1397 int objects) {}
1398static inline void dec_slabs_node(struct kmem_cache *s, int node,
1399 int objects) {}
1400
1401#endif /* CONFIG_SLUB_DEBUG */
1402
1403/*
1404 * Hooks for other subsystems that check memory allocations. In a typical
1405 * production configuration these hooks all should produce no code at all.
1406 */
1407static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
1408{
1409 kmemleak_alloc(ptr, size, 1, flags);
1410 kasan_kmalloc_large(ptr, size, flags);
1411}
1412
1413static inline void kfree_hook(const void *x)
1414{
1415 kmemleak_free(x);
1416 kasan_kfree_large(x);
1417}
1418
1419static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
1420 gfp_t flags)
1421{
1422 flags &= gfp_allowed_mask;
1423 lockdep_trace_alloc(flags);
1424 might_sleep_if(gfpflags_allow_blocking(flags));
1425
1426 if (should_failslab(s->object_size, flags, s->flags))
1427 return NULL;
1428
1429 return memcg_kmem_get_cache(s, flags);
1430}
1431
1432static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
1433 size_t size, void **p)
1434{
1435 size_t i;
1436
1437 flags &= gfp_allowed_mask;
1438 for (i = 0; i < size; i++) {
1439 void *object = p[i];
1440
1441 kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
1442 kmemleak_alloc_recursive(object, s->object_size, 1,
1443 s->flags, flags);
1444 kasan_slab_alloc(s, object, flags);
1445 }
1446 memcg_kmem_put_cache(s);
1447}
1448
1449static inline void *slab_free_hook(struct kmem_cache *s, void *x)
1450{
1451 void *freeptr;
1452
1453 kmemleak_free_recursive(x, s->flags);
1454
1455 /*
1456 * Trouble is that we may no longer disable interrupts in the fast path
1457 * So in order to make the debug calls that expect irqs to be
1458 * disabled we need to disable interrupts temporarily.
1459 */
1460#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
1461 {
1462 unsigned long flags;
1463
1464 local_irq_save(flags);
1465 kmemcheck_slab_free(s, x, s->object_size);
1466 debug_check_no_locks_freed(x, s->object_size);
1467 local_irq_restore(flags);
1468 }
1469#endif
1470 if (!(s->flags & SLAB_DEBUG_OBJECTS))
1471 debug_check_no_obj_freed(x, s->object_size);
1472
1473 freeptr = get_freepointer(s, x);
1474 /*
1475 * kasan_slab_free() may put x into memory quarantine, delaying its
1476 * reuse. In this case the object's freelist pointer is changed.
1477 */
1478 kasan_slab_free(s, x);
1479 return freeptr;
1480}
1481
1482static inline void slab_free_freelist_hook(struct kmem_cache *s,
1483 void *head, void *tail)
1484{
1485/*
1486 * Compiler cannot detect this function can be removed if slab_free_hook()
1487 * evaluates to nothing. Thus, catch all relevant config debug options here.
1488 */
1489#if defined(CONFIG_KMEMCHECK) || \
1490 defined(CONFIG_LOCKDEP) || \
1491 defined(CONFIG_DEBUG_KMEMLEAK) || \
1492 defined(CONFIG_DEBUG_OBJECTS_FREE) || \
1493 defined(CONFIG_KASAN)
1494
1495 void *object = head;
1496 void *tail_obj = tail ? : head;
1497 void *freeptr;
1498
1499 do {
1500 freeptr = slab_free_hook(s, object);
1501 } while ((object != tail_obj) && (object = freeptr));
1502#endif
1503}
1504
1505static void setup_object(struct kmem_cache *s, struct page *page,
1506 void *object)
1507{
1508 setup_object_debug(s, page, object);
1509 kasan_init_slab_obj(s, object);
1510 if (unlikely(s->ctor)) {
1511 kasan_unpoison_object_data(s, object);
1512 s->ctor(object);
1513 kasan_poison_object_data(s, object);
1514 }
1515}
1516
1517/*
1518 * Slab allocation and freeing
1519 */
1520static inline struct page *alloc_slab_page(struct kmem_cache *s,
1521 gfp_t flags, int node, struct kmem_cache_order_objects oo)
1522{
1523 struct page *page;
1524 int order = oo_order(oo);
1525
1526 flags |= __GFP_NOTRACK;
1527
1528 if (node == NUMA_NO_NODE)
1529 page = alloc_pages(flags, order);
1530 else
1531 page = __alloc_pages_node(node, flags, order);
1532
1533 if (page && memcg_charge_slab(page, flags, order, s)) {
1534 __free_pages(page, order);
1535 page = NULL;
1536 }
1537
1538 return page;
1539}
1540#ifdef CONFIG_RKP_DMAP_PROT
1541void dmap_prot(u64 addr,u64 order,u64 val)
1542{
1543 if(rkp_cred_enable)
1544 rkp_call(RKP_CMDID(0x4a),addr, order, val, 0, 0);
1545}
1546#endif
1547static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1548{
1549 struct page *page;
1550 struct kmem_cache_order_objects oo = s->oo;
1551#ifdef CONFIG_RKP_KDP
1552 void *virt_page = NULL;
1553#endif /*CONFIG_RKP_KDP*/
1554 gfp_t alloc_gfp;
1555 void *start, *p;
1556 int idx, order;
1557
1558 flags &= gfp_allowed_mask;
1559
1560 if (gfpflags_allow_blocking(flags))
1561 local_irq_enable();
1562
1563 flags |= s->allocflags;
1564
1565 /*
1566 * Let the initial higher-order allocation fail under memory pressure
1567 * so we fall-back to the minimum order allocation.
1568 */
1569 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1570 if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min))
1571 alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~__GFP_DIRECT_RECLAIM;
1572
1573#ifdef CONFIG_RKP_KDP
1574 if (s->name &&
1575 (!strcmp(s->name, CRED_JAR_RO) ||
1576 !strcmp(s->name, TSEC_JAR)||
1577 !strcmp(s->name, VFSMNT_JAR))) {
1578
1579 virt_page = rkp_ro_alloc();
1580 if(!virt_page)
1581 goto def_alloc;
1582
1583 page = virt_to_page(virt_page);
1584 oo = s->min;
1585 } else {
1586def_alloc:
1587#endif /*CONFIG_RKP_KDP*/
1588 page = alloc_slab_page(s, alloc_gfp, node, oo);
1589 if (unlikely(!page)) {
1590 oo = s->min;
1591 alloc_gfp = flags;
1592 /*
1593 * Allocation may have failed due to fragmentation.
1594 * Try a lower order alloc if possible
1595 */
1596 page = alloc_slab_page(s, alloc_gfp, node, oo);
1597 if (unlikely(!page))
1598 goto out;
1599 stat(s, ORDER_FALLBACK);
1600 }
1601
1602#ifdef CONFIG_RKP_KDP
1603 }
1604#endif /*CONFIG_RKP_KDP*/
1605
1606 if (kmemcheck_enabled &&
1607 !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
1608 int pages = 1 << oo_order(oo);
1609
1610 kmemcheck_alloc_shadow(page, oo_order(oo), alloc_gfp, node);
1611
1612 /*
1613 * Objects from caches that have a constructor don't get
1614 * cleared when they're allocated, so we need to do it here.
1615 */
1616 if (s->ctor)
1617 kmemcheck_mark_uninitialized_pages(page, pages);
1618 else
1619 kmemcheck_mark_unallocated_pages(page, pages);
1620 }
1621
1622 page->objects = oo_objects(oo);
1623
1624 order = compound_order(page);
1625 page->slab_cache = s;
1626 __SetPageSlab(page);
1627 if (page_is_pfmemalloc(page))
1628 SetPageSlabPfmemalloc(page);
1629
1630 start = page_address(page);
1631
1632 if (unlikely(s->flags & SLAB_POISON))
1633 memset(start, POISON_INUSE, PAGE_SIZE << order);
1634
1635 kasan_poison_slab(page);
1636#ifdef CONFIG_RKP_KDP
1637 /*
1638 * We modify the following so that slab alloc for protected data
1639 * types are allocated from our own pool.
1640 */
1641 if (s->name) {
1642 u64 sc,va_page;
1643 va_page = (u64)__va(page_to_phys(page));
1644
1645 if(!strcmp(s->name, CRED_JAR_RO)){
1646 for(sc = 0; sc < (1 << oo_order(oo)) ; sc++) {
1647 rkp_call(RKP_CMDID(0x50),va_page,0,0,0,0);
1648 va_page += PAGE_SIZE;
1649 }
1650 }
1651 if(!strcmp(s->name, TSEC_JAR)){
1652 for(sc = 0; sc < (1 << oo_order(oo)) ; sc++) {
1653 rkp_call(RKP_CMDID(0x4e),va_page,0,0,0,0);
1654 va_page += PAGE_SIZE;
1655 }
1656 }
1657 if(!strcmp(s->name, VFSMNT_JAR)){
1658 for(sc = 0; sc < (1 << oo_order(oo)) ; sc++) {
1659 rkp_call(RKP_CMDID(0x4f),va_page,0,0,0,0);
1660 va_page += PAGE_SIZE;
1661 }
1662 }
1663 }
1664#endif
1665
1666 for_each_object_idx(p, idx, s, start, page->objects) {
1667 setup_object(s, page, p);
1668 if (likely(idx < page->objects))
1669 set_freepointer(s, p, p + s->size);
1670 else
1671 set_freepointer(s, p, NULL);
1672 }
1673#ifdef CONFIG_RKP_DMAP_PROT
1674 dmap_prot((u64)page_to_phys(page),(u64)compound_order(page),1);
1675#endif
1676
1677 page->freelist = fixup_red_left(s, start);
1678 page->inuse = page->objects;
1679 page->frozen = 1;
1680
1681out:
1682 if (gfpflags_allow_blocking(flags))
1683 local_irq_disable();
1684 if (!page)
1685 return NULL;
1686
1687 mod_zone_page_state(page_zone(page),
1688 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1689 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1690 1 << oo_order(oo));
1691
1692 inc_slabs_node(s, page_to_nid(page), page->objects);
1693 return page;
1694}
1695
1696static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1697{
1698 if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
1699 pr_emerg("gfp: %u\n", flags & GFP_SLAB_BUG_MASK);
1700 BUG();
1701 }
1702
1703 return allocate_slab(s,
1704 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1705}
1706#ifdef CONFIG_RKP_KDP
1707int rkp_from_vfsmnt_cache(unsigned long addr)
1708{
1709 static void *objp;
1710 static struct kmem_cache *s;
1711 static struct page *page;
1712
1713 objp = (void *)addr;
1714
1715 if(!objp)
1716 return 0;
1717
1718 page = virt_to_head_page(objp);
1719 s = page->slab_cache;
1720 if(s && s->name) {
1721 if(!strcmp(s->name,"vfsmnt_cache")) {
1722 return 1;
1723 }
1724 }
1725 return 0;
1726}
1727
1728extern unsigned int is_rkp_ro_page(u64 addr);
1729void free_ro_pages(struct kmem_cache *s,struct page *page, int order)
1730{
1731 unsigned long flags;
1732 unsigned long long sc,va_page;
1733
1734 sc = 0;
1735 va_page = (unsigned long long)__va(page_to_phys(page));
1736 if(is_rkp_ro_page(va_page)){
1737 for(sc = 0; sc < (1 << order); sc++) {
1738 rkp_call(RKP_CMDID(0x48),va_page,0,0,0,0);
1739 rkp_ro_free((void *)va_page);
1740 va_page += PAGE_SIZE;
1741 }
1742 return;
1743 }
1744
1745 spin_lock_irqsave(&ro_pages_lock,flags);
1746 for(sc = 0; sc < (1 << order); sc++) {
1747 rkp_call(RKP_CMDID(0x48),va_page,0,0,0,0);
1748 va_page += PAGE_SIZE;
1749 }
1750 __free_kmem_pages(page, order);
1751 spin_unlock_irqrestore(&ro_pages_lock,flags);
1752}
1753#endif /*CONFIG_RKP_KDP*/
1754
1755static void __free_slab(struct kmem_cache *s, struct page *page)
1756{
1757 int order = compound_order(page);
1758 int pages = 1 << order;
1759
1760#ifdef CONFIG_RKP_DMAP_PROT
1761 dmap_prot((u64)page_to_phys(page),(u64)compound_order(page),0);
1762#endif
1763
1764 if (kmem_cache_debug(s)) {
1765 void *p;
1766
1767 slab_pad_check(s, page);
1768 for_each_object(p, s, page_address(page),
1769 page->objects)
1770 check_object(s, page, p, SLUB_RED_INACTIVE);
1771 }
1772
1773 kmemcheck_free_shadow(page, compound_order(page));
1774
1775 mod_zone_page_state(page_zone(page),
1776 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1777 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1778 -pages);
1779
1780 __ClearPageSlabPfmemalloc(page);
1781 __ClearPageSlab(page);
1782
1783 page_mapcount_reset(page);
1784 if (current->reclaim_state)
1785 current->reclaim_state->reclaimed_slab += pages;
1786
1787#ifdef CONFIG_RKP_KDP
1788 /* We free the protected pages here. */
1789 if (s->name && (!strcmp(s->name, CRED_JAR_RO) ||
1790 !strcmp(s->name, TSEC_JAR) ||
1791 !strcmp(s->name, VFSMNT_JAR)))
1792 free_ro_pages(s,page, order);
1793 else
1794#endif
1795 __free_kmem_pages(page, order);
1796}
1797
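/*
 * SLAB_DESTROY_BY_RCU frees slabs via call_rcu(). The rcu_head normally
 * reuses page->lru, but if struct rcu_head is larger than that, s->reserved
 * bytes are kept at the end of every slab and free_slab() places the
 * rcu_head there instead.
 */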
1798#define need_reserve_slab_rcu \
1799 (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
1800
1801static void rcu_free_slab(struct rcu_head *h)
1802{
1803 struct page *page;
1804
1805 if (need_reserve_slab_rcu)
1806 page = virt_to_head_page(h);
1807 else
1808 page = container_of((struct list_head *)h, struct page, lru);
1809
1810 __free_slab(page->slab_cache, page);
1811}
1812
1813static void free_slab(struct kmem_cache *s, struct page *page)
1814{
1815 if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
1816 struct rcu_head *head;
1817
1818 if (need_reserve_slab_rcu) {
1819 int order = compound_order(page);
1820 int offset = (PAGE_SIZE << order) - s->reserved;
1821
1822 VM_BUG_ON(s->reserved != sizeof(*head));
1823 head = page_address(page) + offset;
1824 } else {
1825 head = &page->rcu_head;
1826 }
1827
1828 call_rcu(head, rcu_free_slab);
1829 } else
1830 __free_slab(s, page);
1831}
1832
1833static void discard_slab(struct kmem_cache *s, struct page *page)
1834{
1835 dec_slabs_node(s, page_to_nid(page), page->objects);
1836 free_slab(s, page);
1837}
1838
1839/*
1840 * Management of partially allocated slabs.
1841 */
1842static inline void
1843__add_partial(struct kmem_cache_node *n, struct page *page, int tail)
1844{
1845 n->nr_partial++;
1846 if (tail == DEACTIVATE_TO_TAIL)
1847 list_add_tail(&page->lru, &n->partial);
1848 else
1849 list_add(&page->lru, &n->partial);
1850}
1851
1852static inline void add_partial(struct kmem_cache_node *n,
1853 struct page *page, int tail)
1854{
1855 lockdep_assert_held(&n->list_lock);
1856 __add_partial(n, page, tail);
1857}
1858
1859static inline void
1860__remove_partial(struct kmem_cache_node *n, struct page *page)
1861{
1862 list_del(&page->lru);
1863 n->nr_partial--;
1864}
1865
1866static inline void remove_partial(struct kmem_cache_node *n,
1867 struct page *page)
1868{
1869 lockdep_assert_held(&n->list_lock);
1870 __remove_partial(n, page);
1871}
1872
1873/*
1874 * Remove slab from the partial list, freeze it and
1875 * return the pointer to the freelist.
1876 *
1877 * Returns a list of objects or NULL if it fails.
1878 */
1879static inline void *acquire_slab(struct kmem_cache *s,
1880 struct kmem_cache_node *n, struct page *page,
1881 int mode, int *objects)
1882{
1883 void *freelist;
1884 unsigned long counters;
1885 struct page new;
1886
1887 lockdep_assert_held(&n->list_lock);
1888
1889 /*
1890 * Zap the freelist and set the frozen bit.
1891 * The old freelist is the list of objects for the
1892 * per cpu allocation list.
1893 */
1894 freelist = page->freelist;
1895 counters = page->counters;
1896 new.counters = counters;
1897 *objects = new.objects - new.inuse;
1898 if (mode) {
1899 new.inuse = page->objects;
1900 new.freelist = NULL;
1901 } else {
1902 new.freelist = freelist;
1903 }
1904
1905 VM_BUG_ON(new.frozen);
1906 new.frozen = 1;
1907
1908 if (!__cmpxchg_double_slab(s, page,
1909 freelist, counters,
1910 new.freelist, new.counters,
1911 "acquire_slab"))
1912 return NULL;
1913
1914 remove_partial(n, page);
1915 WARN_ON(!freelist);
1916 return freelist;
1917}
1918
1919static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
1920static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
1921
1922/*
1923 * Try to allocate a partial slab from a specific node.
1924 */
1925static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
1926 struct kmem_cache_cpu *c, gfp_t flags)
1927{
1928 struct page *page, *page2;
1929 void *object = NULL;
1930 int available = 0;
1931 int objects;
1932
1933 /*
1934 * Racy check. If we mistakenly see no partial slabs then we
1935 * just allocate an empty slab. If we mistakenly try to get a
1936 * partial slab and there is none available then get_partial_node()
1937 * will return NULL.
1938 */
1939 if (!n || !n->nr_partial)
1940 return NULL;
1941
1942 spin_lock(&n->list_lock);
1943 list_for_each_entry_safe(page, page2, &n->partial, lru) {
1944 void *t;
1945
1946 if (!pfmemalloc_match(page, flags))
1947 continue;
1948
1949 t = acquire_slab(s, n, page, object == NULL, &objects);
1950 if (!t)
1951 break;
1952
1953 available += objects;
1954 if (!object) {
1955 c->page = page;
1956 stat(s, ALLOC_FROM_PARTIAL);
1957 object = t;
1958 } else {
1959 put_cpu_partial(s, page, 0);
1960 stat(s, CPU_PARTIAL_NODE);
1961 }
1962 if (!kmem_cache_has_cpu_partial(s)
1963 || available > s->cpu_partial / 2)
1964 break;
1965
1966 }
1967 spin_unlock(&n->list_lock);
1968 return object;
1969}
1970
1971/*
1972 * Get a page from somewhere. Search in increasing NUMA distances.
1973 */
1974static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
1975 struct kmem_cache_cpu *c)
1976{
1977#ifdef CONFIG_NUMA
1978 struct zonelist *zonelist;
1979 struct zoneref *z;
1980 struct zone *zone;
1981 enum zone_type high_zoneidx = gfp_zone(flags);
1982 void *object;
1983 unsigned int cpuset_mems_cookie;
1984
1985 /*
1986 * The defrag ratio allows a configuration of the tradeoffs between
1987 * inter node defragmentation and node local allocations. A lower
1988 * defrag_ratio increases the tendency to do local allocations
1989 * instead of attempting to obtain partial slabs from other nodes.
1990 *
1991 * If the defrag_ratio is set to 0 then kmalloc() always
1992 * returns node local objects. If the ratio is higher then kmalloc()
1993 * may return off node objects because partial slabs are obtained
1994 * from other nodes and filled up.
1995 *
1996 * If /sys/kernel/slab/xx/defrag_ratio is set to 100 (which makes
1997 * defrag_ratio = 1000) then every (well almost) allocation will
1998 * first attempt to defrag slab caches on other nodes. This means
1999 * scanning over all nodes to look for partial slabs which may be
2000 * expensive if we do it every time we are trying to find a slab
2001 * with available objects.
2002 */
2003 if (!s->remote_node_defrag_ratio ||
2004 get_cycles() % 1024 > s->remote_node_defrag_ratio)
2005 return NULL;
2006
2007 do {
2008 cpuset_mems_cookie = read_mems_allowed_begin();
2009 zonelist = node_zonelist(mempolicy_slab_node(), flags);
2010 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
2011 struct kmem_cache_node *n;
2012
2013 n = get_node(s, zone_to_nid(zone));
2014
2015 if (n && cpuset_zone_allowed(zone, flags) &&
2016 n->nr_partial > s->min_partial) {
2017 object = get_partial_node(s, n, c, flags);
2018 if (object) {
2019 /*
2020 * Don't check read_mems_allowed_retry()
2021 * here - if mems_allowed was updated in
2022 * parallel, that was a harmless race
2023 * between allocation and the cpuset
2024 * update
2025 */
2026 return object;
2027 }
2028 }
2029 }
2030 } while (read_mems_allowed_retry(cpuset_mems_cookie));
2031#endif
2032 return NULL;
2033}
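
/*
 * For illustration: with the default remote_node_defrag_ratio of 1000 (set
 * in kmem_cache_open()), the get_cycles() % 1024 check above only bails out
 * for the values 1001..1023, i.e. roughly 2% of calls skip the remote scan.
 * A ratio of 0 disables scanning of remote partial lists entirely.
 */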
2034
2035/*
2036 * Get a partial page, lock it and return it.
2037 */
2038static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
2039 struct kmem_cache_cpu *c)
2040{
2041 void *object;
2042 int searchnode = node;
2043
2044 if (node == NUMA_NO_NODE)
2045 searchnode = numa_mem_id();
2046 else if (!node_present_pages(node))
2047 searchnode = node_to_mem_node(node);
2048
2049 object = get_partial_node(s, get_node(s, searchnode), c, flags);
2050 if (object || node != NUMA_NO_NODE)
2051 return object;
2052
2053 return get_any_partial(s, flags, c);
2054}
2055
2056#ifdef CONFIG_PREEMPT
2057/*
2058 * Calculate the next globally unique transaction for disambiguation
2059 * during cmpxchg. The transactions start with the cpu number and are then
2060 * incremented by TID_STEP (CONFIG_NR_CPUS rounded up to a power of two).
2061 */
2062#define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
2063#else
2064/*
2065 * No preemption supported, therefore there is also no need to check for
2066 * different cpus.
2067 */
2068#define TID_STEP 1
2069#endif
2070
2071static inline unsigned long next_tid(unsigned long tid)
2072{
2073 return tid + TID_STEP;
2074}
2075
2076static inline unsigned int tid_to_cpu(unsigned long tid)
2077{
2078 return tid % TID_STEP;
2079}
2080
2081static inline unsigned long tid_to_event(unsigned long tid)
2082{
2083 return tid / TID_STEP;
2084}
2085
2086static inline unsigned int init_tid(int cpu)
2087{
2088 return cpu;
2089}
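
/*
 * Worked example (assuming CONFIG_NR_CPUS rounds up to 8, so TID_STEP == 8):
 * cpu 3 starts at tid 3 and advances 3 -> 11 -> 19 -> ...  For tid 19,
 * tid_to_cpu(19) == 3 and tid_to_event(19) == 2, so a tid identifies both
 * the cpu it was generated on and how many operations happened there.
 */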
2090
2091static inline void note_cmpxchg_failure(const char *n,
2092 const struct kmem_cache *s, unsigned long tid)
2093{
2094#ifdef SLUB_DEBUG_CMPXCHG
2095 unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
2096
2097 pr_info("%s %s: cmpxchg redo ", n, s->name);
2098
2099#ifdef CONFIG_PREEMPT
2100 if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
2101 pr_warn("due to cpu change %d -> %d\n",
2102 tid_to_cpu(tid), tid_to_cpu(actual_tid));
2103 else
2104#endif
2105 if (tid_to_event(tid) != tid_to_event(actual_tid))
2106 pr_warn("due to cpu running other code. Event %ld->%ld\n",
2107 tid_to_event(tid), tid_to_event(actual_tid));
2108 else
2109 pr_warn("for unknown reason: actual=%lx was=%lx target=%lx\n",
2110 actual_tid, tid, next_tid(tid));
2111#endif
2112 stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
2113}
2114
2115static void init_kmem_cache_cpus(struct kmem_cache *s)
2116{
2117 int cpu;
2118
2119 for_each_possible_cpu(cpu)
2120 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
2121}
2122
2123/*
2124 * Remove the cpu slab
2125 */
2126static void deactivate_slab(struct kmem_cache *s, struct page *page,
2127 void *freelist)
2128{
2129 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
2130 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
2131 int lock = 0;
2132 enum slab_modes l = M_NONE, m = M_NONE;
2133 void *nextfree;
2134 int tail = DEACTIVATE_TO_HEAD;
2135 struct page new;
2136 struct page old;
2137
2138 if (page->freelist) {
2139 stat(s, DEACTIVATE_REMOTE_FREES);
2140 tail = DEACTIVATE_TO_TAIL;
2141 }
2142
2143 /*
2144 * Stage one: Free all available per cpu objects back
2145 * to the page freelist while it is still frozen. Leave the
2146 * last one.
2147 *
2148 * There is no need to take the list->lock because the page
2149 * is still frozen.
2150 */
2151 while (freelist && (nextfree = get_freepointer(s, freelist))) {
2152 void *prior;
2153 unsigned long counters;
2154
2155 do {
2156 prior = page->freelist;
2157 counters = page->counters;
2158 set_freepointer(s, freelist, prior);
2159 new.counters = counters;
2160 new.inuse--;
2161 VM_BUG_ON(!new.frozen);
2162
2163 } while (!__cmpxchg_double_slab(s, page,
2164 prior, counters,
2165 freelist, new.counters,
2166 "drain percpu freelist"));
2167
2168 freelist = nextfree;
2169 }
2170
2171 /*
2172 * Stage two: Ensure that the page is unfrozen while the
2173 * list presence reflects the actual number of objects
2174 * during unfreeze.
2175 *
2176 * We setup the list membership and then perform a cmpxchg
2177 * with the count. If there is a mismatch then the page
2178 * is not unfrozen but the page is on the wrong list.
2179 *
2180 * Then we restart the process which may have to remove
2181 * the page from the list that we just put it on again
2182 * because the number of objects in the slab may have
2183 * changed.
2184 */
2185redo:
2186
2187 old.freelist = page->freelist;
2188 old.counters = page->counters;
2189 VM_BUG_ON(!old.frozen);
2190
2191 /* Determine target state of the slab */
2192 new.counters = old.counters;
2193 if (freelist) {
2194 new.inuse--;
2195 set_freepointer(s, freelist, old.freelist);
2196 new.freelist = freelist;
2197 } else
2198 new.freelist = old.freelist;
2199
2200 new.frozen = 0;
2201
2202 if (!new.inuse && n->nr_partial >= s->min_partial)
2203 m = M_FREE;
2204 else if (new.freelist) {
2205 m = M_PARTIAL;
2206 if (!lock) {
2207 lock = 1;
2208 /*
2209 * Taking the spinlock removes the possibility
2210 * that acquire_slab() will see a slab page that
2211 * is frozen
2212 */
2213 spin_lock(&n->list_lock);
2214 }
2215 } else {
2216 m = M_FULL;
2217 if (kmem_cache_debug(s) && !lock) {
2218 lock = 1;
2219 /*
2220 * This also ensures that the scanning of full
2221 * slabs from diagnostic functions will not see
2222 * any frozen slabs.
2223 */
2224 spin_lock(&n->list_lock);
2225 }
2226 }
2227
2228 if (l != m) {
2229
2230 if (l == M_PARTIAL)
2231
2232 remove_partial(n, page);
2233
2234 else if (l == M_FULL)
2235
2236 remove_full(s, n, page);
2237
2238 if (m == M_PARTIAL) {
2239
2240 add_partial(n, page, tail);
2241 stat(s, tail);
2242
2243 } else if (m == M_FULL) {
2244
2245 stat(s, DEACTIVATE_FULL);
2246 add_full(s, n, page);
2247
2248 }
2249 }
2250
2251 l = m;
2252 if (!__cmpxchg_double_slab(s, page,
2253 old.freelist, old.counters,
2254 new.freelist, new.counters,
2255 "unfreezing slab"))
2256 goto redo;
2257
2258 if (lock)
2259 spin_unlock(&n->list_lock);
2260
2261 if (m == M_FREE) {
2262 stat(s, DEACTIVATE_EMPTY);
2263 discard_slab(s, page);
2264 stat(s, FREE_SLAB);
2265 }
2266}
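
/*
 * Summary of the target states chosen above: a slab left with no objects in
 * use is discarded (M_FREE) once the node already holds at least min_partial
 * partial slabs, a slab that still has free objects goes back onto the node
 * partial list (M_PARTIAL), and a completely full slab is taken off the
 * lists (and, with debugging enabled, tracked on the full list, M_FULL).
 */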
2267
2268/*
2269 * Unfreeze all the cpu partial slabs.
2270 *
2271 * This function must be called with interrupts disabled
2272 * for the cpu using c (or some other guarantee must exist
2273 * that there are no concurrent accesses).
2274 */
2275static void unfreeze_partials(struct kmem_cache *s,
2276 struct kmem_cache_cpu *c)
2277{
2278#ifdef CONFIG_SLUB_CPU_PARTIAL
2279 struct kmem_cache_node *n = NULL, *n2 = NULL;
2280 struct page *page, *discard_page = NULL;
2281
2282 while ((page = c->partial)) {
2283 struct page new;
2284 struct page old;
2285
2286 c->partial = page->next;
2287
2288 n2 = get_node(s, page_to_nid(page));
2289 if (n != n2) {
2290 if (n)
2291 spin_unlock(&n->list_lock);
2292
2293 n = n2;
2294 spin_lock(&n->list_lock);
2295 }
2296
2297 do {
2298
2299 old.freelist = page->freelist;
2300 old.counters = page->counters;
2301 VM_BUG_ON(!old.frozen);
2302
2303 new.counters = old.counters;
2304 new.freelist = old.freelist;
2305
2306 new.frozen = 0;
2307
2308 } while (!__cmpxchg_double_slab(s, page,
2309 old.freelist, old.counters,
2310 new.freelist, new.counters,
2311 "unfreezing slab"));
2312
2313 if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
2314 page->next = discard_page;
2315 discard_page = page;
2316 } else {
2317 add_partial(n, page, DEACTIVATE_TO_TAIL);
2318 stat(s, FREE_ADD_PARTIAL);
2319 }
2320 }
2321
2322 if (n)
2323 spin_unlock(&n->list_lock);
2324
2325 while (discard_page) {
2326 page = discard_page;
2327 discard_page = discard_page->next;
2328
2329 stat(s, DEACTIVATE_EMPTY);
2330 discard_slab(s, page);
2331 stat(s, FREE_SLAB);
2332 }
2333#endif
2334}
2335
2336/*
2337 * Put a page that was just frozen (in __slab_free) into a partial page
2338 * slot if available. This is done without interrupts disabled and without
2339 * preemption disabled. The cmpxchg is racy and may put the partial page
2340 * onto a random cpu's partial slot.
2341 *
2342 * If we did not find a slot then simply move all the partials to the
2343 * per node partial list.
2344 */
2345static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
2346{
2347#ifdef CONFIG_SLUB_CPU_PARTIAL
2348 struct page *oldpage;
2349 int pages;
2350 int pobjects;
2351
2352 preempt_disable();
2353 do {
2354 pages = 0;
2355 pobjects = 0;
2356 oldpage = this_cpu_read(s->cpu_slab->partial);
2357
2358 if (oldpage) {
2359 pobjects = oldpage->pobjects;
2360 pages = oldpage->pages;
2361 if (drain && pobjects > s->cpu_partial) {
2362 unsigned long flags;
2363 /*
2364 * partial array is full. Move the existing
2365 * set to the per node partial list.
2366 */
2367 local_irq_save(flags);
2368 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
2369 local_irq_restore(flags);
2370 oldpage = NULL;
2371 pobjects = 0;
2372 pages = 0;
2373 stat(s, CPU_PARTIAL_DRAIN);
2374 }
2375 }
2376
2377 pages++;
2378 pobjects += page->objects - page->inuse;
2379
2380 page->pages = pages;
2381 page->pobjects = pobjects;
2382 page->next = oldpage;
2383
2384 } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
2385 != oldpage);
2386 if (unlikely(!s->cpu_partial)) {
2387 unsigned long flags;
2388
2389 local_irq_save(flags);
2390 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
2391 local_irq_restore(flags);
2392 }
2393 preempt_enable();
2394#endif
2395}
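
/*
 * Note on the structure built above: the per cpu partial list is a singly
 * linked chain through page->next, and every newly pushed page records the
 * running totals in its own page->pages and page->pobjects fields, so the
 * head of the chain always carries the counts that the drain check reads.
 */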
2396
2397static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
2398{
2399 stat(s, CPUSLAB_FLUSH);
2400 deactivate_slab(s, c->page, c->freelist);
2401
2402 c->tid = next_tid(c->tid);
2403 c->page = NULL;
2404 c->freelist = NULL;
2405}
2406
2407/*
2408 * Flush cpu slab.
2409 *
2410 * Called from IPI handler with interrupts disabled.
2411 */
2412static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
2413{
2414 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2415
2416 if (likely(c)) {
2417 if (c->page)
2418 flush_slab(s, c);
2419
2420 unfreeze_partials(s, c);
2421 }
2422}
2423
2424static void flush_cpu_slab(void *d)
2425{
2426 struct kmem_cache *s = d;
2427
2428 __flush_cpu_slab(s, smp_processor_id());
2429}
2430
2431static bool has_cpu_slab(int cpu, void *info)
2432{
2433 struct kmem_cache *s = info;
2434 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2435
2436 return c->page || c->partial;
2437}
2438
2439static void flush_all(struct kmem_cache *s)
2440{
2441 on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
2442}
2443
2444/*
2445 * Check if the objects in a per cpu structure fit numa
2446 * locality expectations.
2447 */
2448static inline int node_match(struct page *page, int node)
2449{
2450#ifdef CONFIG_NUMA
2451 if (!page || (node != NUMA_NO_NODE && page_to_nid(page) != node))
2452 return 0;
2453#endif
2454 return 1;
2455}
2456
2457#ifdef CONFIG_SLUB_DEBUG
2458static int count_free(struct page *page)
2459{
2460 return page->objects - page->inuse;
2461}
2462
2463static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
2464{
2465 return atomic_long_read(&n->total_objects);
2466}
2467#endif /* CONFIG_SLUB_DEBUG */
2468
2469#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
2470static unsigned long count_partial(struct kmem_cache_node *n,
2471 int (*get_count)(struct page *))
2472{
2473 unsigned long flags;
2474 unsigned long x = 0;
2475 struct page *page;
2476
2477 spin_lock_irqsave(&n->list_lock, flags);
2478 list_for_each_entry(page, &n->partial, lru)
2479 x += get_count(page);
2480 spin_unlock_irqrestore(&n->list_lock, flags);
2481 return x;
2482}
2483#endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */
2484
2485static noinline void
2486slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2487{
2488#ifdef CONFIG_SLUB_DEBUG
2489 static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
2490 DEFAULT_RATELIMIT_BURST);
2491 int node;
2492 struct kmem_cache_node *n;
2493
2494 if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs))
2495 return;
2496
2497 pr_warn("SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n",
2498 nid, gfpflags);
2499 pr_warn(" cache: %s, object size: %d, buffer size: %d, default order: %d, min order: %d\n",
2500 s->name, s->object_size, s->size, oo_order(s->oo),
2501 oo_order(s->min));
2502
2503 if (oo_order(s->min) > get_order(s->object_size))
2504 pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n",
2505 s->name);
2506
2507 for_each_kmem_cache_node(s, node, n) {
2508 unsigned long nr_slabs;
2509 unsigned long nr_objs;
2510 unsigned long nr_free;
2511
2512 nr_free = count_partial(n, count_free);
2513 nr_slabs = node_nr_slabs(n);
2514 nr_objs = node_nr_objs(n);
2515
2516 pr_warn(" node %d: slabs: %ld, objs: %ld, free: %ld\n",
2517 node, nr_slabs, nr_objs, nr_free);
2518 }
2519#endif
2520}
2521
2522static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2523 int node, struct kmem_cache_cpu **pc)
2524{
2525 void *freelist;
2526 struct kmem_cache_cpu *c = *pc;
2527 struct page *page;
2528
2529 freelist = get_partial(s, flags, node, c);
2530
2531 if (freelist)
2532 return freelist;
2533
2534 page = new_slab(s, flags, node);
2535 if (page) {
2536 c = raw_cpu_ptr(s->cpu_slab);
2537 if (c->page)
2538 flush_slab(s, c);
2539
2540 /*
2541 * No other reference to the page yet so we can
2542 * muck around with it freely without cmpxchg
2543 */
2544 freelist = page->freelist;
2545 page->freelist = NULL;
2546
2547 stat(s, ALLOC_SLAB);
2548 c->page = page;
2549 *pc = c;
2550 } else
2551 freelist = NULL;
2552
2553 return freelist;
2554}
2555
2556static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
2557{
2558 if (unlikely(PageSlabPfmemalloc(page)))
2559 return gfp_pfmemalloc_allowed(gfpflags);
2560
2561 return true;
2562}
2563
2564/*
2565 * Check the page->freelist of a page and either transfer the freelist to the
2566 * per cpu freelist or deactivate the page.
2567 *
2568 * The page is still frozen if the return value is not NULL.
2569 *
2570 * If this function returns NULL then the page has been unfrozen.
2571 *
2572 * This function must be called with interrupts disabled.
2573 */
2574static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2575{
2576 struct page new;
2577 unsigned long counters;
2578 void *freelist;
2579
2580 do {
2581 freelist = page->freelist;
2582 counters = page->counters;
2583
2584 new.counters = counters;
2585 VM_BUG_ON(!new.frozen);
2586
2587 new.inuse = page->objects;
2588 new.frozen = freelist != NULL;
2589
2590 } while (!__cmpxchg_double_slab(s, page,
2591 freelist, counters,
2592 NULL, new.counters,
2593 "get_freelist"));
2594
2595 return freelist;
2596}
2597
2598/*
2599 * Slow path. The lockless freelist is empty or we need to perform
2600 * debugging duties.
2601 *
2602 * Processing is still very fast if new objects have been freed to the
2603 * regular freelist. In that case we simply take over the regular freelist
2604 * as the lockless freelist and zap the regular freelist.
2605 *
2606 * If that is not working then we fall back to the partial lists. We take the
2607 * first element of the freelist as the object to allocate now and move the
2608 * rest of the freelist to the lockless freelist.
2609 *
2610 * And if we were unable to get a new slab from the partial slab lists then
2611 * we need to allocate a new slab. This is the slowest path since it involves
2612 * a call to the page allocator and the setup of a new slab.
2613 *
2614 * Version of __slab_alloc to use when we know that interrupts are
2615 * already disabled (which is the case for bulk allocation).
2616 */
2617static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2618 unsigned long addr, struct kmem_cache_cpu *c)
2619{
2620 void *freelist;
2621 struct page *page;
2622
2623 page = c->page;
2624 if (!page)
2625 goto new_slab;
2626redo:
2627
2628 if (unlikely(!node_match(page, node))) {
2629 int searchnode = node;
2630
2631 if (node != NUMA_NO_NODE && !node_present_pages(node))
2632 searchnode = node_to_mem_node(node);
2633
2634 if (unlikely(!node_match(page, searchnode))) {
2635 stat(s, ALLOC_NODE_MISMATCH);
2636 deactivate_slab(s, page, c->freelist);
2637 c->page = NULL;
2638 c->freelist = NULL;
2639 goto new_slab;
2640 }
2641 }
2642
2643 /*
2644 * By rights, we should be searching for a slab page that was
2645 * PFMEMALLOC but right now, we are losing the pfmemalloc
2646 * information when the page leaves the per-cpu allocator
2647 */
2648 if (unlikely(!pfmemalloc_match(page, gfpflags))) {
2649 deactivate_slab(s, page, c->freelist);
2650 c->page = NULL;
2651 c->freelist = NULL;
2652 goto new_slab;
2653 }
2654
2655 /* must check again c->freelist in case of cpu migration or IRQ */
2656 freelist = c->freelist;
2657 if (freelist)
2658 goto load_freelist;
2659
2660 freelist = get_freelist(s, page);
2661
2662 if (!freelist) {
2663 c->page = NULL;
2664 stat(s, DEACTIVATE_BYPASS);
2665 goto new_slab;
2666 }
2667
2668 stat(s, ALLOC_REFILL);
2669
2670load_freelist:
2671 /*
2672 * freelist is pointing to the list of objects to be used.
2673 * page is pointing to the page from which the objects are obtained.
2674 * That page must be frozen for per cpu allocations to work.
2675 */
2676 VM_BUG_ON(!c->page->frozen);
2677 c->freelist = get_freepointer(s, freelist);
2678 c->tid = next_tid(c->tid);
2679 return freelist;
2680
2681new_slab:
2682
2683 if (c->partial) {
2684 page = c->page = c->partial;
2685 c->partial = page->next;
2686 stat(s, CPU_PARTIAL_ALLOC);
2687 c->freelist = NULL;
2688 goto redo;
2689 }
2690
2691 freelist = new_slab_objects(s, gfpflags, node, &c);
2692
2693 if (unlikely(!freelist)) {
2694 slab_out_of_memory(s, gfpflags, node);
2695 return NULL;
2696 }
2697
2698 page = c->page;
2699 if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags)))
2700 goto load_freelist;
2701
2702 /* Only entered in the debug case */
2703 if (kmem_cache_debug(s) &&
2704 !alloc_debug_processing(s, page, freelist, addr))
2705 goto new_slab; /* Slab failed checks. Next slab needed */
2706
2707 deactivate_slab(s, page, get_freepointer(s, freelist));
2708 c->page = NULL;
2709 c->freelist = NULL;
2710 return freelist;
2711}
2712
2713/*
2714 * Another variant that disables interrupts and compensates for possible
2715 * cpu changes by refetching the per cpu area pointer.
2716 */
2717static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2718 unsigned long addr, struct kmem_cache_cpu *c)
2719{
2720 void *p;
2721 unsigned long flags;
2722
2723 local_irq_save(flags);
2724#ifdef CONFIG_PREEMPT
2725 /*
2726 * We may have been preempted and rescheduled on a different
2727 * cpu before disabling interrupts. Need to reload cpu area
2728 * pointer.
2729 */
2730 c = this_cpu_ptr(s->cpu_slab);
2731#endif
2732
2733 p = ___slab_alloc(s, gfpflags, node, addr, c);
2734 local_irq_restore(flags);
2735 return p;
2736}
2737
2738/*
2739 * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
2740 * have the fastpath folded into their functions. So no function call
2741 * overhead for requests that can be satisfied on the fastpath.
2742 *
2743 * The fastpath works by first checking if the lockless freelist can be used.
2744 * If not then __slab_alloc is called for slow processing.
2745 *
2746 * Otherwise we can simply pick the next object from the lockless free list.
2747 */
2748static __always_inline void *slab_alloc_node(struct kmem_cache *s,
2749 gfp_t gfpflags, int node, unsigned long addr)
2750{
2751 void *object;
2752 struct kmem_cache_cpu *c;
2753 struct page *page;
2754 unsigned long tid;
2755
2756 s = slab_pre_alloc_hook(s, gfpflags);
2757 if (!s)
2758 return NULL;
2759redo:
2760 /*
2761 * Must read kmem_cache cpu data via this cpu ptr. Preemption is
2762 * enabled. We may switch back and forth between cpus while
2763 * reading from one cpu area. That does not matter as long
2764 * as we end up on the original cpu again when doing the cmpxchg.
2765 *
2766 * We should guarantee that tid and kmem_cache are retrieved on
2767 * the same cpu. They could differ if CONFIG_PREEMPT is enabled, so we
2768 * need to check whether they match.
2769 */
2770 do {
2771 tid = this_cpu_read(s->cpu_slab->tid);
2772 c = raw_cpu_ptr(s->cpu_slab);
2773 } while (IS_ENABLED(CONFIG_PREEMPT) &&
2774 unlikely(tid != READ_ONCE(c->tid)));
2775
2776 /*
2777 * The irqless object alloc/free algorithm used here depends on the
2778 * sequence of fetching cpu_slab's data. tid should be fetched before
2779 * anything on c to guarantee that the object and page associated with
2780 * the previous tid won't be used with the current tid. If we fetch tid
2781 * first, the object and page could be the ones associated with the next
2782 * tid and our alloc/free request will fail; in that case we simply retry.
2783 */
2784 barrier();
2785
2786 /*
2787 * The transaction ids are globally unique per cpu and per operation on
2788 * a per cpu queue. Thus it can be guaranteed that the cmpxchg_double
2789 * occurs on the right processor and that there was no operation on the
2790 * linked list in between.
2791 */
2792
2793 object = c->freelist;
2794 page = c->page;
2795 if (unlikely(!object || !node_match(page, node))) {
2796 object = __slab_alloc(s, gfpflags, node, addr, c);
2797 stat(s, ALLOC_SLOWPATH);
2798 } else {
2799 void *next_object = get_freepointer_safe(s, object);
2800
2801 /*
2802 * The cmpxchg will only match if there was no additional
2803 * operation and if we are on the right processor.
2804 *
2805 * The cmpxchg does the following atomically (without lock
2806 * semantics!)
2807 * 1. Relocate first pointer to the current per cpu area.
2808 * 2. Verify that tid and freelist have not been changed
2809 * 3. If they were not changed replace tid and freelist
2810 *
2811 * Since this is without lock semantics the protection is only
2812 * against code executing on this cpu *not* from access by
2813 * other cpus.
2814 */
2815 if (unlikely(!this_cpu_cmpxchg_double(
2816 s->cpu_slab->freelist, s->cpu_slab->tid,
2817 object, tid,
2818 next_object, next_tid(tid)))) {
2819
2820 note_cmpxchg_failure("slab_alloc", s, tid);
2821 goto redo;
2822 }
2823 prefetch_freepointer(s, next_object);
2824 stat(s, ALLOC_FASTPATH);
2825 }
2826
2827 if (unlikely(gfpflags & __GFP_ZERO) && object)
2828 memset(object, 0, s->object_size);
2829
2830 slab_post_alloc_hook(s, gfpflags, 1, &object);
2831
2832 return object;
2833}
2834
2835static __always_inline void *slab_alloc(struct kmem_cache *s,
2836 gfp_t gfpflags, unsigned long addr)
2837{
2838 return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr);
2839}
2840
2841void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
2842{
2843 void *ret = slab_alloc(s, gfpflags, _RET_IP_);
2844
2845 trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
2846 s->size, gfpflags);
2847
2848 return ret;
2849}
2850EXPORT_SYMBOL(kmem_cache_alloc);
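
/*
 * Illustrative usage of the exported API (a sketch only: "my_cache" and
 * struct my_obj are made-up names, and error handling is kept minimal):
 *
 *	struct kmem_cache *my_cache;
 *
 *	my_cache = kmem_cache_create("my_obj", sizeof(struct my_obj), 0,
 *				     SLAB_HWCACHE_ALIGN, NULL);
 *	if (my_cache) {
 *		struct my_obj *p = kmem_cache_alloc(my_cache, GFP_KERNEL);
 *
 *		if (p)
 *			kmem_cache_free(my_cache, p);
 *		kmem_cache_destroy(my_cache);
 *	}
 */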
2851
2852#ifdef CONFIG_TRACING
2853void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
2854{
2855 void *ret = slab_alloc(s, gfpflags, _RET_IP_);
2856 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
2857 kasan_kmalloc(s, ret, size, gfpflags);
2858 return ret;
2859}
2860EXPORT_SYMBOL(kmem_cache_alloc_trace);
2861#endif
2862
2863#ifdef CONFIG_NUMA
2864void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
2865{
2866 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
2867
2868 trace_kmem_cache_alloc_node(_RET_IP_, ret,
2869 s->object_size, s->size, gfpflags, node);
2870
2871 return ret;
2872}
2873EXPORT_SYMBOL(kmem_cache_alloc_node);
2874
2875#ifdef CONFIG_TRACING
2876void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
2877 gfp_t gfpflags,
2878 int node, size_t size)
2879{
2880 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
2881
2882 trace_kmalloc_node(_RET_IP_, ret,
2883 size, s->size, gfpflags, node);
2884
2885 kasan_kmalloc(s, ret, size, gfpflags);
2886 return ret;
2887}
2888EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
2889#endif
2890#endif
2891
2892/*
2893 * Slow path handling. This may still be called frequently since objects
2894 * have a longer lifetime than the cpu slabs in most processing loads.
2895 *
2896 * So we still attempt to reduce cache line usage. Just take the slab
2897 * lock and free the item. If there is no additional partial page
2898 * handling required then we can return immediately.
2899 */
2900static void __slab_free(struct kmem_cache *s, struct page *page,
2901 void *head, void *tail, int cnt,
2902 unsigned long addr)
2903
2904{
2905 void *prior;
2906 int was_frozen;
2907 struct page new;
2908 unsigned long counters;
2909 struct kmem_cache_node *n = NULL;
2910 unsigned long uninitialized_var(flags);
2911
2912 stat(s, FREE_SLOWPATH);
2913
2914 if (kmem_cache_debug(s) &&
2915 !(n = free_debug_processing(s, page, head, tail, cnt,
2916 addr, &flags)))
2917 return;
2918
2919 do {
2920 if (unlikely(n)) {
2921 spin_unlock_irqrestore(&n->list_lock, flags);
2922 n = NULL;
2923 }
2924 prior = page->freelist;
2925 counters = page->counters;
2926 set_freepointer(s, tail, prior);
2927 new.counters = counters;
2928 was_frozen = new.frozen;
2929 new.inuse -= cnt;
2930 if ((!new.inuse || !prior) && !was_frozen) {
2931
2932 if (kmem_cache_has_cpu_partial(s) && !prior) {
2933
2934 /*
2935 * Slab was on no list before and will be
2936 * partially empty.
2937 * We can defer the list move and instead
2938 * freeze it.
2939 */
2940 new.frozen = 1;
2941
2942 } else { /* Needs to be taken off a list */
2943
2944 n = get_node(s, page_to_nid(page));
2945 /*
2946 * Speculatively acquire the list_lock.
2947 * If the cmpxchg does not succeed then we may
2948 * drop the list_lock without any processing.
2949 *
2950 * Otherwise the list_lock will synchronize with
2951 * other processors updating the list of slabs.
2952 */
2953 spin_lock_irqsave(&n->list_lock, flags);
2954
2955 }
2956 }
2957
2958 } while (!cmpxchg_double_slab(s, page,
2959 prior, counters,
2960 head, new.counters,
2961 "__slab_free"));
2962
2963 if (likely(!n)) {
2964
2965 /*
2966 * If we just froze the page then put it onto the
2967 * per cpu partial list.
2968 */
2969 if (new.frozen && !was_frozen) {
2970 put_cpu_partial(s, page, 1);
2971 stat(s, CPU_PARTIAL_FREE);
2972 }
2973 /*
2974 * The list lock was not taken therefore no list
2975 * activity can be necessary.
2976 */
2977 if (was_frozen)
2978 stat(s, FREE_FROZEN);
2979 return;
2980 }
2981
2982 if (unlikely(!new.inuse && n->nr_partial >= s->min_partial))
2983 goto slab_empty;
2984
2985 /*
2986 * Objects left in the slab. If it was not on the partial list before
2987 * then add it.
2988 */
2989 if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
2990 if (kmem_cache_debug(s))
2991 remove_full(s, n, page);
2992 add_partial(n, page, DEACTIVATE_TO_TAIL);
2993 stat(s, FREE_ADD_PARTIAL);
2994 }
2995 spin_unlock_irqrestore(&n->list_lock, flags);
2996 return;
2997
2998slab_empty:
2999 if (prior) {
3000 /*
3001 * Slab on the partial list.
3002 */
3003 remove_partial(n, page);
3004 stat(s, FREE_REMOVE_PARTIAL);
3005 } else {
3006 /* Slab must be on the full list */
3007 remove_full(s, n, page);
3008 }
3009
3010 spin_unlock_irqrestore(&n->list_lock, flags);
3011 stat(s, FREE_SLAB);
3012 discard_slab(s, page);
3013}
3014
3015/*
3016 * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
3017 * can perform fastpath freeing without additional function calls.
3018 *
3019 * The fastpath is only possible if we are freeing to the current cpu slab
3020 * of this processor. This is typically the case if we have just allocated
3021 * the item before.
3022 *
3023 * If fastpath is not possible then fall back to __slab_free where we deal
3024 * with all sorts of special processing.
3025 *
3026 * Bulk free of a freelist with several objects (all pointing to the
3027 * same page) is possible by specifying head and tail pointers, plus an
3028 * object count (cnt). A bulk free is indicated by the tail pointer being set.
3029 */
3030static __always_inline void do_slab_free(struct kmem_cache *s,
3031 struct page *page, void *head, void *tail,
3032 int cnt, unsigned long addr)
3033{
3034 void *tail_obj = tail ? : head;
3035 struct kmem_cache_cpu *c;
3036 unsigned long tid;
3037redo:
3038 /*
3039 * Determine the current cpu's per cpu slab.
3040 * The cpu may change afterward. However that does not matter since
3041 * data is retrieved via this pointer. If we are on the same cpu
3042 * during the cmpxchg then the free will succeed.
3043 */
3044 do {
3045 tid = this_cpu_read(s->cpu_slab->tid);
3046 c = raw_cpu_ptr(s->cpu_slab);
3047 } while (IS_ENABLED(CONFIG_PREEMPT) &&
3048 unlikely(tid != READ_ONCE(c->tid)));
3049
3050 /* Same with comment on barrier() in slab_alloc_node() */
3051 barrier();
3052
3053 if (likely(page == c->page)) {
3054 set_freepointer(s, tail_obj, c->freelist);
3055
3056 if (unlikely(!this_cpu_cmpxchg_double(
3057 s->cpu_slab->freelist, s->cpu_slab->tid,
3058 c->freelist, tid,
3059 head, next_tid(tid)))) {
3060
3061 note_cmpxchg_failure("slab_free", s, tid);
3062 goto redo;
3063 }
3064 stat(s, FREE_FASTPATH);
3065 } else
3066 __slab_free(s, page, head, tail_obj, cnt, addr);
3067
3068}
3069
3070static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
3071 void *head, void *tail, int cnt,
3072 unsigned long addr)
3073{
3074 slab_free_freelist_hook(s, head, tail);
3075 /*
3076 * slab_free_freelist_hook() could have put the items into quarantine.
3077 * If so, no need to free them.
3078 */
3079 if (s->flags & SLAB_KASAN && !(s->flags & SLAB_DESTROY_BY_RCU))
3080 return;
3081 do_slab_free(s, page, head, tail, cnt, addr);
3082}
3083
3084#ifdef CONFIG_KASAN
3085void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
3086{
3087 do_slab_free(cache, virt_to_head_page(x), x, NULL, 1, addr);
3088}
3089#endif
3090
3091void kmem_cache_free(struct kmem_cache *s, void *x)
3092{
3093 s = cache_from_obj(s, x);
3094 if (!s)
3095 return;
3096 slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
3097 trace_kmem_cache_free(_RET_IP_, x);
3098}
3099EXPORT_SYMBOL(kmem_cache_free);
3100
3101struct detached_freelist {
3102 struct page *page;
3103 void *tail;
3104 void *freelist;
3105 int cnt;
3106 struct kmem_cache *s;
3107};
3108
3109/*
3110 * This function progressively scans the array of free objects (with
3111 * a limited look ahead) and extracts objects belonging to the same
3112 * page. It builds a detached freelist directly within the given
3113 * page/objects. This can happen without any need for
3114 * synchronization, because the objects are owned by the running
3115 * process. The freelist is built up as a singly linked list in the
3116 * objects. The idea is that this detached freelist can then be bulk
3117 * transferred to the real freelist(s), while only requiring a single
3118 * synchronization primitive. Look ahead in the array is limited for
3119 * performance reasons.
3120 */
3121static inline
3122int build_detached_freelist(struct kmem_cache *s, size_t size,
3123 void **p, struct detached_freelist *df)
3124{
3125 size_t first_skipped_index = 0;
3126 int lookahead = 3;
3127 void *object;
3128
3129 /* Always re-init detached_freelist */
3130 df->page = NULL;
3131
3132 do {
3133 object = p[--size];
3134 } while (!object && size);
3135
3136 if (!object)
3137 return 0;
3138
3139 /* Support for memcg, compiler can optimize this out */
3140 df->s = cache_from_obj(s, object);
3141
3142 /* Start new detached freelist */
3143 set_freepointer(df->s, object, NULL);
3144 df->page = virt_to_head_page(object);
3145 df->tail = object;
3146 df->freelist = object;
3147 p[size] = NULL; /* mark object processed */
3148 df->cnt = 1;
3149
3150 while (size) {
3151 object = p[--size];
3152 if (!object)
3153 continue; /* Skip processed objects */
3154
3155 /* df->page is always set at this point */
3156 if (df->page == virt_to_head_page(object)) {
3157 /* Opportunistically build the freelist */
3158 set_freepointer(df->s, object, df->freelist);
3159 df->freelist = object;
3160 df->cnt++;
3161 p[size] = NULL; /* mark object processed */
3162
3163 continue;
3164 }
3165
3166 /* Limit look ahead search */
3167 if (!--lookahead)
3168 break;
3169
3170 if (!first_skipped_index)
3171 first_skipped_index = size + 1;
3172 }
3173
3174 return first_skipped_index;
3175}
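
/*
 * Illustrative walk-through (hypothetical objects A0..A2 on one page and B0
 * on another): for p = { A0, B0, A1, A2 } the scan starts from the end,
 * links the three page-A objects into the detached freelist with A0 at the
 * head and A2 as the tail (df->cnt == 3), clears those array slots, and
 * returns first_skipped_index == 2, so the next round in
 * kmem_cache_free_bulk() picks up B0.
 */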
3176
3177/* Note that interrupts must be enabled when calling this function. */
3178void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
3179{
3180 if (WARN_ON(!size))
3181 return;
3182
3183 do {
3184 struct detached_freelist df;
3185
3186 size = build_detached_freelist(s, size, p, &df);
3187 if (unlikely(!df.page))
3188 continue;
3189
3190 slab_free(df.s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
3191 } while (likely(size));
3192}
3193EXPORT_SYMBOL(kmem_cache_free_bulk);
3194
3195/* Note that interrupts must be enabled when calling this function. */
3196int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
3197 void **p)
3198{
3199 struct kmem_cache_cpu *c;
3200 int i;
3201
3202 /* memcg and kmem_cache debug support */
3203 s = slab_pre_alloc_hook(s, flags);
3204 if (unlikely(!s))
3205 return false;
3206 /*
3207 * Drain objects in the per cpu slab, while disabling local
3208 * IRQs, which protects against PREEMPT and interrupt
3209 * handlers invoking the normal fastpath.
3210 */
3211 local_irq_disable();
3212 c = this_cpu_ptr(s->cpu_slab);
3213
3214 for (i = 0; i < size; i++) {
3215 void *object = c->freelist;
3216
3217 if (unlikely(!object)) {
3218 /*
3219 * Invoking the slow path likely has the side effect
3220 * of re-populating the per CPU c->freelist
3221 */
3222 p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
3223 _RET_IP_, c);
3224 if (unlikely(!p[i]))
3225 goto error;
3226
3227 c = this_cpu_ptr(s->cpu_slab);
3228 continue; /* goto for-loop */
3229 }
3230 c->freelist = get_freepointer(s, object);
3231 p[i] = object;
3232 }
3233 c->tid = next_tid(c->tid);
3234 local_irq_enable();
3235
3236 /* Clear memory outside IRQ disabled fastpath loop */
3237 if (unlikely(flags & __GFP_ZERO)) {
3238 int j;
3239
3240 for (j = 0; j < i; j++)
3241 memset(p[j], 0, s->object_size);
3242 }
3243
3244 /* memcg and kmem_cache debug support */
3245 slab_post_alloc_hook(s, flags, size, p);
3246 return i;
3247error:
3248 local_irq_enable();
3249 slab_post_alloc_hook(s, flags, i, p);
3250 __kmem_cache_free_bulk(s, i, p);
3251 return 0;
3252}
3253EXPORT_SYMBOL(kmem_cache_alloc_bulk);
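
/*
 * Usage sketch for the bulk interface above (illustrative only; "objs" and
 * "my_cache" are made-up names). kmem_cache_alloc_bulk() either fills the
 * whole array and returns the requested count, or returns 0:
 *
 *	void *objs[16];
 *
 *	if (kmem_cache_alloc_bulk(my_cache, GFP_KERNEL, ARRAY_SIZE(objs), objs)) {
 *		... use the objects ...
 *		kmem_cache_free_bulk(my_cache, ARRAY_SIZE(objs), objs);
 *	}
 */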
3254
3255
3256/*
3257 * Object placement in a slab is made very easy because we always start at
3258 * offset 0. If we tune the size of the object to the alignment then we can
3259 * get the required alignment by putting one properly sized object after
3260 * another.
3261 *
3262 * Notice that the allocation order determines the sizes of the per cpu
3263 * caches. Each processor always has one slab available for allocations.
3264 * Increasing the allocation order reduces the number of times that slabs
3265 * must be moved on and off the partial lists and is therefore a factor in
3266 * locking overhead.
3267 */
3268
3269/*
3270 * Minimum / Maximum order of slab pages. This influences locking overhead
3271 * and slab fragmentation. A higher order reduces the number of partial slabs
3272 * and increases the number of allocations possible without having to
3273 * take the list_lock.
3274 */
3275static int slub_min_order;
3276static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
3277static int slub_min_objects;
3278
3279/*
3280 * Calculate the order of allocation given a slab object size.
3281 *
3282 * The order of allocation has significant impact on performance and other
3283 * system components. Generally order 0 allocations should be preferred since
3284 * order 0 does not cause fragmentation in the page allocator. Larger objects
3285 * can be problematic to put into order 0 slabs because there may be too much
3286 * unused space left. We go to a higher order if more than 1/16th of the slab
3287 * would be wasted.
3288 *
3289 * In order to reach satisfactory performance we must ensure that a minimum
3290 * number of objects is in one slab. Otherwise we may generate too much
3291 * activity on the partial lists which requires taking the list_lock. This is
3292 * less of a concern for large slabs though, which are rarely used.
3293 *
3294 * slub_max_order specifies the order at which we stop considering the
3295 * number of objects in a slab as critical. If we reach slub_max_order then
3296 * we try to keep the page order as low as possible. So we accept more waste
3297 * of space in favor of a small page order.
3298 *
3299 * Higher order allocations also allow the placement of more objects in a
3300 * slab and thereby reduce object handling overhead. If the user has
3301 * requested a higher minimum order then we start with that one instead of
3302 * the smallest order which will fit the object.
3303 */
3304static inline int slab_order(int size, int min_objects,
3305 int max_order, int fract_leftover, int reserved)
3306{
3307 int order;
3308 int rem;
3309 int min_order = slub_min_order;
3310
3311 if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
3312 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
3313
3314 for (order = max(min_order, get_order(min_objects * size + reserved));
3315 order <= max_order; order++) {
3316
3317 unsigned long slab_size = PAGE_SIZE << order;
3318
3319 rem = (slab_size - reserved) % size;
3320
3321 if (rem <= slab_size / fract_leftover)
3322 break;
3323 }
3324
3325 return order;
3326}
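
/*
 * Worked example for the loop above (assuming 4K pages, reserved == 0,
 * size == 256, min_objects == 16 and fract_leftover == 16): the search
 * starts at get_order(16 * 256) == 0; an order-0 slab holds
 * 4096 / 256 == 16 objects with rem == 0, which is within the allowed
 * waste of 4096 / 16 == 256 bytes, so order 0 is returned.
 */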
3327
3328static inline int calculate_order(int size, int reserved)
3329{
3330 int order;
3331 int min_objects;
3332 int fraction;
3333 int max_objects;
3334
3335 /*
3336 * Attempt to find best configuration for a slab. This
3337 * works by first attempting to generate a layout with
3338 * the best configuration and backing off gradually.
3339 *
3340 * First we increase the acceptable waste in a slab. Then
3341 * we reduce the minimum objects required in a slab.
3342 */
3343 min_objects = slub_min_objects;
3344 if (!min_objects)
3345 min_objects = 4 * (fls(nr_cpu_ids) + 1);
3346 max_objects = order_objects(slub_max_order, size, reserved);
3347 min_objects = min(min_objects, max_objects);
3348
3349 while (min_objects > 1) {
3350 fraction = 16;
3351 while (fraction >= 4) {
3352 order = slab_order(size, min_objects,
3353 slub_max_order, fraction, reserved);
3354 if (order <= slub_max_order)
3355 return order;
3356 fraction /= 2;
3357 }
3358 min_objects--;
3359 }
3360
3361 /*
3362 * We were unable to place multiple objects in a slab. Now
3363 * let's see if we can place a single object there.
3364 */
3365 order = slab_order(size, 1, slub_max_order, 1, reserved);
3366 if (order <= slub_max_order)
3367 return order;
3368
3369 /*
3370 * Doh this slab cannot be placed using slub_max_order.
3371 */
3372 order = slab_order(size, 1, MAX_ORDER, 1, reserved);
3373 if (order < MAX_ORDER)
3374 return order;
3375 return -ENOSYS;
3376}
3377
3378static void
3379init_kmem_cache_node(struct kmem_cache_node *n)
3380{
3381 n->nr_partial = 0;
3382 spin_lock_init(&n->list_lock);
3383 INIT_LIST_HEAD(&n->partial);
3384#ifdef CONFIG_SLUB_DEBUG
3385 atomic_long_set(&n->nr_slabs, 0);
3386 atomic_long_set(&n->total_objects, 0);
3387 INIT_LIST_HEAD(&n->full);
3388#endif
3389}
3390
3391static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
3392{
3393 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
3394 KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
3395
3396 /*
3397 * Must align to double word boundary for the double cmpxchg
3398 * instructions to work; see __pcpu_double_call_return_bool().
3399 */
3400 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
3401 2 * sizeof(void *));
3402
3403 if (!s->cpu_slab)
3404 return 0;
3405
3406 init_kmem_cache_cpus(s);
3407
3408 return 1;
3409}
3410
3411static struct kmem_cache *kmem_cache_node;
3412
3413/*
3414 * No kmalloc_node yet so do it by hand. We know that this is the first
3415 * slab on the node for this slabcache. There are no concurrent accesses
3416 * possible.
3417 *
3418 * Note that this function only works on the kmem_cache_node
3419 * when allocating for the kmem_cache_node. This is used for bootstrapping
3420 * memory on a fresh node that has no slab structures yet.
3421 */
3422static void early_kmem_cache_node_alloc(int node)
3423{
3424 struct page *page;
3425 struct kmem_cache_node *n;
3426
3427 BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
3428
3429 page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
3430
3431 BUG_ON(!page);
3432 if (page_to_nid(page) != node) {
3433 pr_err("SLUB: Unable to allocate memory from node %d\n", node);
3434 pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
3435 }
3436
3437 n = page->freelist;
3438 BUG_ON(!n);
3439 page->freelist = get_freepointer(kmem_cache_node, n);
3440 page->inuse = 1;
3441 page->frozen = 0;
3442 kmem_cache_node->node[node] = n;
3443#ifdef CONFIG_SLUB_DEBUG
3444 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
3445 init_tracking(kmem_cache_node, n);
3446#endif
3447 kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node),
3448 GFP_KERNEL);
3449 init_kmem_cache_node(n);
3450 inc_slabs_node(kmem_cache_node, node, page->objects);
3451
3452 /*
3453 * No locks need to be taken here as it has just been
3454 * initialized and there is no concurrent access.
3455 */
3456 __add_partial(n, page, DEACTIVATE_TO_HEAD);
3457}
3458
3459static void free_kmem_cache_nodes(struct kmem_cache *s)
3460{
3461 int node;
3462 struct kmem_cache_node *n;
3463
3464 for_each_kmem_cache_node(s, node, n) {
3465 kmem_cache_free(kmem_cache_node, n);
3466 s->node[node] = NULL;
3467 }
3468}
3469
3470static int init_kmem_cache_nodes(struct kmem_cache *s)
3471{
3472 int node;
3473
3474 for_each_node_state(node, N_NORMAL_MEMORY) {
3475 struct kmem_cache_node *n;
3476
3477 if (slab_state == DOWN) {
3478 early_kmem_cache_node_alloc(node);
3479 continue;
3480 }
3481 n = kmem_cache_alloc_node(kmem_cache_node,
3482 GFP_KERNEL, node);
3483
3484 if (!n) {
3485 free_kmem_cache_nodes(s);
3486 return 0;
3487 }
3488
3489 s->node[node] = n;
3490 init_kmem_cache_node(n);
3491 }
3492 return 1;
3493}
3494
3495static void set_min_partial(struct kmem_cache *s, unsigned long min)
3496{
3497 if (min < MIN_PARTIAL)
3498 min = MIN_PARTIAL;
3499 else if (min > MAX_PARTIAL)
3500 min = MAX_PARTIAL;
3501 s->min_partial = min;
3502}
3503
3504/*
3505 * calculate_sizes() determines the order and the distribution of data within
3506 * a slab object.
3507 */
3508static int calculate_sizes(struct kmem_cache *s, int forced_order)
3509{
3510 unsigned long flags = s->flags;
3511 size_t size = s->object_size;
3512 int order;
3513
3514 /*
3515 * Round up object size to the next word boundary. We can only
3516 * place the free pointer at word boundaries and this determines
3517 * the possible location of the free pointer.
3518 */
3519 size = ALIGN(size, sizeof(void *));
3520
3521#ifdef CONFIG_SLUB_DEBUG
3522 /*
3523 * Determine if we can poison the object itself. If the user of
3524 * the slab may touch the object after free or before allocation
3525 * then we should never poison the object itself.
3526 */
3527 if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) &&
3528 !s->ctor)
3529 s->flags |= __OBJECT_POISON;
3530 else
3531 s->flags &= ~__OBJECT_POISON;
3532
3533
3534 /*
3535 * If we are Redzoning then check if there is some space between the
3536 * end of the object and the free pointer. If not then add an
3537 * additional word to have some bytes to store Redzone information.
3538 */
3539 if ((flags & SLAB_RED_ZONE) && size == s->object_size)
3540 size += sizeof(void *);
3541#endif
3542
3543 /*
3544 * With that we have determined the number of bytes in actual use
3545 * by the object. This is the potential offset to the free pointer.
3546 */
3547 s->inuse = size;
3548
3549 if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) ||
3550 s->ctor)) {
3551 /*
3552 * Relocate free pointer after the object if it is not
3553 * permitted to overwrite the first word of the object on
3554 * kmem_cache_free.
3555 *
3556 * This is the case if we do RCU, have a constructor or
3557 * destructor or are poisoning the objects.
3558 */
3559 s->offset = size;
3560 size += sizeof(void *);
3561 }
3562
3563#ifdef CONFIG_SLUB_DEBUG
3564 if (flags & SLAB_STORE_USER)
3565 /*
3566 * Need to store information about allocs and frees after
3567 * the object.
3568 */
3569 size += 2 * sizeof(struct track);
3570#endif
3571
3572 kasan_cache_create(s, &size, &s->flags);
3573#ifdef CONFIG_SLUB_DEBUG
3574 if (flags & SLAB_RED_ZONE) {
3575 /*
3576 * Add some empty padding so that we can catch
3577 * overwrites from earlier objects rather than let
3578 * tracking information or the free pointer be
3579 * corrupted if a user writes before the start
3580 * of the object.
3581 */
3582 size += sizeof(void *);
3583
3584 s->red_left_pad = sizeof(void *);
3585 s->red_left_pad = ALIGN(s->red_left_pad, s->align);
3586 size += s->red_left_pad;
3587 }
3588#endif
3589
3590 /*
3591 * SLUB stores one object immediately after another beginning from
3592 * offset 0. In order to align the objects we have to simply size
3593 * each object to conform to the alignment.
3594 */
3595 size = ALIGN(size, s->align);
3596 s->size = size;
3597 if (forced_order >= 0)
3598 order = forced_order;
3599 else
3600 order = calculate_order(size, s->reserved);
3601
3602 if (order < 0)
3603 return 0;
3604
3605 s->allocflags = 0;
3606 if (order)
3607 s->allocflags |= __GFP_COMP;
3608
3609 if (s->flags & SLAB_CACHE_DMA)
3610 s->allocflags |= GFP_DMA;
3611
3612 if (s->flags & SLAB_RECLAIM_ACCOUNT)
3613 s->allocflags |= __GFP_RECLAIMABLE;
3614
3615 /*
3616 * Determine the number of objects per slab
3617 */
3618 s->oo = oo_make(order, size, s->reserved);
3619 s->min = oo_make(get_order(size), size, s->reserved);
3620 if (oo_objects(s->oo) > oo_objects(s->max))
3621 s->max = s->oo;
3622
3623 return !!oo_objects(s->oo);
3624}
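
/*
 * Illustrative layout (assuming no debug flags, no constructor, no RCU,
 * KASAN disabled and an 8-byte alignment) for an object_size of 24 on a
 * 64-bit build: the size stays 24 after word alignment, s->inuse == 24,
 * the free pointer overlays the start of each free object (s->offset == 0)
 * and the final s->size remains 24.
 */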
3625
3626static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
3627{
3628 s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
3629 s->reserved = 0;
3630
3631 if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
3632 s->reserved = sizeof(struct rcu_head);
3633
3634 if (!calculate_sizes(s, -1))
3635 goto error;
3636 if (disable_higher_order_debug) {
3637 /*
3638 * Disable debugging flags that store metadata if the min slab
3639 * order increased.
3640 */
3641 if (get_order(s->size) > get_order(s->object_size)) {
3642 s->flags &= ~DEBUG_METADATA_FLAGS;
3643 s->offset = 0;
3644 if (!calculate_sizes(s, -1))
3645 goto error;
3646 }
3647 }
3648
3649#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
3650 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
3651 if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0)
3652 /* Enable fast mode */
3653 s->flags |= __CMPXCHG_DOUBLE;
3654#endif
3655
3656 /*
3657 * The larger the object size is, the more pages we want on the partial
3658 * list to avoid pounding the page allocator excessively.
3659 */
3660 set_min_partial(s, ilog2(s->size) / 2);
3661
3662 /*
3663 * cpu_partial determines the maximum number of objects kept in the
3664 * per cpu partial lists of a processor.
3665 *
3666 * Per cpu partial lists mainly contain slabs that just have one
3667 * object freed. If they are used for allocation then they can be
3668 * filled up again with minimal effort. The slab will never hit the
3669 * per node partial lists and therefore no locking will be required.
3670 *
3671 * This setting also determines
3672 *
3673 * A) The number of objects from per cpu partial slabs dumped to the
3674 * per node list when we reach the limit.
3675 * B) The number of objects in cpu partial slabs to extract from the
3676 * per node list when we run out of per cpu objects. We only fetch
3677 * 50% to keep some capacity around for frees.
3678 */
3679 if (!kmem_cache_has_cpu_partial(s))
3680 s->cpu_partial = 0;
3681 else if (s->size >= PAGE_SIZE)
3682 s->cpu_partial = 2;
3683 else if (s->size >= 1024)
3684 s->cpu_partial = 6;
3685 else if (s->size >= 256)
3686 s->cpu_partial = 13;
3687 else
3688 s->cpu_partial = 30;
3689
3690#ifdef CONFIG_NUMA
3691 s->remote_node_defrag_ratio = 1000;
3692#endif
3693 if (!init_kmem_cache_nodes(s))
3694 goto error;
3695
3696 if (alloc_kmem_cache_cpus(s))
3697 return 0;
3698
3699 free_kmem_cache_nodes(s);
3700error:
3701 if (flags & SLAB_PANIC)
3702 panic("Cannot create slab %s size=%lu realsize=%u order=%u offset=%u flags=%lx\n",
3703 s->name, (unsigned long)s->size, s->size,
3704 oo_order(s->oo), s->offset, flags);
3705 return -EINVAL;
3706}
3707
3708static void list_slab_objects(struct kmem_cache *s, struct page *page,
3709 const char *text)
3710{
3711#ifdef CONFIG_SLUB_DEBUG
3712 void *addr = page_address(page);
3713 void *p;
3714 unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) *
3715 sizeof(long), GFP_ATOMIC);
3716 if (!map)
3717 return;
3718 slab_err(s, page, text, s->name);
3719 slab_lock(page);
3720
3721 get_map(s, page, map);
3722 for_each_object(p, s, addr, page->objects) {
3723
3724 if (!test_bit(slab_index(p, s, addr), map)) {
3725 pr_err("INFO: Object 0x%p @offset=%tu\n", p, p - addr);
3726 print_tracking(s, p);
3727 }
3728 }
3729 slab_unlock(page);
3730 kfree(map);
3731#endif
3732}
3733
3734/*
3735 * Attempt to free all partial slabs on a node.
3736 * This is called from kmem_cache_close(). We must be the last thread
3737 * using the cache and therefore we do not need to lock anymore.
3738 */
3739static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
3740{
3741 struct page *page, *h;
3742
3743 list_for_each_entry_safe(page, h, &n->partial, lru) {
3744 if (!page->inuse) {
3745 __remove_partial(n, page);
3746 discard_slab(s, page);
3747 } else {
3748 list_slab_objects(s, page,
3749 "Objects remaining in %s on kmem_cache_close()");
3750 }
3751 }
3752}
3753
3754/*
3755 * Release all resources used by a slab cache.
3756 */
3757static inline int kmem_cache_close(struct kmem_cache *s)
3758{
3759 int node;
3760 struct kmem_cache_node *n;
3761
3762 flush_all(s);
3763 /* Attempt to free all objects */
3764 for_each_kmem_cache_node(s, node, n) {
3765 free_partial(s, n);
3766 if (n->nr_partial || slabs_node(s, node))
3767 return 1;
3768 }
3769 free_percpu(s->cpu_slab);
3770 free_kmem_cache_nodes(s);
3771 return 0;
3772}
3773
3774int __kmem_cache_shutdown(struct kmem_cache *s)
3775{
3776 return kmem_cache_close(s);
3777}
3778
3779/********************************************************************
3780 * Kmalloc subsystem
3781 *******************************************************************/
3782
3783static int __init setup_slub_min_order(char *str)
3784{
3785 get_option(&str, &slub_min_order);
3786
3787 return 1;
3788}
3789
3790__setup("slub_min_order=", setup_slub_min_order);
3791
3792static int __init setup_slub_max_order(char *str)
3793{
3794 get_option(&str, &slub_max_order);
3795 slub_max_order = min(slub_max_order, MAX_ORDER - 1);
3796
3797 return 1;
3798}
3799
3800__setup("slub_max_order=", setup_slub_max_order);
3801
3802static int __init setup_slub_min_objects(char *str)
3803{
3804 get_option(&str, &slub_min_objects);
3805
3806 return 1;
3807}
3808
3809__setup("slub_min_objects=", setup_slub_min_objects);
3810
3811void *__kmalloc(size_t size, gfp_t flags)
3812{
3813 struct kmem_cache *s;
3814 void *ret;
3815
3816 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
3817 return kmalloc_large(size, flags);
3818
3819 s = kmalloc_slab(size, flags);
3820
3821 if (unlikely(ZERO_OR_NULL_PTR(s)))
3822 return s;
3823
3824 ret = slab_alloc(s, flags, _RET_IP_);
3825
3826 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
3827
3828 kasan_kmalloc(s, ret, size, flags);
3829
3830 return ret;
3831}
3832EXPORT_SYMBOL(__kmalloc);
3833
3834#ifdef CONFIG_NUMA
3835static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
3836{
3837 struct page *page;
3838 void *ptr = NULL;
3839
3840 flags |= __GFP_COMP | __GFP_NOTRACK;
3841 page = alloc_kmem_pages_node(node, flags, get_order(size));
3842 if (page)
3843 ptr = page_address(page);
3844
3845 kmalloc_large_node_hook(ptr, size, flags);
3846 return ptr;
3847}
3848
3849void *__kmalloc_node(size_t size, gfp_t flags, int node)
3850{
3851 struct kmem_cache *s;
3852 void *ret;
3853
3854 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
3855 ret = kmalloc_large_node(size, flags, node);
3856
3857 trace_kmalloc_node(_RET_IP_, ret,
3858 size, PAGE_SIZE << get_order(size),
3859 flags, node);
3860
3861 return ret;
3862 }
3863
3864 s = kmalloc_slab(size, flags);
3865
3866 if (unlikely(ZERO_OR_NULL_PTR(s)))
3867 return s;
3868
3869 ret = slab_alloc_node(s, flags, node, _RET_IP_);
3870
3871 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
3872
3873 kasan_kmalloc(s, ret, size, flags);
3874
3875 return ret;
3876}
3877EXPORT_SYMBOL(__kmalloc_node);
3878#endif
3879
3880#ifdef CONFIG_HARDENED_USERCOPY
3881/*
3882 * Rejects objects that are incorrectly sized.
3883 *
3884 * Returns NULL if check passes, otherwise const char * to name of cache
3885 * to indicate an error.
3886 */
3887const char *__check_heap_object(const void *ptr, unsigned long n,
3888 struct page *page)
3889{
3890 struct kmem_cache *s;
3891 unsigned long offset;
3892 size_t object_size;
3893
3894 /* Find object and usable object size. */
3895 s = page->slab_cache;
3896 object_size = slab_ksize(s);
3897
3898 /* Reject impossible pointers. */
3899 if (ptr < page_address(page))
3900 return s->name;
3901
3902 /* Find offset within object. */
3903 offset = (ptr - page_address(page)) % s->size;
3904
3905 /* Adjust for redzone and reject if within the redzone. */
3906 if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) {
3907 if (offset < s->red_left_pad)
3908 return s->name;
3909 offset -= s->red_left_pad;
3910 }
3911
3912 /* Allow address range falling entirely within object size. */
3913 if (offset <= object_size && n <= object_size - offset)
3914 return NULL;
3915
3916 return s->name;
3917}
3918#endif /* CONFIG_HARDENED_USERCOPY */
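
/*
 * Example of the offset check above (illustrative numbers): for a cache
 * with s->size == 128 and no red zone, a pointer 300 bytes into the page
 * gives offset == 300 % 128 == 44, so a copy is only allowed while
 * n <= object_size - 44; anything longer would spill into the next object
 * and the cache name is returned to flag the violation.
 */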
3919
3920static size_t __ksize(const void *object)
3921{
3922 struct page *page;
3923
3924 if (unlikely(object == ZERO_SIZE_PTR))
3925 return 0;
3926
3927 page = virt_to_head_page(object);
3928
3929 if (unlikely(!PageSlab(page))) {
3930 WARN_ON(!PageCompound(page));
3931 return PAGE_SIZE << compound_order(page);
3932 }
3933
3934 return slab_ksize(page->slab_cache);
3935}
3936
3937size_t ksize(const void *object)
3938{
3939 size_t size = __ksize(object);
3940 /* We assume that ksize callers could use the whole allocated area,
3941 so we need to unpoison this area. */
3942 kasan_krealloc(object, size, GFP_NOWAIT);
3943 return size;
3944}
3945EXPORT_SYMBOL(ksize);
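
/*
 * For example, a kmalloc(13, GFP_KERNEL) allocation is typically served
 * from the kmalloc-16 cache, so ksize() on the returned pointer reports
 * 16 usable bytes rather than the 13 that were requested; the unpoisoning
 * above exists because callers may legitimately use that whole area.
 */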
3946
3947void kfree(const void *x)
3948{
3949 struct page *page;
3950 void *object = (void *)x;
3951
3952 trace_kfree(_RET_IP_, x);
3953
3954 if (unlikely(ZERO_OR_NULL_PTR(x)))
3955 return;
3956
3957 page = virt_to_head_page(x);
3958 if (unlikely(!PageSlab(page))) {
3959 BUG_ON(!PageCompound(page));
3960 kfree_hook(x);
3961 __free_kmem_pages(page, compound_order(page));
3962 return;
3963 }
3964 slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
3965}
3966EXPORT_SYMBOL(kfree);
3967
3968#define SHRINK_PROMOTE_MAX 32
3969
3970/*
3971 * kmem_cache_shrink discards empty slabs and promotes the slabs filled
3972 * up most to the head of the partial lists. New allocations will then
3973 * fill those up and thus they can be removed from the partial lists.
3974 *
3975 * The slabs with the least items are placed last. This results in them
3976 * being allocated from last, increasing the chance that the last objects
3977 * are freed in them.
3978 */
3979int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate)
3980{
3981 int node;
3982 int i;
3983 struct kmem_cache_node *n;
3984 struct page *page;
3985 struct page *t;
3986 struct list_head discard;
3987 struct list_head promote[SHRINK_PROMOTE_MAX];
3988 unsigned long flags;
3989 int ret = 0;
3990
3991 if (deactivate) {
3992 /*
3993 * Disable empty slabs caching. Used to avoid pinning offline
3994 * memory cgroups by kmem pages that can be freed.
3995 */
3996 s->cpu_partial = 0;
3997 s->min_partial = 0;
3998
3999 /*
4000 * s->cpu_partial is checked locklessly (see put_cpu_partial),
4001 * so we have to make sure the change is visible.
4002 */
4003 kick_all_cpus_sync();
4004 }
4005
4006 flush_all(s);
4007 for_each_kmem_cache_node(s, node, n) {
4008 INIT_LIST_HEAD(&discard);
4009 for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
4010 INIT_LIST_HEAD(promote + i);
4011
4012 spin_lock_irqsave(&n->list_lock, flags);
4013
4014 /*
4015 * Build lists of slabs to discard or promote.
4016 *
4017 * Note that concurrent frees may occur while we hold the
4018 * list_lock. page->inuse here is the upper limit.
4019 */
4020 list_for_each_entry_safe(page, t, &n->partial, lru) {
4021 int free = page->objects - page->inuse;
4022
4023 /* Do not reread page->inuse */
4024 barrier();
4025
4026 /* We do not keep full slabs on the list */
4027 BUG_ON(free <= 0);
4028
4029 if (free == page->objects) {
4030 list_move(&page->lru, &discard);
4031 n->nr_partial--;
4032 } else if (free <= SHRINK_PROMOTE_MAX)
4033 list_move(&page->lru, promote + free - 1);
4034 }
4035
4036 /*
4037 * Promote the slabs filled up most to the head of the
4038 * partial list.
4039 */
4040 for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
4041 list_splice(promote + i, &n->partial);
4042
4043 spin_unlock_irqrestore(&n->list_lock, flags);
4044
4045 /* Release empty slabs */
4046 list_for_each_entry_safe(page, t, &discard, lru)
4047 discard_slab(s, page);
4048
4049 if (slabs_node(s, node))
4050 ret = 1;
4051 }
4052
4053 return ret;
4054}
4055
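/*
 * Memory hotplug callback: a node is about to go offline, so shrink every
 * cache and release the empty slabs still parked on the partial lists.
 */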
4056static int slab_mem_going_offline_callback(void *arg)
4057{
4058 struct kmem_cache *s;
4059
4060 mutex_lock(&slab_mutex);
4061 list_for_each_entry(s, &slab_caches, list)
4062 __kmem_cache_shrink(s, false);
4063 mutex_unlock(&slab_mutex);
4064
4065 return 0;
4066}
4067
4068static void slab_mem_offline_callback(void *arg)
4069{
4070 struct kmem_cache_node *n;
4071 struct kmem_cache *s;
4072 struct memory_notify *marg = arg;
4073 int offline_node;
4074
4075 offline_node = marg->status_change_nid_normal;
4076
4077 /*
4078 * If the node still has available normal memory, we still need the
4079 * kmem_cache_node for it. Nothing to free here.
4080 */
4081 if (offline_node < 0)
4082 return;
4083
4084 mutex_lock(&slab_mutex);
4085 list_for_each_entry(s, &slab_caches, list) {
4086 n = get_node(s, offline_node);
4087 if (n) {
4088 /*
4089 * if n->nr_slabs > 0, slabs still exist on the node
4090 * that is going down. We were unable to free them,
4091 * and the offline_pages() function shouldn't have called this
4092 * callback. So, we must fail.
4093 */
4094 BUG_ON(slabs_node(s, offline_node));
4095
4096 s->node[offline_node] = NULL;
4097 kmem_cache_free(kmem_cache_node, n);
4098 }
4099 }
4100 mutex_unlock(&slab_mutex);
4101}
4102
4103static int slab_mem_going_online_callback(void *arg)
4104{
4105 struct kmem_cache_node *n;
4106 struct kmem_cache *s;
4107 struct memory_notify *marg = arg;
4108 int nid = marg->status_change_nid_normal;
4109 int ret = 0;
4110
4111 /*
4112 * If the node's memory is already available, then kmem_cache_node is
4113 * already created. Nothing to do.
4114 */
4115 if (nid < 0)
4116 return 0;
4117
4118 /*
4119 * We are bringing a node online. No memory is available yet. We must
4120 * allocate a kmem_cache_node structure in order to bring the node
4121 * online.
4122 */
4123 mutex_lock(&slab_mutex);
4124 list_for_each_entry(s, &slab_caches, list) {
4125 /*
4126 * XXX: kmem_cache_alloc_node will fall back to other nodes
4127 * since memory is not yet available from the node that
4128 * is brought up.
4129 */
4130 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
4131 if (!n) {
4132 ret = -ENOMEM;
4133 goto out;
4134 }
4135 init_kmem_cache_node(n);
4136 s->node[nid] = n;
4137 }
4138out:
4139 mutex_unlock(&slab_mutex);
4140 return ret;
4141}
4142
4143static int slab_memory_callback(struct notifier_block *self,
4144 unsigned long action, void *arg)
4145{
4146 int ret = 0;
4147
4148 switch (action) {
4149 case MEM_GOING_ONLINE:
4150 ret = slab_mem_going_online_callback(arg);
4151 break;
4152 case MEM_GOING_OFFLINE:
4153 ret = slab_mem_going_offline_callback(arg);
4154 break;
4155 case MEM_OFFLINE:
4156 case MEM_CANCEL_ONLINE:
4157 slab_mem_offline_callback(arg);
4158 break;
4159 case MEM_ONLINE:
4160 case MEM_CANCEL_OFFLINE:
4161 break;
4162 }
4163 if (ret)
4164 ret = notifier_from_errno(ret);
4165 else
4166 ret = NOTIFY_OK;
4167 return ret;
4168}
4169
4170static struct notifier_block slab_memory_callback_nb = {
4171 .notifier_call = slab_memory_callback,
4172 .priority = SLAB_CALLBACK_PRI,
4173};
4174
4175/********************************************************************
4176 * Basic setup of slabs
4177 *******************************************************************/
4178
4179/*
4180 * Used for early kmem_cache structures that were allocated using
4181 * the page allocator. Allocate them properly then fix up the pointers
4182 * that may be pointing to the wrong kmem_cache structure.
4183 */
4184
4185static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
4186{
4187 int node;
4188 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
4189 struct kmem_cache_node *n;
4190
4191 memcpy(s, static_cache, kmem_cache->object_size);
4192
4193 /*
4194 * This runs very early, and only the boot processor is supposed to be
4195 * up. Even if it weren't true, IRQs are not up so we couldn't fire
4196 * IPIs around.
4197 */
4198 __flush_cpu_slab(s, smp_processor_id());
4199 for_each_kmem_cache_node(s, node, n) {
4200 struct page *p;
4201
4202 list_for_each_entry(p, &n->partial, lru)
4203 p->slab_cache = s;
4204
4205#ifdef CONFIG_SLUB_DEBUG
4206#ifndef CONFIG_RKP_KDP
4207 list_for_each_entry(p, &n->full, lru)
4208 p->slab_cache = s;
4209#endif /*CONFIG_RKP_KDP*/
4210#endif
4211 }
4212 slab_init_memcg_params(s);
4213 list_add(&s->list, &slab_caches);
4214 return s;
4215}
4216
4217void __init kmem_cache_init(void)
4218{
4219 static __initdata struct kmem_cache boot_kmem_cache,
4220 boot_kmem_cache_node;
4221
4222 if (debug_guardpage_minorder())
4223 slub_max_order = 0;
4224
4225 kmem_cache_node = &boot_kmem_cache_node;
4226 kmem_cache = &boot_kmem_cache;
4227
4228 create_boot_cache(kmem_cache_node, "kmem_cache_node",
4229 sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN);
4230
4231 register_hotmemory_notifier(&slab_memory_callback_nb);
4232
4233 /* Able to allocate the per node structures */
4234 slab_state = PARTIAL;
4235
4236 create_boot_cache(kmem_cache, "kmem_cache",
4237 offsetof(struct kmem_cache, node) +
4238 nr_node_ids * sizeof(struct kmem_cache_node *),
4239 SLAB_HWCACHE_ALIGN);
4240
4241 kmem_cache = bootstrap(&boot_kmem_cache);
4242
4243 /*
4244 * Allocate kmem_cache_node properly from the kmem_cache slab.
4245 * kmem_cache_node is separately allocated so no need to
4246 * update any list pointers.
4247 */
4248 kmem_cache_node = bootstrap(&boot_kmem_cache_node);
4249
4250 /* Now we can use the kmem_cache to allocate kmalloc slabs */
4251 setup_kmalloc_cache_index_table();
4252 create_kmalloc_caches(0);
4253
4254#ifdef CONFIG_SMP
4255 register_cpu_notifier(&slab_notifier);
4256#endif
4257
4258 pr_info("SLUB: HWalign=%d, Order=%d-%d, MinObjects=%d, CPUs=%d, Nodes=%d\n",
4259 cache_line_size(),
4260 slub_min_order, slub_max_order, slub_min_objects,
4261 nr_cpu_ids, nr_node_ids);
4262}
4263
4264void __init kmem_cache_init_late(void)
4265{
4266}
4267
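/*
 * Try to satisfy a cache creation request by aliasing an existing
 * mergeable cache: bump its refcount, widen object_size/inuse to cover the
 * new request (for the root cache and all of its memcg children) and
 * register a sysfs alias under the requested name.
 */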
4268struct kmem_cache *
4269__kmem_cache_alias(const char *name, size_t size, size_t align,
4270 unsigned long flags, void (*ctor)(void *))
4271{
4272 struct kmem_cache *s, *c;
4273
4274 s = find_mergeable(size, align, flags, name, ctor);
4275 if (s) {
4276 s->refcount++;
4277
4278 /*
4279 * Adjust the object sizes so that we clear
4280 * the complete object on kzalloc.
4281 */
4282 s->object_size = max(s->object_size, (int)size);
4283 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
4284
4285 for_each_memcg_cache(c, s) {
4286 c->object_size = s->object_size;
4287 c->inuse = max_t(int, c->inuse,
4288 ALIGN(size, sizeof(void *)));
4289 }
4290
4291 if (sysfs_slab_alias(s, name)) {
4292 s->refcount--;
4293 s = NULL;
4294 }
4295 }
4296
4297 return s;
4298}
4299
4300int __kmem_cache_create(struct kmem_cache *s, unsigned long flags)
4301{
4302 int err;
4303
4304 err = kmem_cache_open(s, flags);
4305 if (err)
4306 return err;
4307
4308 /* Mutex is not taken during early boot */
4309 if (slab_state <= UP)
4310 return 0;
4311
4312 memcg_propagate_slab_attrs(s);
4313 err = sysfs_slab_add(s);
4314 if (err)
4315 kmem_cache_close(s);
4316
4317 return err;
4318}
4319
4320#ifdef CONFIG_SMP
4321/*
4322 * Use the cpu notifier to ensure that the cpu slabs are flushed when
4323 * necessary.
4324 */
4325static int slab_cpuup_callback(struct notifier_block *nfb,
4326 unsigned long action, void *hcpu)
4327{
4328 long cpu = (long)hcpu;
4329 struct kmem_cache *s;
4330 unsigned long flags;
4331
4332 switch (action) {
4333 case CPU_UP_CANCELED:
4334 case CPU_UP_CANCELED_FROZEN:
4335 case CPU_DEAD:
4336 case CPU_DEAD_FROZEN:
4337 mutex_lock(&slab_mutex);
4338 list_for_each_entry(s, &slab_caches, list) {
4339 local_irq_save(flags);
4340 __flush_cpu_slab(s, cpu);
4341 local_irq_restore(flags);
4342 }
4343 mutex_unlock(&slab_mutex);
4344 break;
4345 default:
4346 break;
4347 }
4348 return NOTIFY_OK;
4349}
4350
4351static struct notifier_block slab_notifier = {
4352 .notifier_call = slab_cpuup_callback
4353};
4354
4355#endif
4356
4357void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
4358{
4359 struct kmem_cache *s;
4360 void *ret;
4361
4362 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
4363 return kmalloc_large(size, gfpflags);
4364
4365 s = kmalloc_slab(size, gfpflags);
4366
4367 if (unlikely(ZERO_OR_NULL_PTR(s)))
4368 return s;
4369
4370 ret = slab_alloc(s, gfpflags, caller);
4371
4372 /* Honor the call site pointer we received. */
4373 trace_kmalloc(caller, ret, size, s->size, gfpflags);
4374
4375 return ret;
4376}
4377
4378#ifdef CONFIG_NUMA
4379void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
4380 int node, unsigned long caller)
4381{
4382 struct kmem_cache *s;
4383 void *ret;
4384
4385 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
4386 ret = kmalloc_large_node(size, gfpflags, node);
4387
4388 trace_kmalloc_node(caller, ret,
4389 size, PAGE_SIZE << get_order(size),
4390 gfpflags, node);
4391
4392 return ret;
4393 }
4394
4395 s = kmalloc_slab(size, gfpflags);
4396
4397 if (unlikely(ZERO_OR_NULL_PTR(s)))
4398 return s;
4399
4400 ret = slab_alloc_node(s, gfpflags, node, caller);
4401
4402 /* Honor the call site pointer we received. */
4403 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
4404
4405 return ret;
4406}
4407#endif
4408
4409#ifdef CONFIG_SYSFS
4410static int count_inuse(struct page *page)
4411{
4412 return page->inuse;
4413}
4414
4415static int count_total(struct page *page)
4416{
4417 return page->objects;
4418}
4419#endif
4420
4421#ifdef CONFIG_SLUB_DEBUG
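/*
 * Check one slab for consistency: every object on the freelist must pass
 * the SLUB_RED_INACTIVE checks and every allocated object the
 * SLUB_RED_ACTIVE checks. Returns 1 if the slab is sane, 0 otherwise.
 */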
4422static int validate_slab(struct kmem_cache *s, struct page *page,
4423 unsigned long *map)
4424{
4425 void *p;
4426 void *addr = page_address(page);
4427
4428 if (!check_slab(s, page) ||
4429 !on_freelist(s, page, NULL))
4430 return 0;
4431
4432 /* Now we know that a valid freelist exists */
4433 bitmap_zero(map, page->objects);
4434
4435 get_map(s, page, map);
4436 for_each_object(p, s, addr, page->objects) {
4437 if (test_bit(slab_index(p, s, addr), map))
4438 if (!check_object(s, page, p, SLUB_RED_INACTIVE))
4439 return 0;
4440 }
4441
4442 for_each_object(p, s, addr, page->objects)
4443 if (!test_bit(slab_index(p, s, addr), map))
4444 if (!check_object(s, page, p, SLUB_RED_ACTIVE))
4445 return 0;
4446 return 1;
4447}
4448
4449static void validate_slab_slab(struct kmem_cache *s, struct page *page,
4450 unsigned long *map)
4451{
4452 slab_lock(page);
4453 validate_slab(s, page, map);
4454 slab_unlock(page);
4455}
4456
4457static int validate_slab_node(struct kmem_cache *s,
4458 struct kmem_cache_node *n, unsigned long *map)
4459{
4460 unsigned long count = 0;
4461 struct page *page;
4462 unsigned long flags;
4463
4464 spin_lock_irqsave(&n->list_lock, flags);
4465
4466 list_for_each_entry(page, &n->partial, lru) {
4467 validate_slab_slab(s, page, map);
4468 count++;
4469 }
4470 if (count != n->nr_partial)
4471 pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
4472 s->name, count, n->nr_partial);
4473
4474 if (!(s->flags & SLAB_STORE_USER))
4475 goto out;
4476
4477 list_for_each_entry(page, &n->full, lru) {
4478 validate_slab_slab(s, page, map);
4479 count++;
4480 }
4481 if (count != atomic_long_read(&n->nr_slabs))
4482 pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
4483 s->name, count, atomic_long_read(&n->nr_slabs));
4484
4485out:
4486 spin_unlock_irqrestore(&n->list_lock, flags);
4487 return count;
4488}
4489
4490static long validate_slab_cache(struct kmem_cache *s)
4491{
4492 int node;
4493 unsigned long count = 0;
4494 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4495 sizeof(unsigned long), GFP_KERNEL);
4496 struct kmem_cache_node *n;
4497
4498 if (!map)
4499 return -ENOMEM;
4500
4501 flush_all(s);
4502 for_each_kmem_cache_node(s, node, n)
4503 count += validate_slab_node(s, n, map);
4504 kfree(map);
4505 return count;
4506}
4507/*
4508 * Generate lists of code addresses where slabcache objects are allocated
4509 * and freed.
4510 */
4511
4512struct location {
4513 unsigned long count;
4514 unsigned long addr;
4515 long long sum_time;
4516 long min_time;
4517 long max_time;
4518 long min_pid;
4519 long max_pid;
4520 DECLARE_BITMAP(cpus, NR_CPUS);
4521 nodemask_t nodes;
4522};
4523
4524struct loc_track {
4525 unsigned long max;
4526 unsigned long count;
4527 struct location *loc;
4528};
4529
4530static void free_loc_track(struct loc_track *t)
4531{
4532 if (t->max)
4533 free_pages((unsigned long)t->loc,
4534 get_order(sizeof(struct location) * t->max));
4535}
4536
4537static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
4538{
4539 struct location *l;
4540 int order;
4541
4542 order = get_order(sizeof(struct location) * max);
4543
4544 l = (void *)__get_free_pages(flags, order);
4545 if (!l)
4546 return 0;
4547
4548 if (t->count) {
4549 memcpy(l, t->loc, sizeof(struct location) * t->count);
4550 free_loc_track(t);
4551 }
4552 t->max = max;
4553 t->loc = l;
4554 return 1;
4555}
4556
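/*
 * Record one tracked call site in the sorted location table: a binary
 * search on track->addr either updates an existing entry (count, age,
 * pid range, cpu/node masks) or inserts a new one at the search position,
 * growing the table when it runs out of room.
 */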
4557static int add_location(struct loc_track *t, struct kmem_cache *s,
4558 const struct track *track)
4559{
4560 long start, end, pos;
4561 struct location *l;
4562 unsigned long caddr;
4563 unsigned long age = jiffies - track->when;
4564
4565 start = -1;
4566 end = t->count;
4567
4568 for ( ; ; ) {
4569 pos = start + (end - start + 1) / 2;
4570
4571 /*
4572 * There is nothing at "end". If we end up there
4573 * we need to insert the new element before "end".
4574 */
4575 if (pos == end)
4576 break;
4577
4578 caddr = t->loc[pos].addr;
4579 if (track->addr == caddr) {
4580
4581 l = &t->loc[pos];
4582 l->count++;
4583 if (track->when) {
4584 l->sum_time += age;
4585 if (age < l->min_time)
4586 l->min_time = age;
4587 if (age > l->max_time)
4588 l->max_time = age;
4589
4590 if (track->pid < l->min_pid)
4591 l->min_pid = track->pid;
4592 if (track->pid > l->max_pid)
4593 l->max_pid = track->pid;
4594
4595 cpumask_set_cpu(track->cpu,
4596 to_cpumask(l->cpus));
4597 }
4598 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4599 return 1;
4600 }
4601
4602 if (track->addr < caddr)
4603 end = pos;
4604 else
4605 start = pos;
4606 }
4607
4608 /*
4609 * Not found. Insert new tracking element.
4610 */
4611 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
4612 return 0;
4613
4614 l = t->loc + pos;
4615 if (pos < t->count)
4616 memmove(l + 1, l,
4617 (t->count - pos) * sizeof(struct location));
4618 t->count++;
4619 l->count = 1;
4620 l->addr = track->addr;
4621 l->sum_time = age;
4622 l->min_time = age;
4623 l->max_time = age;
4624 l->min_pid = track->pid;
4625 l->max_pid = track->pid;
4626 cpumask_clear(to_cpumask(l->cpus));
4627 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
4628 nodes_clear(l->nodes);
4629 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4630 return 1;
4631}
4632
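/*
 * Add every currently allocated object of a slab to the location table;
 * the freelist bitmap marks free objects, so only objects whose bit is
 * clear are fed to add_location().
 */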
4633static void process_slab(struct loc_track *t, struct kmem_cache *s,
4634 struct page *page, enum track_item alloc,
4635 unsigned long *map)
4636{
4637 void *addr = page_address(page);
4638 void *p;
4639
4640 bitmap_zero(map, page->objects);
4641 get_map(s, page, map);
4642
4643 for_each_object(p, s, addr, page->objects)
4644 if (!test_bit(slab_index(p, s, addr), map))
4645 add_location(t, s, get_track(s, p, alloc));
4646}
4647
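/*
 * Back the alloc_calls/free_calls sysfs files: gather the tracked call
 * sites from all partial and full slabs on every node and format one line
 * per location into the page-sized sysfs buffer.
 */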
4648static int list_locations(struct kmem_cache *s, char *buf,
4649 enum track_item alloc)
4650{
4651 int len = 0;
4652 unsigned long i;
4653 struct loc_track t = { 0, 0, NULL };
4654 int node;
4655 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4656 sizeof(unsigned long), GFP_KERNEL);
4657 struct kmem_cache_node *n;
4658
4659 if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
4660 GFP_TEMPORARY)) {
4661 kfree(map);
4662 return sprintf(buf, "Out of memory\n");
4663 }
4664 /* Push back cpu slabs */
4665 flush_all(s);
4666
4667 for_each_kmem_cache_node(s, node, n) {
4668 unsigned long flags;
4669 struct page *page;
4670
4671 if (!atomic_long_read(&n->nr_slabs))
4672 continue;
4673
4674 spin_lock_irqsave(&n->list_lock, flags);
4675 list_for_each_entry(page, &n->partial, lru)
4676 process_slab(&t, s, page, alloc, map);
4677 list_for_each_entry(page, &n->full, lru)
4678 process_slab(&t, s, page, alloc, map);
4679 spin_unlock_irqrestore(&n->list_lock, flags);
4680 }
4681
4682 for (i = 0; i < t.count; i++) {
4683 struct location *l = &t.loc[i];
4684
4685 if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
4686 break;
4687 len += sprintf(buf + len, "%7ld ", l->count);
4688
4689 if (l->addr)
4690 len += sprintf(buf + len, "%pS", (void *)l->addr);
4691 else
4692 len += sprintf(buf + len, "<not-available>");
4693
4694 if (l->sum_time != l->min_time) {
4695 len += sprintf(buf + len, " age=%ld/%ld/%ld",
4696 l->min_time,
4697 (long)div_u64(l->sum_time, l->count),
4698 l->max_time);
4699 } else
4700 len += sprintf(buf + len, " age=%ld",
4701 l->min_time);
4702
4703 if (l->min_pid != l->max_pid)
4704 len += sprintf(buf + len, " pid=%ld-%ld",
4705 l->min_pid, l->max_pid);
4706 else
4707 len += sprintf(buf + len, " pid=%ld",
4708 l->min_pid);
4709
4710 if (num_online_cpus() > 1 &&
4711 !cpumask_empty(to_cpumask(l->cpus)) &&
4712 len < PAGE_SIZE - 60)
4713 len += scnprintf(buf + len, PAGE_SIZE - len - 50,
4714 " cpus=%*pbl",
4715 cpumask_pr_args(to_cpumask(l->cpus)));
4716
4717 if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
4718 len < PAGE_SIZE - 60)
4719 len += scnprintf(buf + len, PAGE_SIZE - len - 50,
4720 " nodes=%*pbl",
4721 nodemask_pr_args(&l->nodes));
4722
4723 len += sprintf(buf + len, "\n");
4724 }
4725
4726 free_loc_track(&t);
4727 kfree(map);
4728 if (!t.count)
4729 len += sprintf(buf, "No data\n");
4730 return len;
4731}
4732#endif
4733
4734#ifdef SLUB_RESILIENCY_TEST
4735static void __init resiliency_test(void)
4736{
4737 u8 *p;
4738
4739 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
4740
4741 pr_err("SLUB resiliency testing\n");
4742 pr_err("-----------------------\n");
4743 pr_err("A. Corruption after allocation\n");
4744
4745 p = kzalloc(16, GFP_KERNEL);
4746 p[16] = 0x12;
4747 pr_err("\n1. kmalloc-16: Clobber Redzone/next pointer 0x12->0x%p\n\n",
4748 p + 16);
4749
4750 validate_slab_cache(kmalloc_caches[4]);
4751
4752 /* Hmmm... The next two are dangerous */
4753 p = kzalloc(32, GFP_KERNEL);
4754 p[32 + sizeof(void *)] = 0x34;
4755 pr_err("\n2. kmalloc-32: Clobber next pointer/next slab 0x34->0x%p\n",
4756 p);
4757 pr_err("If allocated object is overwritten then not detectable\n\n");
4758
4759 validate_slab_cache(kmalloc_caches[5]);
4760 p = kzalloc(64, GFP_KERNEL);
4761 p += 64 + (get_cycles() & 0xff) * sizeof(void *);
4762 *p = 0x56;
4763 pr_err("\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
4764 p);
4765 pr_err("If allocated object is overwritten then not detectable\n\n");
4766 validate_slab_cache(kmalloc_caches[6]);
4767
4768 pr_err("\nB. Corruption after free\n");
4769 p = kzalloc(128, GFP_KERNEL);
4770 kfree(p);
4771 *p = 0x78;
4772 pr_err("1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
4773 validate_slab_cache(kmalloc_caches[7]);
4774
4775 p = kzalloc(256, GFP_KERNEL);
4776 kfree(p);
4777 p[50] = 0x9a;
4778 pr_err("\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
4779 validate_slab_cache(kmalloc_caches[8]);
4780
4781 p = kzalloc(512, GFP_KERNEL);
4782 kfree(p);
4783 p[512] = 0xab;
4784 pr_err("\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
4785 validate_slab_cache(kmalloc_caches[9]);
4786}
4787#else
4788#ifdef CONFIG_SYSFS
4789static void resiliency_test(void) {}
4790#endif
4791#endif
4792
4793#ifdef CONFIG_SYSFS
4794enum slab_stat_type {
4795 SL_ALL, /* All slabs */
4796 SL_PARTIAL, /* Only partially allocated slabs */
4797 SL_CPU, /* Only slabs used for cpu caches */
4798 SL_OBJECTS, /* Determine allocated objects not slabs */
4799 SL_TOTAL /* Determine object capacity not slabs */
4800};
4801
4802#define SO_ALL (1 << SL_ALL)
4803#define SO_PARTIAL (1 << SL_PARTIAL)
4804#define SO_CPU (1 << SL_CPU)
4805#define SO_OBJECTS (1 << SL_OBJECTS)
4806#define SO_TOTAL (1 << SL_TOTAL)
4807
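/*
 * Common helper for the object/slab counting attributes: depending on the
 * SO_* flags, sum up the cpu slabs, the full per-node totals or the
 * partial lists, and append a " N<node>=<count>" breakdown on NUMA.
 */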
4808static ssize_t show_slab_objects(struct kmem_cache *s,
4809 char *buf, unsigned long flags)
4810{
4811 unsigned long total = 0;
4812 int node;
4813 int x;
4814 unsigned long *nodes;
4815
4816 nodes = kzalloc(sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
4817 if (!nodes)
4818 return -ENOMEM;
4819
4820 if (flags & SO_CPU) {
4821 int cpu;
4822
4823 for_each_possible_cpu(cpu) {
4824 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab,
4825 cpu);
4826 int node;
4827 struct page *page;
4828
4829 page = READ_ONCE(c->page);
4830 if (!page)
4831 continue;
4832
4833 node = page_to_nid(page);
4834 if (flags & SO_TOTAL)
4835 x = page->objects;
4836 else if (flags & SO_OBJECTS)
4837 x = page->inuse;
4838 else
4839 x = 1;
4840
4841 total += x;
4842 nodes[node] += x;
4843
4844 page = READ_ONCE(c->partial);
4845 if (page) {
4846 node = page_to_nid(page);
4847 if (flags & SO_TOTAL)
4848 WARN_ON_ONCE(1);
4849 else if (flags & SO_OBJECTS)
4850 WARN_ON_ONCE(1);
4851 else
4852 x = page->pages;
4853 total += x;
4854 nodes[node] += x;
4855 }
4856 }
4857 }
4858
4859 get_online_mems();
4860#ifdef CONFIG_SLUB_DEBUG
4861 if (flags & SO_ALL) {
4862 struct kmem_cache_node *n;
4863
4864 for_each_kmem_cache_node(s, node, n) {
4865
4866 if (flags & SO_TOTAL)
4867 x = atomic_long_read(&n->total_objects);
4868 else if (flags & SO_OBJECTS)
4869 x = atomic_long_read(&n->total_objects) -
4870 count_partial(n, count_free);
4871 else
4872 x = atomic_long_read(&n->nr_slabs);
4873 total += x;
4874 nodes[node] += x;
4875 }
4876
4877 } else
4878#endif
4879 if (flags & SO_PARTIAL) {
4880 struct kmem_cache_node *n;
4881
4882 for_each_kmem_cache_node(s, node, n) {
4883 if (flags & SO_TOTAL)
4884 x = count_partial(n, count_total);
4885 else if (flags & SO_OBJECTS)
4886 x = count_partial(n, count_inuse);
4887 else
4888 x = n->nr_partial;
4889 total += x;
4890 nodes[node] += x;
4891 }
4892 }
4893 x = sprintf(buf, "%lu", total);
4894#ifdef CONFIG_NUMA
4895 for (node = 0; node < nr_node_ids; node++)
4896 if (nodes[node])
4897 x += sprintf(buf + x, " N%d=%lu",
4898 node, nodes[node]);
4899#endif
4900 put_online_mems();
4901 kfree(nodes);
4902 return x + sprintf(buf + x, "\n");
4903}
4904
4905#ifdef CONFIG_SLUB_DEBUG
4906static int any_slab_objects(struct kmem_cache *s)
4907{
4908 int node;
4909 struct kmem_cache_node *n;
4910
4911 for_each_kmem_cache_node(s, node, n)
4912 if (atomic_long_read(&n->total_objects))
4913 return 1;
4914
4915 return 0;
4916}
4917#endif
4918
4919#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
4920#define to_slab(n) container_of(n, struct kmem_cache, kobj)
4921
4922struct slab_attribute {
4923 struct attribute attr;
4924 ssize_t (*show)(struct kmem_cache *s, char *buf);
4925 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
4926};
4927
4928#define SLAB_ATTR_RO(_name) \
4929 static struct slab_attribute _name##_attr = \
4930 __ATTR(_name, 0400, _name##_show, NULL)
4931
4932#define SLAB_ATTR(_name) \
4933 static struct slab_attribute _name##_attr = \
4934 __ATTR(_name, 0600, _name##_show, _name##_store)
4935
4936static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
4937{
4938 return sprintf(buf, "%d\n", s->size);
4939}
4940SLAB_ATTR_RO(slab_size);
4941
4942static ssize_t align_show(struct kmem_cache *s, char *buf)
4943{
4944 return sprintf(buf, "%d\n", s->align);
4945}
4946SLAB_ATTR_RO(align);
4947
4948static ssize_t object_size_show(struct kmem_cache *s, char *buf)
4949{
4950 return sprintf(buf, "%d\n", s->object_size);
4951}
4952SLAB_ATTR_RO(object_size);
4953
4954static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
4955{
4956 return sprintf(buf, "%d\n", oo_objects(s->oo));
4957}
4958SLAB_ATTR_RO(objs_per_slab);
4959
4960static ssize_t order_store(struct kmem_cache *s,
4961 const char *buf, size_t length)
4962{
4963 unsigned long order;
4964 int err;
4965
4966 err = kstrtoul(buf, 10, &order);
4967 if (err)
4968 return err;
4969
4970 if (order > slub_max_order || order < slub_min_order)
4971 return -EINVAL;
4972
4973 calculate_sizes(s, order);
4974 return length;
4975}
4976
4977static ssize_t order_show(struct kmem_cache *s, char *buf)
4978{
4979 return sprintf(buf, "%d\n", oo_order(s->oo));
4980}
4981SLAB_ATTR(order);
4982
4983static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
4984{
4985 return sprintf(buf, "%lu\n", s->min_partial);
4986}
4987
4988static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
4989 size_t length)
4990{
4991 unsigned long min;
4992 int err;
4993
4994 err = kstrtoul(buf, 10, &min);
4995 if (err)
4996 return err;
4997
4998 set_min_partial(s, min);
4999 return length;
5000}
5001SLAB_ATTR(min_partial);
5002
5003static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
5004{
5005 return sprintf(buf, "%u\n", s->cpu_partial);
5006}
5007
5008static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
5009 size_t length)
5010{
5011 unsigned long objects;
5012 int err;
5013
5014 err = kstrtoul(buf, 10, &objects);
5015 if (err)
5016 return err;
5017 if (objects && !kmem_cache_has_cpu_partial(s))
5018 return -EINVAL;
5019
5020 s->cpu_partial = objects;
5021 flush_all(s);
5022 return length;
5023}
5024SLAB_ATTR(cpu_partial);
5025
5026static ssize_t ctor_show(struct kmem_cache *s, char *buf)
5027{
5028 if (!s->ctor)
5029 return 0;
5030 return sprintf(buf, "%pS\n", s->ctor);
5031}
5032SLAB_ATTR_RO(ctor);
5033
5034static ssize_t aliases_show(struct kmem_cache *s, char *buf)
5035{
5036 return sprintf(buf, "%d\n", s->refcount < 0 ? 0 : s->refcount - 1);
5037}
5038SLAB_ATTR_RO(aliases);
5039
5040static ssize_t partial_show(struct kmem_cache *s, char *buf)
5041{
5042 return show_slab_objects(s, buf, SO_PARTIAL);
5043}
5044SLAB_ATTR_RO(partial);
5045
5046static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
5047{
5048 return show_slab_objects(s, buf, SO_CPU);
5049}
5050SLAB_ATTR_RO(cpu_slabs);
5051
5052static ssize_t objects_show(struct kmem_cache *s, char *buf)
5053{
5054 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
5055}
5056SLAB_ATTR_RO(objects);
5057
5058static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
5059{
5060 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
5061}
5062SLAB_ATTR_RO(objects_partial);
5063
5064static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
5065{
5066 int objects = 0;
5067 int pages = 0;
5068 int cpu;
5069 int len;
5070
5071 for_each_online_cpu(cpu) {
5072 struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial;
5073
5074 if (page) {
5075 pages += page->pages;
5076 objects += page->pobjects;
5077 }
5078 }
5079
5080 len = sprintf(buf, "%d(%d)", objects, pages);
5081
5082#ifdef CONFIG_SMP
5083 for_each_online_cpu(cpu) {
5084 struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial;
5085
5086 if (page && len < PAGE_SIZE - 20)
5087 len += sprintf(buf + len, " C%d=%d(%d)", cpu,
5088 page->pobjects, page->pages);
5089 }
5090#endif
5091 return len + sprintf(buf + len, "\n");
5092}
5093SLAB_ATTR_RO(slabs_cpu_partial);
5094
5095static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
5096{
5097 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
5098}
5099
5100static ssize_t reclaim_account_store(struct kmem_cache *s,
5101 const char *buf, size_t length)
5102{
5103 s->flags &= ~SLAB_RECLAIM_ACCOUNT;
5104 if (buf[0] == '1')
5105 s->flags |= SLAB_RECLAIM_ACCOUNT;
5106 return length;
5107}
5108SLAB_ATTR(reclaim_account);
5109
5110static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
5111{
5112 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
5113}
5114SLAB_ATTR_RO(hwcache_align);
5115
5116#ifdef CONFIG_ZONE_DMA
5117static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
5118{
5119 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
5120}
5121SLAB_ATTR_RO(cache_dma);
5122#endif
5123
5124static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
5125{
5126 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU));
5127}
5128SLAB_ATTR_RO(destroy_by_rcu);
5129
5130static ssize_t reserved_show(struct kmem_cache *s, char *buf)
5131{
5132 return sprintf(buf, "%d\n", s->reserved);
5133}
5134SLAB_ATTR_RO(reserved);
5135
5136#ifdef CONFIG_SLUB_DEBUG
5137static ssize_t slabs_show(struct kmem_cache *s, char *buf)
5138{
5139 return show_slab_objects(s, buf, SO_ALL);
5140}
5141SLAB_ATTR_RO(slabs);
5142
5143static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
5144{
5145 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
5146}
5147SLAB_ATTR_RO(total_objects);
5148
5149static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
5150{
5151 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE));
5152}
5153
5154static ssize_t sanity_checks_store(struct kmem_cache *s,
5155 const char *buf, size_t length)
5156{
5157 s->flags &= ~SLAB_DEBUG_FREE;
5158 if (buf[0] == '1') {
5159 s->flags &= ~__CMPXCHG_DOUBLE;
5160 s->flags |= SLAB_DEBUG_FREE;
5161 }
5162 return length;
5163}
5164SLAB_ATTR(sanity_checks);
5165
5166static ssize_t trace_show(struct kmem_cache *s, char *buf)
5167{
5168 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
5169}
5170
5171static ssize_t trace_store(struct kmem_cache *s, const char *buf,
5172 size_t length)
5173{
5174 /*
5175 * Tracing a merged cache is going to give confusing results
5176 * as well as cause other issues like converting a mergeable
5177 * cache into an unmergeable one.
5178 */
5179 if (s->refcount > 1)
5180 return -EINVAL;
5181
5182 s->flags &= ~SLAB_TRACE;
5183 if (buf[0] == '1') {
5184 s->flags &= ~__CMPXCHG_DOUBLE;
5185 s->flags |= SLAB_TRACE;
5186 }
5187 return length;
5188}
5189SLAB_ATTR(trace);
5190
5191static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
5192{
5193 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
5194}
5195
5196static ssize_t red_zone_store(struct kmem_cache *s,
5197 const char *buf, size_t length)
5198{
5199 if (any_slab_objects(s))
5200 return -EBUSY;
5201
5202 s->flags &= ~SLAB_RED_ZONE;
5203 if (buf[0] == '1') {
5204 s->flags &= ~__CMPXCHG_DOUBLE;
5205 s->flags |= SLAB_RED_ZONE;
5206 }
5207 calculate_sizes(s, -1);
5208 return length;
5209}
5210SLAB_ATTR(red_zone);
5211
5212static ssize_t poison_show(struct kmem_cache *s, char *buf)
5213{
5214 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
5215}
5216
5217static ssize_t poison_store(struct kmem_cache *s,
5218 const char *buf, size_t length)
5219{
5220 if (any_slab_objects(s))
5221 return -EBUSY;
5222
5223 s->flags &= ~SLAB_POISON;
5224 if (buf[0] == '1') {
5225 s->flags &= ~__CMPXCHG_DOUBLE;
5226 s->flags |= SLAB_POISON;
5227 }
5228 calculate_sizes(s, -1);
5229 return length;
5230}
5231SLAB_ATTR(poison);
5232
5233static ssize_t store_user_show(struct kmem_cache *s, char *buf)
5234{
5235 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
5236}
5237
5238static ssize_t store_user_store(struct kmem_cache *s,
5239 const char *buf, size_t length)
5240{
5241 if (any_slab_objects(s))
5242 return -EBUSY;
5243
5244 s->flags &= ~SLAB_STORE_USER;
5245 if (buf[0] == '1') {
5246 s->flags &= ~__CMPXCHG_DOUBLE;
5247 s->flags |= SLAB_STORE_USER;
5248 }
5249 calculate_sizes(s, -1);
5250 return length;
5251}
5252SLAB_ATTR(store_user);
5253
5254static ssize_t validate_show(struct kmem_cache *s, char *buf)
5255{
5256 return 0;
5257}
5258
5259static ssize_t validate_store(struct kmem_cache *s,
5260 const char *buf, size_t length)
5261{
5262 int ret = -EINVAL;
5263
5264 if (buf[0] == '1') {
5265 ret = validate_slab_cache(s);
5266 if (ret >= 0)
5267 ret = length;
5268 }
5269 return ret;
5270}
5271SLAB_ATTR(validate);
5272
5273static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
5274{
5275 if (!(s->flags & SLAB_STORE_USER))
5276 return -ENOSYS;
5277 return list_locations(s, buf, TRACK_ALLOC);
5278}
5279SLAB_ATTR_RO(alloc_calls);
5280
5281static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
5282{
5283 if (!(s->flags & SLAB_STORE_USER))
5284 return -ENOSYS;
5285 return list_locations(s, buf, TRACK_FREE);
5286}
5287SLAB_ATTR_RO(free_calls);
5288#endif /* CONFIG_SLUB_DEBUG */
5289
5290#ifdef CONFIG_FAILSLAB
5291static ssize_t failslab_show(struct kmem_cache *s, char *buf)
5292{
5293 return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
5294}
5295
5296static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
5297 size_t length)
5298{
5299 if (s->refcount > 1)
5300 return -EINVAL;
5301
5302 s->flags &= ~SLAB_FAILSLAB;
5303 if (buf[0] == '1')
5304 s->flags |= SLAB_FAILSLAB;
5305 return length;
5306}
5307SLAB_ATTR(failslab);
5308#endif
5309
5310static ssize_t shrink_show(struct kmem_cache *s, char *buf)
5311{
5312 return 0;
5313}
5314
5315static ssize_t shrink_store(struct kmem_cache *s,
5316 const char *buf, size_t length)
5317{
5318 if (buf[0] == '1')
5319 kmem_cache_shrink(s);
5320 else
5321 return -EINVAL;
5322 return length;
5323}
5324SLAB_ATTR(shrink);
5325
5326#ifdef CONFIG_NUMA
5327static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
5328{
5329 return sprintf(buf, "%d\n", s->remote_node_defrag_ratio / 10);
5330}
5331
5332static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
5333 const char *buf, size_t length)
5334{
5335 unsigned long ratio;
5336 int err;
5337
5338 err = kstrtoul(buf, 10, &ratio);
5339 if (err)
5340 return err;
5341
5342 if (ratio <= 100)
5343 s->remote_node_defrag_ratio = ratio * 10;
5344
5345 return length;
5346}
5347SLAB_ATTR(remote_node_defrag_ratio);
5348#endif
5349
5350#ifdef CONFIG_SLUB_STATS
5351static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
5352{
5353 unsigned long sum = 0;
5354 int cpu;
5355 int len;
5356 int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
5357
5358 if (!data)
5359 return -ENOMEM;
5360
5361 for_each_online_cpu(cpu) {
5362 unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
5363
5364 data[cpu] = x;
5365 sum += x;
5366 }
5367
5368 len = sprintf(buf, "%lu", sum);
5369
5370#ifdef CONFIG_SMP
5371 for_each_online_cpu(cpu) {
5372 if (data[cpu] && len < PAGE_SIZE - 20)
5373 len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
5374 }
5375#endif
5376 kfree(data);
5377 return len + sprintf(buf + len, "\n");
5378}
5379
5380static void clear_stat(struct kmem_cache *s, enum stat_item si)
5381{
5382 int cpu;
5383
5384 for_each_online_cpu(cpu)
5385 per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
5386}
5387
5388#define STAT_ATTR(si, text) \
5389static ssize_t text##_show(struct kmem_cache *s, char *buf) \
5390{ \
5391 return show_stat(s, buf, si); \
5392} \
5393static ssize_t text##_store(struct kmem_cache *s, \
5394 const char *buf, size_t length) \
5395{ \
5396 if (buf[0] != '0') \
5397 return -EINVAL; \
5398 clear_stat(s, si); \
5399 return length; \
5400} \
5401SLAB_ATTR(text);
5402
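/*
 * Each STAT_ATTR() below expands to a <name>_show()/<name>_store() pair:
 * reading sums the per-cpu counter (with a per-cpu breakdown on SMP),
 * writing a string that starts with '0' clears it on every online cpu.
 */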
5403STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
5404STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
5405STAT_ATTR(FREE_FASTPATH, free_fastpath);
5406STAT_ATTR(FREE_SLOWPATH, free_slowpath);
5407STAT_ATTR(FREE_FROZEN, free_frozen);
5408STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
5409STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
5410STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
5411STAT_ATTR(ALLOC_SLAB, alloc_slab);
5412STAT_ATTR(ALLOC_REFILL, alloc_refill);
5413STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
5414STAT_ATTR(FREE_SLAB, free_slab);
5415STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
5416STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
5417STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
5418STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
5419STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
5420STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
5421STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
5422STAT_ATTR(ORDER_FALLBACK, order_fallback);
5423STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
5424STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
5425STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
5426STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
5427STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
5428STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
5429#endif
5430
5431static struct attribute *slab_attrs[] = {
5432 &slab_size_attr.attr,
5433 &object_size_attr.attr,
5434 &objs_per_slab_attr.attr,
5435 &order_attr.attr,
5436 &min_partial_attr.attr,
5437 &cpu_partial_attr.attr,
5438 &objects_attr.attr,
5439 &objects_partial_attr.attr,
5440 &partial_attr.attr,
5441 &cpu_slabs_attr.attr,
5442 &ctor_attr.attr,
5443 &aliases_attr.attr,
5444 &align_attr.attr,
5445 &hwcache_align_attr.attr,
5446 &reclaim_account_attr.attr,
5447 &destroy_by_rcu_attr.attr,
5448 &shrink_attr.attr,
5449 &reserved_attr.attr,
5450 &slabs_cpu_partial_attr.attr,
5451#ifdef CONFIG_SLUB_DEBUG
5452 &total_objects_attr.attr,
5453 &slabs_attr.attr,
5454 &sanity_checks_attr.attr,
5455 &trace_attr.attr,
5456 &red_zone_attr.attr,
5457 &poison_attr.attr,
5458 &store_user_attr.attr,
5459 &validate_attr.attr,
5460 &alloc_calls_attr.attr,
5461 &free_calls_attr.attr,
5462#endif
5463#ifdef CONFIG_ZONE_DMA
5464 &cache_dma_attr.attr,
5465#endif
5466#ifdef CONFIG_NUMA
5467 &remote_node_defrag_ratio_attr.attr,
5468#endif
5469#ifdef CONFIG_SLUB_STATS
5470 &alloc_fastpath_attr.attr,
5471 &alloc_slowpath_attr.attr,
5472 &free_fastpath_attr.attr,
5473 &free_slowpath_attr.attr,
5474 &free_frozen_attr.attr,
5475 &free_add_partial_attr.attr,
5476 &free_remove_partial_attr.attr,
5477 &alloc_from_partial_attr.attr,
5478 &alloc_slab_attr.attr,
5479 &alloc_refill_attr.attr,
5480 &alloc_node_mismatch_attr.attr,
5481 &free_slab_attr.attr,
5482 &cpuslab_flush_attr.attr,
5483 &deactivate_full_attr.attr,
5484 &deactivate_empty_attr.attr,
5485 &deactivate_to_head_attr.attr,
5486 &deactivate_to_tail_attr.attr,
5487 &deactivate_remote_frees_attr.attr,
5488 &deactivate_bypass_attr.attr,
5489 &order_fallback_attr.attr,
5490 &cmpxchg_double_fail_attr.attr,
5491 &cmpxchg_double_cpu_fail_attr.attr,
5492 &cpu_partial_alloc_attr.attr,
5493 &cpu_partial_free_attr.attr,
5494 &cpu_partial_node_attr.attr,
5495 &cpu_partial_drain_attr.attr,
5496#endif
5497#ifdef CONFIG_FAILSLAB
5498 &failslab_attr.attr,
5499#endif
5500
5501 NULL
5502};
5503
5504static struct attribute_group slab_attr_group = {
5505 .attrs = slab_attrs,
5506};
5507
5508static ssize_t slab_attr_show(struct kobject *kobj,
5509 struct attribute *attr,
5510 char *buf)
5511{
5512 struct slab_attribute *attribute;
5513 struct kmem_cache *s;
5514 int err;
5515
5516 attribute = to_slab_attr(attr);
5517 s = to_slab(kobj);
5518
5519 if (!attribute->show)
5520 return -EIO;
5521
5522 err = attribute->show(s, buf);
5523
5524 return err;
5525}
5526
5527static ssize_t slab_attr_store(struct kobject *kobj,
5528 struct attribute *attr,
5529 const char *buf, size_t len)
5530{
5531 struct slab_attribute *attribute;
5532 struct kmem_cache *s;
5533 int err;
5534
5535 attribute = to_slab_attr(attr);
5536 s = to_slab(kobj);
5537
5538 if (!attribute->store)
5539 return -EIO;
5540
5541 err = attribute->store(s, buf, len);
5542#ifdef CONFIG_MEMCG_KMEM
5543 if (slab_state >= FULL && err >= 0 && is_root_cache(s)) {
5544 struct kmem_cache *c;
5545
5546 mutex_lock(&slab_mutex);
5547 if (s->max_attr_size < len)
5548 s->max_attr_size = len;
5549
5550 /*
5551 * This is a best effort propagation, so this function's return
5552 * value will be determined by the parent cache only. This is
5553 * basically because not all attributes will have a well
5554 * defined semantics for rollbacks - most of the actions will
5555 * have permanent effects.
5556 *
5557 * Returning the error value of any of the children that fail
5558 * is not 100 % defined, in the sense that users seeing the
5559 * error code won't be able to know anything about the state of
5560 * the cache.
5561 *
5562 * Only returning the error code for the parent cache at least
5563 * has well defined semantics. The cache being written to
5564 * directly either failed or succeeded; only on success do we loop
5565 * through the descendants with best-effort propagation.
5566 */
5567 for_each_memcg_cache(c, s)
5568 attribute->store(c, buf, len);
5569 mutex_unlock(&slab_mutex);
5570 }
5571#endif
5572 return err;
5573}
5574
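/*
 * When a memcg child cache is created, replay the sysfs attributes of its
 * root cache (if any were ever written) into the child so both start out
 * with the same tuning.
 */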
5575static void memcg_propagate_slab_attrs(struct kmem_cache *s)
5576{
5577#ifdef CONFIG_MEMCG_KMEM
5578 int i;
5579 char *buffer = NULL;
5580 struct kmem_cache *root_cache;
5581
5582 if (is_root_cache(s))
5583 return;
5584
5585 root_cache = s->memcg_params.root_cache;
5586
5587 /*
5588 * This means this cache has had no attributes written. Therefore, there
5589 * is no point in copying default values around.
5590 */
5591 if (!root_cache->max_attr_size)
5592 return;
5593
5594 for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
5595 char mbuf[64];
5596 char *buf;
5597 struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
5598 ssize_t len;
5599
5600 if (!attr || !attr->store || !attr->show)
5601 continue;
5602
5603 /*
5604 * It is really bad that we have to allocate here, so we will
5605 * do it only as a fallback. If we actually allocate, though,
5606 * we can just use the allocated buffer until the end.
5607 *
5608 * Most of the slub attributes will tend to be very small in
5609 * size, but sysfs allows buffers up to a page, so page-sized
5610 * attributes can theoretically happen.
5611 */
5612 if (buffer)
5613 buf = buffer;
5614 else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf))
5615 buf = mbuf;
5616 else {
5617 buffer = (char *) get_zeroed_page(GFP_KERNEL);
5618 if (WARN_ON(!buffer))
5619 continue;
5620 buf = buffer;
5621 }
5622
5623 len = attr->show(root_cache, buf);
5624 if (len > 0)
5625 attr->store(s, buf, len);
5626 }
5627
5628 if (buffer)
5629 free_page((unsigned long)buffer);
5630#endif
5631}
5632
5633static void kmem_cache_release(struct kobject *k)
5634{
5635 slab_kmem_cache_release(to_slab(k));
5636}
5637
5638static const struct sysfs_ops slab_sysfs_ops = {
5639 .show = slab_attr_show,
5640 .store = slab_attr_store,
5641};
5642
5643static struct kobj_type slab_ktype = {
5644 .sysfs_ops = &slab_sysfs_ops,
5645 .release = kmem_cache_release,
5646};
5647
5648static int uevent_filter(struct kset *kset, struct kobject *kobj)
5649{
5650 struct kobj_type *ktype = get_ktype(kobj);
5651
5652 if (ktype == &slab_ktype)
5653 return 1;
5654 return 0;
5655}
5656
5657static const struct kset_uevent_ops slab_uevent_ops = {
5658 .filter = uevent_filter,
5659};
5660
5661static struct kset *slab_kset;
5662
5663static inline struct kset *cache_kset(struct kmem_cache *s)
5664{
5665#ifdef CONFIG_MEMCG_KMEM
5666 if (!is_root_cache(s))
5667 return s->memcg_params.root_cache->memcg_kset;
5668#endif
5669 return slab_kset;
5670}
5671
5672#define ID_STR_LENGTH 64
5673
5674/* Create a unique string id for a slab cache:
5675 *
5676 * Format: :[flags-]size
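 * e.g. ":at-0000192" for a 192 byte SLAB_RECLAIM_ACCOUNT cache without
 * SLAB_NOTRACK set.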
5677 */
5678static char *create_unique_id(struct kmem_cache *s)
5679{
5680 char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
5681 char *p = name;
5682
5683 BUG_ON(!name);
5684
5685 *p++ = ':';
5686 /*
5687 * First flags affecting slabcache operations. We will only
5688 * get here for aliasable slabs so we do not need to support
5689 * too many flags. The flags here must cover all flags that
5690 * are matched during merging to guarantee that the id is
5691 * unique.
5692 */
5693 if (s->flags & SLAB_CACHE_DMA)
5694 *p++ = 'd';
5695 if (s->flags & SLAB_RECLAIM_ACCOUNT)
5696 *p++ = 'a';
5697 if (s->flags & SLAB_DEBUG_FREE)
5698 *p++ = 'F';
5699 if (!(s->flags & SLAB_NOTRACK))
5700 *p++ = 't';
5701 if (p != name + 1)
5702 *p++ = '-';
5703 p += sprintf(p, "%07d", s->size);
5704
5705 BUG_ON(p > name + ID_STR_LENGTH - 1);
5706 return name;
5707}
5708
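/*
 * Add a cache to /sys/kernel/slab. Unmergeable caches are registered under
 * their own name; mergeable caches get a generated unique id and the real
 * name is installed as a symlink alias pointing at it.
 */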
5709static int sysfs_slab_add(struct kmem_cache *s)
5710{
5711 int err;
5712 const char *name;
5713 int unmergeable = slab_unmergeable(s);
5714
5715 if (unmergeable) {
5716 /*
5717 * Slabcache can never be merged so we can use the name proper.
5718 * This is typically the case for debug situations. In that
5719 * case we can catch duplicate names easily.
5720 */
5721 sysfs_remove_link(&slab_kset->kobj, s->name);
5722 name = s->name;
5723 } else {
5724 /*
5725 * Create a unique name for the slab as a target
5726 * for the symlinks.
5727 */
5728 name = create_unique_id(s);
5729 }
5730
5731 s->kobj.kset = cache_kset(s);
5732 err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
5733 if (err)
5734 goto out;
5735
5736 err = sysfs_create_group(&s->kobj, &slab_attr_group);
5737 if (err)
5738 goto out_del_kobj;
5739
5740#ifdef CONFIG_MEMCG_KMEM
5741 if (is_root_cache(s)) {
5742 s->memcg_kset = kset_create_and_add("cgroup", NULL, &s->kobj);
5743 if (!s->memcg_kset) {
5744 err = -ENOMEM;
5745 goto out_del_kobj;
5746 }
5747 }
5748#endif
5749
5750 kobject_uevent(&s->kobj, KOBJ_ADD);
5751 if (!unmergeable) {
5752 /* Setup first alias */
5753 sysfs_slab_alias(s, s->name);
5754 }
5755out:
5756 if (!unmergeable)
5757 kfree(name);
5758 return err;
5759out_del_kobj:
5760 kobject_del(&s->kobj);
5761 goto out;
5762}
5763
5764void sysfs_slab_remove(struct kmem_cache *s)
5765{
5766 if (slab_state < FULL)
5767 /*
5768 * Sysfs has not been set up yet, so there is no need to remove the
5769 * cache from sysfs.
5770 */
5771 return;
5772
5773#ifdef CONFIG_MEMCG_KMEM
5774 kset_unregister(s->memcg_kset);
5775#endif
5776 kobject_uevent(&s->kobj, KOBJ_REMOVE);
5777 kobject_del(&s->kobj);
5778 kobject_put(&s->kobj);
5779}
5780
5781/*
5782 * Need to buffer aliases during bootup until sysfs becomes
5783 * available lest we lose that information.
5784 */
5785struct saved_alias {
5786 struct kmem_cache *s;
5787 const char *name;
5788 struct saved_alias *next;
5789};
5790
5791static struct saved_alias *alias_list;
5792
5793static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
5794{
5795 struct saved_alias *al;
5796
5797 if (slab_state == FULL) {
5798 /*
5799 * If we have a leftover link then remove it.
5800 */
5801 sysfs_remove_link(&slab_kset->kobj, name);
5802 return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
5803 }
5804
5805 al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
5806 if (!al)
5807 return -ENOMEM;
5808
5809 al->s = s;
5810 al->name = name;
5811 al->next = alias_list;
5812 alias_list = al;
5813 return 0;
5814}
5815
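/*
 * Late initcall: create the "slab" kset, register every cache that was set
 * up before sysfs existed, and drain the alias list buffered by
 * sysfs_slab_alias() during early boot.
 */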
5816static int __init slab_sysfs_init(void)
5817{
5818 struct kmem_cache *s;
5819 int err;
5820
5821 mutex_lock(&slab_mutex);
5822
5823 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
5824 if (!slab_kset) {
5825 mutex_unlock(&slab_mutex);
5826 pr_err("Cannot register slab subsystem.\n");
5827 return -ENOSYS;
5828 }
5829
5830 slab_state = FULL;
5831
5832 list_for_each_entry(s, &slab_caches, list) {
5833 err = sysfs_slab_add(s);
5834 if (err)
5835 pr_err("SLUB: Unable to add boot slab %s to sysfs\n",
5836 s->name);
5837 }
5838
5839 while (alias_list) {
5840 struct saved_alias *al = alias_list;
5841
5842 alias_list = alias_list->next;
5843 err = sysfs_slab_alias(al->s, al->name);
5844 if (err)
5845 pr_err("SLUB: Unable to add boot slab alias %s to sysfs\n",
5846 al->name);
5847 kfree(al);
5848 }
5849
5850 mutex_unlock(&slab_mutex);
5851 resiliency_test();
5852 return 0;
5853}
5854
5855__initcall(slab_sysfs_init);
5856#endif /* CONFIG_SYSFS */
5857
5858/*
5859 * The /proc/slabinfo ABI
5860 */
5861#ifdef CONFIG_SLABINFO
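/*
 * Fill a slabinfo record from the per-node counters; active objects are
 * the total objects minus the free ones counted on the partial lists.
 */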
5862void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
5863{
5864 unsigned long nr_slabs = 0;
5865 unsigned long nr_objs = 0;
5866 unsigned long nr_free = 0;
5867 int node;
5868 struct kmem_cache_node *n;
5869
5870 for_each_kmem_cache_node(s, node, n) {
5871 nr_slabs += node_nr_slabs(n);
5872 nr_objs += node_nr_objs(n);
5873 nr_free += count_partial(n, count_free);
5874 }
5875
5876 sinfo->active_objs = nr_objs - nr_free;
5877 sinfo->num_objs = nr_objs;
5878 sinfo->active_slabs = nr_slabs;
5879 sinfo->num_slabs = nr_slabs;
5880 sinfo->objects_per_slab = oo_objects(s->oo);
5881 sinfo->cache_order = oo_order(s->oo);
5882}
5883
5884void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s)
5885{
5886}
5887
5888ssize_t slabinfo_write(struct file *file, const char __user *buffer,
5889 size_t count, loff_t *ppos)
5890{
5891 return -EIO;
5892}
5893#endif /* CONFIG_SLABINFO */