bpf: reduce verifier memory consumption
authorAlexei Starovoitov <ast@fb.com>
Wed, 3 Apr 2019 18:39:01 +0000 (18:39 +0000)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 20 Apr 2019 07:15:08 +0000 (09:15 +0200)
commit 638f5b90d46016372a8e3e0a434f199cc5e12b8c upstream.

the verifier got progressively smarter over time and size of its internal
state grew as well. Time to reduce the memory consumption.

Before:
sizeof(struct bpf_verifier_state) = 6520
After:
sizeof(struct bpf_verifier_state) = 896

It's done by observing that majority of BPF programs use little to
no stack whereas verifier kept all of 512 stack slots ready always.
Instead dynamically reallocate struct verifier state when stack
access is detected.
Runtime difference before vs after is within a noise.
The number of processed instructions stays the same.

Cc: jakub.kicinski@netronome.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
[Backported to 4.14 by sblbir]
Signed-off-by: Balbir Singh <sblbir@amzn.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/net/ethernet/netronome/nfp/bpf/verifier.c
include/linux/bpf_verifier.h
kernel/bpf/verifier.c

index 5b783a91b115e332dbae8447f32e84c1957b8f7d..8793fa57f8445495c4c022b2342cf7c01aad6eb3 100644 (file)
@@ -76,9 +76,9 @@ nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 
 static int
 nfp_bpf_check_exit(struct nfp_prog *nfp_prog,
-                  const struct bpf_verifier_env *env)
+                  struct bpf_verifier_env *env)
 {
-       const struct bpf_reg_state *reg0 = &env->cur_state.regs[0];
+       const struct bpf_reg_state *reg0 = cur_regs(env) + BPF_REG_0;
        u64 imm;
 
        if (nfp_prog->act == NN_ACT_XDP)
@@ -113,9 +113,10 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog,
 
 static int
 nfp_bpf_check_ctx_ptr(struct nfp_prog *nfp_prog,
-                     const struct bpf_verifier_env *env, u8 reg)
+                     struct bpf_verifier_env *env, u8 reg_no)
 {
-       if (env->cur_state.regs[reg].type != PTR_TO_CTX)
+       const struct bpf_reg_state *reg = cur_regs(env) + reg_no;
+       if (reg->type != PTR_TO_CTX)
                return -EINVAL;
 
        return 0;
index 8458cc5fbce5728dc2038451d26b37a8edd9d22c..0fc3d9afb8bfa2c4d321d26b213cafa669cbb0b7 100644 (file)
@@ -91,14 +91,19 @@ enum bpf_stack_slot_type {
 
 #define BPF_REG_SIZE 8 /* size of eBPF register in bytes */
 
+struct bpf_stack_state {
+       struct bpf_reg_state spilled_ptr;
+       u8 slot_type[BPF_REG_SIZE];
+};
+
 /* state of the program:
  * type of all registers and stack info
  */
 struct bpf_verifier_state {
        struct bpf_reg_state regs[MAX_BPF_REG];
-       u8 stack_slot_type[MAX_BPF_STACK];
-       struct bpf_reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE];
        struct bpf_verifier_state *parent;
+       int allocated_stack;
+       struct bpf_stack_state *stack;
 };
 
 /* linked list of verifier states used to prune search */
@@ -133,7 +138,7 @@ struct bpf_verifier_env {
        struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */
        int stack_size;                 /* number of states to be processed */
        bool strict_alignment;          /* perform strict pointer alignment checks */
-       struct bpf_verifier_state cur_state; /* current verifier state */
+       struct bpf_verifier_state *cur_state; /* current verifier state */
        struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
        const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */
        void *analyzer_priv; /* pointer to external analyzer's private data */
@@ -145,6 +150,11 @@ struct bpf_verifier_env {
        struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
 };
 
+static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
+{
+       return env->cur_state->regs;
+}
+
 int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
                 void *priv);
 
index f6755fd5bae2897ff73f97c255fe068e296336e0..805bc218150d95d749cf621d1b557ec72cb784b5 100644 (file)
@@ -265,10 +265,11 @@ static void print_verifier_state(struct bpf_verifier_state *state)
                        verbose(")");
                }
        }
-       for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
-               if (state->stack_slot_type[i] == STACK_SPILL)
-                       verbose(" fp%d=%s", -MAX_BPF_STACK + i,
-                               reg_type_str[state->spilled_regs[i / BPF_REG_SIZE].type]);
+       for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+               if (state->stack[i].slot_type[0] == STACK_SPILL)
+                       verbose(" fp%d=%s",
+                               -MAX_BPF_STACK + i * BPF_REG_SIZE,
+                               reg_type_str[state->stack[i].spilled_ptr.type]);
        }
        verbose("\n");
 }
@@ -434,35 +435,123 @@ static void print_bpf_insn(const struct bpf_verifier_env *env,
        }
 }
 
-static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx)
+static int copy_stack_state(struct bpf_verifier_state *dst,
+                           const struct bpf_verifier_state *src)
 {
-       struct bpf_verifier_stack_elem *elem;
-       int insn_idx;
+       if (!src->stack)
+               return 0;
+       if (WARN_ON_ONCE(dst->allocated_stack < src->allocated_stack)) {
+               /* internal bug, make state invalid to reject the program */
+               memset(dst, 0, sizeof(*dst));
+               return -EFAULT;
+       }
+       memcpy(dst->stack, src->stack,
+              sizeof(*src->stack) * (src->allocated_stack / BPF_REG_SIZE));
+       return 0;
+}
+
+/* do_check() starts with zero-sized stack in struct bpf_verifier_state to
+ * make it consume minimal amount of memory. check_stack_write() access from
+ * the program calls into realloc_verifier_state() to grow the stack size.
+ * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
+ * which this function copies over. It points to previous bpf_verifier_state
+ * which is never reallocated
+ */
+static int realloc_verifier_state(struct bpf_verifier_state *state, int size,
+                                 bool copy_old)
+{
+       u32 old_size = state->allocated_stack;
+       struct bpf_stack_state *new_stack;
+       int slot = size / BPF_REG_SIZE;
+
+       if (size <= old_size || !size) {
+               if (copy_old)
+                       return 0;
+               state->allocated_stack = slot * BPF_REG_SIZE;
+               if (!size && old_size) {
+                       kfree(state->stack);
+                       state->stack = NULL;
+               }
+               return 0;
+       }
+       new_stack = kmalloc_array(slot, sizeof(struct bpf_stack_state),
+                                 GFP_KERNEL);
+       if (!new_stack)
+               return -ENOMEM;
+       if (copy_old) {
+               if (state->stack)
+                       memcpy(new_stack, state->stack,
+                              sizeof(*new_stack) * (old_size / BPF_REG_SIZE));
+               memset(new_stack + old_size / BPF_REG_SIZE, 0,
+                      sizeof(*new_stack) * (size - old_size) / BPF_REG_SIZE);
+       }
+       state->allocated_stack = slot * BPF_REG_SIZE;
+       kfree(state->stack);
+       state->stack = new_stack;
+       return 0;
+}
+
+static void free_verifier_state(struct bpf_verifier_state *state)
+{
+       kfree(state->stack);
+       kfree(state);
+}
+
+/* copy verifier state from src to dst growing dst stack space
+ * when necessary to accommodate larger src stack
+ */
+static int copy_verifier_state(struct bpf_verifier_state *dst,
+                              const struct bpf_verifier_state *src)
+{
+       int err;
+
+       err = realloc_verifier_state(dst, src->allocated_stack, false);
+       if (err)
+               return err;
+       memcpy(dst, src, offsetof(struct bpf_verifier_state, allocated_stack));
+       return copy_stack_state(dst, src);
+}
+
+static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
+                    int *insn_idx)
+{
+       struct bpf_verifier_state *cur = env->cur_state;
+       struct bpf_verifier_stack_elem *elem, *head = env->head;
+       int err;
 
        if (env->head == NULL)
-               return -1;
+               return -ENOENT;
 
-       memcpy(&env->cur_state, &env->head->st, sizeof(env->cur_state));
-       insn_idx = env->head->insn_idx;
+       if (cur) {
+               err = copy_verifier_state(cur, &head->st);
+               if (err)
+                       return err;
+       }
+       if (insn_idx)
+               *insn_idx = head->insn_idx;
        if (prev_insn_idx)
-               *prev_insn_idx = env->head->prev_insn_idx;
-       elem = env->head->next;
-       kfree(env->head);
+               *prev_insn_idx = head->prev_insn_idx;
+       elem = head->next;
+       kfree(head);
        env->head = elem;
        env->stack_size--;
-       return insn_idx;
+       return 0;
 }
 
 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
                                             int insn_idx, int prev_insn_idx)
 {
        struct bpf_verifier_stack_elem *elem;
+       struct bpf_verifier_state *cur = env->cur_state;
+       int err;
 
-       elem = kmalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
+       elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
        if (!elem)
                goto err;
 
-       memcpy(&elem->st, &env->cur_state, sizeof(env->cur_state));
+       err = copy_verifier_state(&elem->st, cur);
+       if (err)
+               return NULL;
        elem->insn_idx = insn_idx;
        elem->prev_insn_idx = prev_insn_idx;
        elem->next = env->head;
@@ -475,7 +564,7 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
        return &elem->st;
 err:
        /* pop all elements and return */
-       while (pop_stack(env, NULL) >= 0);
+       while (!pop_stack(env, NULL, NULL));
        return NULL;
 }
 
@@ -671,7 +760,7 @@ static void mark_reg_read(const struct bpf_verifier_state *state, u32 regno)
 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
                         enum reg_arg_type t)
 {
-       struct bpf_reg_state *regs = env->cur_state.regs;
+       struct bpf_reg_state *regs = env->cur_state->regs;
 
        if (regno >= MAX_BPF_REG) {
                verbose("R%d is invalid\n", regno);
@@ -684,7 +773,7 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
                        verbose("R%d !read_ok\n", regno);
                        return -EACCES;
                }
-               mark_reg_read(&env->cur_state, regno);
+               mark_reg_read(env->cur_state, regno);
        } else {
                /* check whether register used as dest operand can be written to */
                if (regno == BPF_REG_FP) {
@@ -721,10 +810,21 @@ static int check_stack_write(struct bpf_verifier_env *env,
                             struct bpf_verifier_state *state, int off,
                             int size, int value_regno, int insn_idx)
 {
-       int i, spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE;
+       int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
+
+       err = realloc_verifier_state(state, round_up(slot + 1, BPF_REG_SIZE),
+                                    true);
+       if (err)
+               return err;
        /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
         * so it's aligned access and [off, off + size) are within stack limits
         */
+       if (!env->allow_ptr_leaks &&
+           state->stack[spi].slot_type[0] == STACK_SPILL &&
+           size != BPF_REG_SIZE) {
+               verbose("attempt to corrupt spilled pointer on stack\n");
+               return -EACCES;
+       }
 
        if (value_regno >= 0 &&
            is_spillable_regtype(state->regs[value_regno].type)) {
@@ -736,11 +836,11 @@ static int check_stack_write(struct bpf_verifier_env *env,
                }
 
                /* save register state */
-               state->spilled_regs[spi] = state->regs[value_regno];
-               state->spilled_regs[spi].live |= REG_LIVE_WRITTEN;
+               state->stack[spi].spilled_ptr = state->regs[value_regno];
+               state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
 
                for (i = 0; i < BPF_REG_SIZE; i++) {
-                       if (state->stack_slot_type[MAX_BPF_STACK + off + i] == STACK_MISC &&
+                       if (state->stack[spi].slot_type[i] == STACK_MISC &&
                            !env->allow_ptr_leaks) {
                                int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
                                int soff = (-spi - 1) * BPF_REG_SIZE;
@@ -763,14 +863,15 @@ static int check_stack_write(struct bpf_verifier_env *env,
                                }
                                *poff = soff;
                        }
-                       state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL;
+                       state->stack[spi].slot_type[i] = STACK_SPILL;
                }
        } else {
                /* regular write of data into stack */
-               state->spilled_regs[spi] = (struct bpf_reg_state) {};
+               state->stack[spi].spilled_ptr = (struct bpf_reg_state) {};
 
                for (i = 0; i < size; i++)
-                       state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC;
+                       state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
+                               STACK_MISC;
        }
        return 0;
 }
@@ -781,10 +882,10 @@ static void mark_stack_slot_read(const struct bpf_verifier_state *state, int slo
 
        while (parent) {
                /* if read wasn't screened by an earlier write ... */
-               if (state->spilled_regs[slot].live & REG_LIVE_WRITTEN)
+               if (state->stack[slot].spilled_ptr.live & REG_LIVE_WRITTEN)
                        break;
                /* ... then we depend on parent's value */
-               parent->spilled_regs[slot].live |= REG_LIVE_READ;
+               parent->stack[slot].spilled_ptr.live |= REG_LIVE_READ;
                state = parent;
                parent = state->parent;
        }
@@ -793,34 +894,37 @@ static void mark_stack_slot_read(const struct bpf_verifier_state *state, int slo
 static int check_stack_read(struct bpf_verifier_state *state, int off, int size,
                            int value_regno)
 {
-       u8 *slot_type;
-       int i, spi;
+       int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
+       u8 *stype;
 
-       slot_type = &state->stack_slot_type[MAX_BPF_STACK + off];
+       if (state->allocated_stack <= slot) {
+               verbose("invalid read from stack off %d+0 size %d\n",
+                       off, size);
+               return -EACCES;
+       }
+       stype = state->stack[spi].slot_type;
 
-       if (slot_type[0] == STACK_SPILL) {
+       if (stype[0] == STACK_SPILL) {
                if (size != BPF_REG_SIZE) {
                        verbose("invalid size of register spill\n");
                        return -EACCES;
                }
                for (i = 1; i < BPF_REG_SIZE; i++) {
-                       if (slot_type[i] != STACK_SPILL) {
+                       if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
                                verbose("corrupted spill memory\n");
                                return -EACCES;
                        }
                }
 
-               spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE;
-
                if (value_regno >= 0) {
                        /* restore register state from stack */
-                       state->regs[value_regno] = state->spilled_regs[spi];
+                       state->regs[value_regno] = state->stack[spi].spilled_ptr;
                        mark_stack_slot_read(state, spi);
                }
                return 0;
        } else {
                for (i = 0; i < size; i++) {
-                       if (slot_type[i] != STACK_MISC) {
+                       if (stype[(slot - i) % BPF_REG_SIZE] != STACK_MISC) {
                                verbose("invalid read from stack off %d+%d size %d\n",
                                        off, i, size);
                                return -EACCES;
@@ -837,7 +941,8 @@ static int check_stack_read(struct bpf_verifier_state *state, int off, int size,
 static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
                            int size)
 {
-       struct bpf_map *map = env->cur_state.regs[regno].map_ptr;
+       struct bpf_reg_state *regs = cur_regs(env);
+       struct bpf_map *map = regs[regno].map_ptr;
 
        if (off < 0 || size <= 0 || off + size > map->value_size) {
                verbose("invalid access to map value, value_size=%d off=%d size=%d\n",
@@ -849,9 +954,9 @@ static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
 
 /* check read/write into a map element with possible variable offset */
 static int check_map_access(struct bpf_verifier_env *env, u32 regno,
-                               int off, int size)
+                           int off, int size)
 {
-       struct bpf_verifier_state *state = &env->cur_state;
+       struct bpf_verifier_state *state = env->cur_state;
        struct bpf_reg_state *reg = &state->regs[regno];
        int err;
 
@@ -924,7 +1029,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
 static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
                                 int off, int size)
 {
-       struct bpf_reg_state *regs = env->cur_state.regs;
+       struct bpf_reg_state *regs = cur_regs(env);
        struct bpf_reg_state *reg = &regs[regno];
 
        if (off < 0 || size <= 0 || (u64)off + size > reg->range) {
@@ -938,7 +1043,7 @@ static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
                               int size)
 {
-       struct bpf_reg_state *regs = env->cur_state.regs;
+       struct bpf_reg_state *regs = cur_regs(env);
        struct bpf_reg_state *reg = &regs[regno];
        int err;
 
@@ -1008,19 +1113,19 @@ static bool __is_pointer_value(bool allow_ptr_leaks,
 
 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
 {
-       return __is_pointer_value(env->allow_ptr_leaks, &env->cur_state.regs[regno]);
+       return __is_pointer_value(env->allow_ptr_leaks, cur_regs(env) + regno);
 }
 
 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
 {
-       const struct bpf_reg_state *reg = &env->cur_state.regs[regno];
+       const struct bpf_reg_state *reg = cur_regs(env) + regno;
 
        return reg->type == PTR_TO_CTX;
 }
 
 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
 {
-       const struct bpf_reg_state *reg = &env->cur_state.regs[regno];
+       const struct bpf_reg_state *reg = cur_regs(env) + regno;
 
        return reg->type == PTR_TO_PACKET;
 }
@@ -1145,8 +1250,9 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
                            int off, int bpf_size, enum bpf_access_type t,
                            int value_regno, bool strict_alignment_once)
 {
-       struct bpf_verifier_state *state = &env->cur_state;
-       struct bpf_reg_state *reg = &state->regs[regno];
+       struct bpf_verifier_state *state = env->cur_state;
+       struct bpf_reg_state *regs = cur_regs(env);
+       struct bpf_reg_state *reg = regs + regno;
        int size, err = 0;
 
        size = bpf_size_to_bytes(bpf_size);
@@ -1170,7 +1276,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 
                err = check_map_access(env, regno, off, size);
                if (!err && t == BPF_READ && value_regno >= 0)
-                       mark_reg_unknown(state->regs, value_regno);
+                       mark_reg_unknown(regs, value_regno);
 
        } else if (reg->type == PTR_TO_CTX) {
                enum bpf_reg_type reg_type = SCALAR_VALUE;
@@ -1203,13 +1309,13 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
                         * the offset is zero.
                         */
                        if (reg_type == SCALAR_VALUE)
-                               mark_reg_unknown(state->regs, value_regno);
+                               mark_reg_unknown(regs, value_regno);
                        else
-                               mark_reg_known_zero(state->regs, value_regno);
-                       state->regs[value_regno].id = 0;
-                       state->regs[value_regno].off = 0;
-                       state->regs[value_regno].range = 0;
-                       state->regs[value_regno].type = reg_type;
+                               mark_reg_known_zero(regs, value_regno);
+                       regs[value_regno].id = 0;
+                       regs[value_regno].off = 0;
+                       regs[value_regno].range = 0;
+                       regs[value_regno].type = reg_type;
                }
 
        } else if (reg->type == PTR_TO_STACK) {
@@ -1234,18 +1340,11 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
                if (env->prog->aux->stack_depth < -off)
                        env->prog->aux->stack_depth = -off;
 
-               if (t == BPF_WRITE) {
-                       if (!env->allow_ptr_leaks &&
-                           state->stack_slot_type[MAX_BPF_STACK + off] == STACK_SPILL &&
-                           size != BPF_REG_SIZE) {
-                               verbose("attempt to corrupt spilled pointer on stack\n");
-                               return -EACCES;
-                       }
+               if (t == BPF_WRITE)
                        err = check_stack_write(env, state, off, size,
                                                value_regno, insn_idx);
-               } else {
+               else
                        err = check_stack_read(state, off, size, value_regno);
-               }
        } else if (reg->type == PTR_TO_PACKET) {
                if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
                        verbose("cannot write into packet\n");
@@ -1258,7 +1357,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
                }
                err = check_packet_access(env, regno, off, size);
                if (!err && t == BPF_READ && value_regno >= 0)
-                       mark_reg_unknown(state->regs, value_regno);
+                       mark_reg_unknown(regs, value_regno);
        } else {
                verbose("R%d invalid mem access '%s'\n",
                        regno, reg_type_str[reg->type]);
@@ -1266,9 +1365,9 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
        }
 
        if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
-           state->regs[value_regno].type == SCALAR_VALUE) {
+           regs[value_regno].type == SCALAR_VALUE) {
                /* b/h/w load zero-extends, mark upper bits as known 0 */
-               coerce_reg_to_size(&state->regs[value_regno], size);
+               coerce_reg_to_size(&regs[value_regno], size);
        }
        return err;
 }
@@ -1333,9 +1432,9 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
                                int access_size, bool zero_size_allowed,
                                struct bpf_call_arg_meta *meta)
 {
-       struct bpf_verifier_state *state = &env->cur_state;
+       struct bpf_verifier_state *state = env->cur_state;
        struct bpf_reg_state *regs = state->regs;
-       int off, i;
+       int off, i, slot, spi;
 
        if (regs[regno].type != PTR_TO_STACK) {
                /* Allow zero-byte read from NULL, regardless of pointer type */
@@ -1376,7 +1475,11 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
        }
 
        for (i = 0; i < access_size; i++) {
-               if (state->stack_slot_type[MAX_BPF_STACK + off + i] != STACK_MISC) {
+               slot = -(off + i) - 1;
+               spi = slot / BPF_REG_SIZE;
+               if (state->allocated_stack <= slot ||
+                   state->stack[spi].slot_type[slot % BPF_REG_SIZE] !=
+                       STACK_MISC) {
                        verbose("invalid indirect read from stack off %d+%d size %d\n",
                                off, i, access_size);
                        return -EACCES;
@@ -1389,7 +1492,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
                                   int access_size, bool zero_size_allowed,
                                   struct bpf_call_arg_meta *meta)
 {
-       struct bpf_reg_state *regs = env->cur_state.regs, *reg = &regs[regno];
+       struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
 
        switch (reg->type) {
        case PTR_TO_PACKET:
@@ -1406,7 +1509,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
                          enum bpf_arg_type arg_type,
                          struct bpf_call_arg_meta *meta)
 {
-       struct bpf_reg_state *regs = env->cur_state.regs, *reg = &regs[regno];
+       struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
        enum bpf_reg_type expected_type, type = reg->type;
        int err = 0;
 
@@ -1678,7 +1781,7 @@ static int check_raw_mode(const struct bpf_func_proto *fn)
  */
 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
 {
-       struct bpf_verifier_state *state = &env->cur_state;
+       struct bpf_verifier_state *state = env->cur_state;
        struct bpf_reg_state *regs = state->regs, *reg;
        int i;
 
@@ -1687,10 +1790,10 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
                    regs[i].type == PTR_TO_PACKET_END)
                        mark_reg_unknown(regs, i);
 
-       for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
-               if (state->stack_slot_type[i] != STACK_SPILL)
+       for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+               if (state->stack[i].slot_type[0] != STACK_SPILL)
                        continue;
-               reg = &state->spilled_regs[i / BPF_REG_SIZE];
+               reg = &state->stack[i].spilled_ptr;
                if (reg->type != PTR_TO_PACKET &&
                    reg->type != PTR_TO_PACKET_END)
                        continue;
@@ -1700,9 +1803,8 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
 
 static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
 {
-       struct bpf_verifier_state *state = &env->cur_state;
        const struct bpf_func_proto *fn = NULL;
-       struct bpf_reg_state *regs = state->regs;
+       struct bpf_reg_state *regs;
        struct bpf_call_arg_meta meta;
        bool changes_data;
        int i, err;
@@ -1776,6 +1878,7 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
                        return err;
        }
 
+       regs = cur_regs(env);
        /* reset caller saved regs */
        for (i = 0; i < CALLER_SAVED_REGS; i++) {
                mark_reg_not_init(regs, caller_saved[i]);
@@ -1890,7 +1993,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
                                   const struct bpf_reg_state *ptr_reg,
                                   const struct bpf_reg_state *off_reg)
 {
-       struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg;
+       struct bpf_reg_state *regs = cur_regs(env), *dst_reg;
        bool known = tnum_is_const(off_reg->var_off);
        s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
            smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
@@ -2097,7 +2200,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
                                      struct bpf_reg_state *dst_reg,
                                      struct bpf_reg_state src_reg)
 {
-       struct bpf_reg_state *regs = env->cur_state.regs;
+       struct bpf_reg_state *regs = cur_regs(env);
        u8 opcode = BPF_OP(insn->code);
        bool src_known, dst_known;
        s64 smin_val, smax_val;
@@ -2345,7 +2448,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
                                   struct bpf_insn *insn)
 {
-       struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg, *src_reg;
+       struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg;
        struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
        u8 opcode = BPF_OP(insn->code);
        int rc;
@@ -2419,12 +2522,12 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 
        /* Got here implies adding two SCALAR_VALUEs */
        if (WARN_ON_ONCE(ptr_reg)) {
-               print_verifier_state(&env->cur_state);
+               print_verifier_state(env->cur_state);
                verbose("verifier internal error: unexpected ptr_reg\n");
                return -EINVAL;
        }
        if (WARN_ON(!src_reg)) {
-               print_verifier_state(&env->cur_state);
+               print_verifier_state(env->cur_state);
                verbose("verifier internal error: no src_reg\n");
                return -EINVAL;
        }
@@ -2434,7 +2537,7 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 /* check validity of 32-bit and 64-bit arithmetic operations */
 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
-       struct bpf_reg_state *regs = env->cur_state.regs;
+       struct bpf_reg_state *regs = cur_regs(env);
        u8 opcode = BPF_OP(insn->code);
        int err;
 
@@ -2661,10 +2764,10 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
                        /* keep the maximum range already checked */
                        regs[i].range = max(regs[i].range, new_range);
 
-       for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
-               if (state->stack_slot_type[i] != STACK_SPILL)
+       for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+               if (state->stack[i].slot_type[0] != STACK_SPILL)
                        continue;
-               reg = &state->spilled_regs[i / BPF_REG_SIZE];
+               reg = &state->stack[i].spilled_ptr;
                if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id)
                        reg->range = max(reg->range, new_range);
        }
@@ -2914,17 +3017,17 @@ static void mark_map_regs(struct bpf_verifier_state *state, u32 regno,
        for (i = 0; i < MAX_BPF_REG; i++)
                mark_map_reg(regs, i, id, is_null);
 
-       for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
-               if (state->stack_slot_type[i] != STACK_SPILL)
+       for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+               if (state->stack[i].slot_type[0] != STACK_SPILL)
                        continue;
-               mark_map_reg(state->spilled_regs, i / BPF_REG_SIZE, id, is_null);
+               mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null);
        }
 }
 
 static int check_cond_jmp_op(struct bpf_verifier_env *env,
                             struct bpf_insn *insn, int *insn_idx)
 {
-       struct bpf_verifier_state *other_branch, *this_branch = &env->cur_state;
+       struct bpf_verifier_state *other_branch, *this_branch = env->cur_state;
        struct bpf_reg_state *regs = this_branch->regs, *dst_reg;
        u8 opcode = BPF_OP(insn->code);
        int err;
@@ -3087,7 +3190,7 @@ static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
 /* verify BPF_LD_IMM64 instruction */
 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
-       struct bpf_reg_state *regs = env->cur_state.regs;
+       struct bpf_reg_state *regs = cur_regs(env);
        int err;
 
        if (BPF_SIZE(insn->code) != BPF_DW) {
@@ -3148,7 +3251,7 @@ static bool may_access_skb(enum bpf_prog_type type)
  */
 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
-       struct bpf_reg_state *regs = env->cur_state.regs;
+       struct bpf_reg_state *regs = cur_regs(env);
        u8 mode = BPF_MODE(insn->code);
        int i, err;
 
@@ -3534,6 +3637,57 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
        return false;
 }
 
+static bool stacksafe(struct bpf_verifier_state *old,
+                     struct bpf_verifier_state *cur,
+                     struct idpair *idmap)
+{
+       int i, spi;
+
+       /* if explored stack has more populated slots than current stack
+        * such stacks are not equivalent
+        */
+       if (old->allocated_stack > cur->allocated_stack)
+               return false;
+
+       /* walk slots of the explored stack and ignore any additional
+        * slots in the current stack, since explored(safe) state
+        * didn't use them
+        */
+       for (i = 0; i < old->allocated_stack; i++) {
+               spi = i / BPF_REG_SIZE;
+
+               if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
+                       continue;
+               if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
+                   cur->stack[spi].slot_type[i % BPF_REG_SIZE])
+                       /* Ex: old explored (safe) state has STACK_SPILL in
+                        * this stack slot, but current has has STACK_MISC ->
+                        * this verifier states are not equivalent,
+                        * return false to continue verification of this path
+                        */
+                       return false;
+               if (i % BPF_REG_SIZE)
+                       continue;
+               if (old->stack[spi].slot_type[0] != STACK_SPILL)
+                       continue;
+               if (!regsafe(&old->stack[spi].spilled_ptr,
+                            &cur->stack[spi].spilled_ptr,
+                            idmap))
+                       /* when explored and current stack slot are both storing
+                        * spilled registers, check that stored pointers types
+                        * are the same as well.
+                        * Ex: explored safe path could have stored
+                        * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
+                        * but current path has stored:
+                        * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
+                        * such verifier states are not equivalent.
+                        * return false to continue verification of this path
+                        */
+                       return false;
+       }
+       return true;
+}
+
 /* compare two verifier states
  *
  * all states stored in state_list are known to be valid, since
@@ -3578,37 +3732,8 @@ static bool states_equal(struct bpf_verifier_env *env,
                        goto out_free;
        }
 
-       for (i = 0; i < MAX_BPF_STACK; i++) {
-               if (old->stack_slot_type[i] == STACK_INVALID)
-                       continue;
-               if (old->stack_slot_type[i] != cur->stack_slot_type[i])
-                       /* Ex: old explored (safe) state has STACK_SPILL in
-                        * this stack slot, but current has has STACK_MISC ->
-                        * this verifier states are not equivalent,
-                        * return false to continue verification of this path
-                        */
-                       goto out_free;
-               if (i % BPF_REG_SIZE)
-                       continue;
-               if (old->stack_slot_type[i] != STACK_SPILL)
-                       continue;
-               if (!regsafe(&old->spilled_regs[i / BPF_REG_SIZE],
-                            &cur->spilled_regs[i / BPF_REG_SIZE],
-                            idmap))
-                       /* when explored and current stack slot are both storing
-                        * spilled registers, check that stored pointers types
-                        * are the same as well.
-                        * Ex: explored safe path could have stored
-                        * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
-                        * but current path has stored:
-                        * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
-                        * such verifier states are not equivalent.
-                        * return false to continue verification of this path
-                        */
-                       goto out_free;
-               else
-                       continue;
-       }
+       if (!stacksafe(old, cur, idmap))
+               goto out_free;
        ret = true;
 out_free:
        kfree(idmap);
@@ -3644,17 +3769,19 @@ static bool do_propagate_liveness(const struct bpf_verifier_state *state,
                }
        }
        /* ... and stack slots */
-       for (i = 0; i < MAX_BPF_STACK / BPF_REG_SIZE; i++) {
-               if (parent->stack_slot_type[i * BPF_REG_SIZE] != STACK_SPILL)
+       for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
+                   i < parent->allocated_stack / BPF_REG_SIZE; i++) {
+               if (parent->stack[i].slot_type[0] != STACK_SPILL)
                        continue;
-               if (state->stack_slot_type[i * BPF_REG_SIZE] != STACK_SPILL)
+               if (state->stack[i].slot_type[0] != STACK_SPILL)
                        continue;
-               if (parent->spilled_regs[i].live & REG_LIVE_READ)
+               if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ)
                        continue;
-               if (writes && (state->spilled_regs[i].live & REG_LIVE_WRITTEN))
+               if (writes &&
+                   (state->stack[i].spilled_ptr.live & REG_LIVE_WRITTEN))
                        continue;
-               if (state->spilled_regs[i].live & REG_LIVE_READ) {
-                       parent->spilled_regs[i].live |= REG_LIVE_READ;
+               if (state->stack[i].spilled_ptr.live & REG_LIVE_READ) {
+                       parent->stack[i].spilled_ptr.live |= REG_LIVE_READ;
                        touched = true;
                }
        }
@@ -3684,6 +3811,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 {
        struct bpf_verifier_state_list *new_sl;
        struct bpf_verifier_state_list *sl;
+       struct bpf_verifier_state *cur = env->cur_state;
        int i;
 
        sl = env->explored_states[insn_idx];
@@ -3694,7 +3822,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
                return 0;
 
        while (sl != STATE_LIST_MARK) {
-               if (states_equal(env, &sl->state, &env->cur_state)) {
+               if (states_equal(env, &sl->state, cur)) {
                        /* reached equivalent register/stack state,
                         * prune the search.
                         * Registers read by the continuation are read by us.
@@ -3705,7 +3833,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
                         * they'll be immediately forgotten as we're pruning
                         * this state and will pop a new one.
                         */
-                       propagate_liveness(&sl->state, &env->cur_state);
+                       propagate_liveness(&sl->state, cur);
                        return 1;
                }
                sl = sl->next;
@@ -3717,16 +3845,16 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
         * it will be rejected. Since there are no loops, we won't be
         * seeing this 'insn_idx' instruction again on the way to bpf_exit
         */
-       new_sl = kmalloc(sizeof(struct bpf_verifier_state_list), GFP_USER);
+       new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
        if (!new_sl)
                return -ENOMEM;
 
        /* add new state to the head of linked list */
-       memcpy(&new_sl->state, &env->cur_state, sizeof(env->cur_state));
+       copy_verifier_state(&new_sl->state, cur);
        new_sl->next = env->explored_states[insn_idx];
        env->explored_states[insn_idx] = new_sl;
        /* connect new state to parentage chain */
-       env->cur_state.parent = &new_sl->state;
+       cur->parent = &new_sl->state;
        /* clear write marks in current state: the writes we did are not writes
         * our child did, so they don't screen off its reads from us.
         * (There are no read marks in current state, because reads always mark
@@ -3734,10 +3862,10 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
         * explored_states can get read marks.)
         */
        for (i = 0; i < BPF_REG_FP; i++)
-               env->cur_state.regs[i].live = REG_LIVE_NONE;
-       for (i = 0; i < MAX_BPF_STACK / BPF_REG_SIZE; i++)
-               if (env->cur_state.stack_slot_type[i * BPF_REG_SIZE] == STACK_SPILL)
-                       env->cur_state.spilled_regs[i].live = REG_LIVE_NONE;
+               cur->regs[i].live = REG_LIVE_NONE;
+       for (i = 0; i < cur->allocated_stack / BPF_REG_SIZE; i++)
+               if (cur->stack[i].slot_type[0] == STACK_SPILL)
+                       cur->stack[i].spilled_ptr.live = REG_LIVE_NONE;
        return 0;
 }
 
@@ -3752,15 +3880,19 @@ static int ext_analyzer_insn_hook(struct bpf_verifier_env *env,
 
 static int do_check(struct bpf_verifier_env *env)
 {
-       struct bpf_verifier_state *state = &env->cur_state;
+       struct bpf_verifier_state *state;
        struct bpf_insn *insns = env->prog->insnsi;
-       struct bpf_reg_state *regs = state->regs;
+       struct bpf_reg_state *regs;
        int insn_cnt = env->prog->len;
        int insn_idx, prev_insn_idx = 0;
        int insn_processed = 0;
        bool do_print_state = false;
 
-       init_reg_state(regs);
+       state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
+       if (!state)
+               return -ENOMEM;
+       env->cur_state = state;
+       init_reg_state(state->regs);
        state->parent = NULL;
        insn_idx = 0;
        for (;;) {
@@ -3807,7 +3939,7 @@ static int do_check(struct bpf_verifier_env *env)
                        else
                                verbose("\nfrom %d to %d:",
                                        prev_insn_idx, insn_idx);
-                       print_verifier_state(&env->cur_state);
+                       print_verifier_state(env->cur_state);
                        do_print_state = false;
                }
 
@@ -3820,6 +3952,7 @@ static int do_check(struct bpf_verifier_env *env)
                if (err)
                        return err;
 
+               regs = cur_regs(env);
                env->insn_aux_data[insn_idx].seen = true;
                if (class == BPF_ALU || class == BPF_ALU64) {
                        err = check_alu_op(env, insn);
@@ -3991,8 +4124,10 @@ static int do_check(struct bpf_verifier_env *env)
                                }
 
 process_bpf_exit:
-                               insn_idx = pop_stack(env, &prev_insn_idx);
-                               if (insn_idx < 0) {
+                               err = pop_stack(env, &prev_insn_idx, &insn_idx);
+                               if (err < 0) {
+                                       if (err != -ENOENT)
+                                               return err;
                                        break;
                                } else {
                                        do_print_state = true;
@@ -4633,9 +4768,11 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
        env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
 
        ret = do_check(env);
+       free_verifier_state(env->cur_state);
+       env->cur_state = NULL;
 
 skip_full_check:
-       while (pop_stack(env, NULL) >= 0);
+       while (!pop_stack(env, NULL, NULL));
        free_states(env);
 
        if (ret == 0)
@@ -4741,9 +4878,11 @@ int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
        env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
 
        ret = do_check(env);
+       free_verifier_state(env->cur_state);
+       env->cur_state = NULL;
 
 skip_full_check:
-       while (pop_stack(env, NULL) >= 0);
+       while (!pop_stack(env, NULL, NULL));
        free_states(env);
 
        mutex_unlock(&bpf_verifier_lock);