Merge tag 'v3.10.64' into update
author Stricted <info@stricted.net>
Wed, 21 Mar 2018 21:33:51 +0000 (22:33 +0100)
committer Stricted <info@stricted.net>
Wed, 21 Mar 2018 21:33:51 +0000 (22:33 +0100)
This is the 3.10.64 stable release

37 files changed:
Makefile
arch/s390/kernel/compat_linux.c
arch/x86/include/uapi/asm/ldt.h
arch/x86/kernel/kvm.c
arch/x86/kernel/kvmclock.c
arch/x86/kernel/process_64.c
arch/x86/kernel/tls.c
crypto/af_alg.c
drivers/md/bitmap.c
drivers/md/dm-bufio.c
drivers/md/persistent-data/dm-space-map-metadata.c
drivers/mfd/tc6393xb.c
drivers/mmc/card/block.c
drivers/scsi/megaraid/megaraid_sas_base.c
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_map.c
fs/ecryptfs/crypto.c
fs/ecryptfs/file.c
fs/ecryptfs/main.c
fs/isofs/rock.c
fs/namespace.c
fs/ncpfs/ioctl.c
fs/nfs/nfs4proc.c
fs/proc/base.c
fs/udf/symlink.c
include/linux/cred.h
include/linux/user_namespace.h
kernel/groups.c
kernel/pid.c
kernel/uid16.c
kernel/user.c
kernel/user_namespace.c
net/mac80211/key.c
net/mac80211/rx.c
security/keys/encrypted-keys/encrypted.c
tools/testing/selftests/mount/unprivileged-remount-test.c

index 5f6928b813c37e0849ffdef9eb3febe838c6b892..8cd55b2ef11fa7ee15d03ad83d6764e7530494fc 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 3
 PATCHLEVEL = 10
-SUBLEVEL = 63
+SUBLEVEL = 64
 EXTRAVERSION =
 NAME = TOSSUG Baby Fish
 
index 8b6e4f5288a29cc155fb1a3f20d46c08da38be07..a98afed9348b73f57e27b40dd92db99f38f51a72 100644 (file)
@@ -248,7 +248,7 @@ asmlinkage long sys32_setgroups16(int gidsetsize, u16 __user *grouplist)
        struct group_info *group_info;
        int retval;
 
-       if (!capable(CAP_SETGID))
+       if (!may_setgroups())
                return -EPERM;
        if ((unsigned)gidsetsize > NGROUPS_MAX)
                return -EINVAL;
index 46727eb37bfe20915242badc965a2b4985bd28b4..6e1aaf73852ac956156df80f4dfcb931633f5269 100644 (file)
@@ -28,6 +28,13 @@ struct user_desc {
        unsigned int  seg_not_present:1;
        unsigned int  useable:1;
 #ifdef __x86_64__
+       /*
+        * Because this bit is not present in 32-bit user code, user
+        * programs can pass uninitialized values here.  Therefore, in
+        * any context in which a user_desc comes from a 32-bit program,
+        * the kernel must act as though lm == 0, regardless of the
+        * actual value.
+        */
        unsigned int  lm:1;
 #endif
 };
index cd6d9a5a42f60dcb93a528853fe65b8773b0832e..c4ff2a9161399b92bce9a148263b79bdfb862338 100644 (file)
@@ -279,7 +279,14 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
 static void __init paravirt_ops_setup(void)
 {
        pv_info.name = "KVM";
-       pv_info.paravirt_enabled = 1;
+
+       /*
+        * KVM isn't paravirt in the sense of paravirt_enabled.  A KVM
+        * guest kernel works like a bare metal kernel with additional
+        * features, and paravirt_enabled is about features that are
+        * missing.
+        */
+       pv_info.paravirt_enabled = 0;
 
        if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
                pv_cpu_ops.io_delay = kvm_io_delay;
index 3dd37ebd591b36db493d449506d33a6b8915841f..41514f56c24138176408fd63def75b14f02a29ac 100644 (file)
@@ -265,7 +265,6 @@ void __init kvmclock_init(void)
 #endif
        kvm_get_preset_lpj();
        clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
-       pv_info.paravirt_enabled = 1;
        pv_info.name = "KVM";
 
        if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
index f99a242730e95ce185900990305e360444d36b44..7099ab1e075bd30b3b2b9d1458da7e6d61ef0e8c 100644 (file)
@@ -279,24 +279,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
        fpu = switch_fpu_prepare(prev_p, next_p, cpu);
 
-       /*
-        * Reload esp0, LDT and the page table pointer:
-        */
+       /* Reload esp0 and ss1. */
        load_sp0(tss, next);
 
-       /*
-        * Switch DS and ES.
-        * This won't pick up thread selector changes, but I guess that is ok.
-        */
-       savesegment(es, prev->es);
-       if (unlikely(next->es | prev->es))
-               loadsegment(es, next->es);
-
-       savesegment(ds, prev->ds);
-       if (unlikely(next->ds | prev->ds))
-               loadsegment(ds, next->ds);
-
-
        /* We must save %fs and %gs before load_TLS() because
         * %fs and %gs may be cleared by load_TLS().
         *
@@ -305,41 +290,101 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        savesegment(fs, fsindex);
        savesegment(gs, gsindex);
 
+       /*
+        * Load TLS before restoring any segments so that segment loads
+        * reference the correct GDT entries.
+        */
        load_TLS(next, cpu);
 
        /*
-        * Leave lazy mode, flushing any hypercalls made here.
-        * This must be done before restoring TLS segments so
-        * the GDT and LDT are properly updated, and must be
-        * done before math_state_restore, so the TS bit is up
-        * to date.
+        * Leave lazy mode, flushing any hypercalls made here.  This
+        * must be done after loading TLS entries in the GDT but before
+        * loading segments that might reference them, and it must
+        * be done before math_state_restore, so the TS bit is up to
+        * date.
         */
        arch_end_context_switch(next_p);
 
+       /* Switch DS and ES.
+        *
+        * Reading them only returns the selectors, but writing them (if
+        * nonzero) loads the full descriptor from the GDT or LDT.  The
+        * LDT for next is loaded in switch_mm, and the GDT is loaded
+        * above.
+        *
+        * We therefore need to write new values to the segment
+        * registers on every context switch unless both the new and old
+        * values are zero.
+        *
+        * Note that we don't need to do anything for CS and SS, as
+        * those are saved and restored as part of pt_regs.
+        */
+       savesegment(es, prev->es);
+       if (unlikely(next->es | prev->es))
+               loadsegment(es, next->es);
+
+       savesegment(ds, prev->ds);
+       if (unlikely(next->ds | prev->ds))
+               loadsegment(ds, next->ds);
+
        /*
         * Switch FS and GS.
         *
-        * Segment register != 0 always requires a reload.  Also
-        * reload when it has changed.  When prev process used 64bit
-        * base always reload to avoid an information leak.
+        * These are even more complicated than DS and ES: they have
+        * 64-bit bases that are controlled by arch_prctl.  Those bases
+        * only differ from the values in the GDT or LDT if the selector
+        * is 0.
+        *
+        * Loading the segment register resets the hidden base part of
+        * the register to 0 or the value from the GDT / LDT.  If the
+        * next base address is zero, writing 0 to the segment register is
+        * much faster than using wrmsr to explicitly zero the base.
+        *
+        * The thread_struct.fs and thread_struct.gs values are 0
+        * if the fs and gs bases respectively are not overridden
+        * from the values implied by fsindex and gsindex.  They
+        * are nonzero, and store the nonzero base addresses, if
+        * the bases are overridden.
+        *
+        * (fs != 0 && fsindex != 0) || (gs != 0 && gsindex != 0) should
+        * be impossible.
+        *
+        * Therefore we need to reload the segment registers if either
+        * the old or new selector is nonzero, and we need to override
+        * the base address if next thread expects it to be overridden.
+        *
+        * This code is unnecessarily slow in the case where the old and
+        * new indexes are zero and the new base is nonzero -- it will
+        * unnecessarily write 0 to the selector before writing the new
+        * base address.
+        *
+        * Note: This all depends on arch_prctl being the only way that
+        * user code can override the segment base.  Once wrfsbase and
+        * wrgsbase are enabled, most of this code will need to change.
         */
        if (unlikely(fsindex | next->fsindex | prev->fs)) {
                loadsegment(fs, next->fsindex);
+
                /*
-                * Check if the user used a selector != 0; if yes
-                *  clear 64bit base, since overloaded base is always
-                *  mapped to the Null selector
+                * If user code wrote a nonzero value to FS, then it also
+                * cleared the overridden base address.
+                *
+                * XXX: if user code wrote 0 to FS and cleared the base
+                * address itself, we won't notice and we'll incorrectly
+                * restore the prior base address next time we reschedule
+                * the process.
                 */
                if (fsindex)
                        prev->fs = 0;
        }
-       /* when next process has a 64bit base use it */
        if (next->fs)
                wrmsrl(MSR_FS_BASE, next->fs);
        prev->fsindex = fsindex;
 
        if (unlikely(gsindex | next->gsindex | prev->gs)) {
                load_gs_index(next->gsindex);
+
+               /* This works (and fails) the same way as fsindex above. */
                if (gsindex)
                        prev->gs = 0;
        }
index f7fec09e3e3a83c9fec36dfe2f0c75906b8c23c1..4e942f31b1a7c9401a65fb37af093caab5ad0c2e 100644 (file)
@@ -27,6 +27,37 @@ static int get_free_idx(void)
        return -ESRCH;
 }
 
+static bool tls_desc_okay(const struct user_desc *info)
+{
+       if (LDT_empty(info))
+               return true;
+
+       /*
+        * espfix is required for 16-bit data segments, but espfix
+        * only works for LDT segments.
+        */
+       if (!info->seg_32bit)
+               return false;
+
+       /* Only allow data segments in the TLS array. */
+       if (info->contents > 1)
+               return false;
+
+       /*
+        * Non-present segments with DPL 3 present an interesting attack
+        * surface.  The kernel should handle such segments correctly,
+        * but TLS is very difficult to protect in a sandbox, so prevent
+        * such segments from being created.
+        *
+        * If userspace needs to remove a TLS entry, it can still delete
+        * it outright.
+        */
+       if (info->seg_not_present)
+               return false;
+
+       return true;
+}
+
 static void set_tls_desc(struct task_struct *p, int idx,
                         const struct user_desc *info, int n)
 {
@@ -66,6 +97,9 @@ int do_set_thread_area(struct task_struct *p, int idx,
        if (copy_from_user(&info, u_info, sizeof(info)))
                return -EFAULT;
 
+       if (!tls_desc_okay(&info))
+               return -EINVAL;
+
        if (idx == -1)
                idx = info.entry_number;
 
@@ -192,6 +226,7 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset,
 {
        struct user_desc infobuf[GDT_ENTRY_TLS_ENTRIES];
        const struct user_desc *info;
+       int i;
 
        if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
            (pos % sizeof(struct user_desc)) != 0 ||
@@ -205,6 +240,10 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset,
        else
                info = infobuf;
 
+       for (i = 0; i < count / sizeof(struct user_desc); i++)
+               if (!tls_desc_okay(info + i))
+                       return -EINVAL;
+
        set_tls_desc(target,
                     GDT_ENTRY_TLS_MIN + (pos / sizeof(struct user_desc)),
                     info, count / sizeof(struct user_desc));
index bf948e134981a6ed47da7ab71d0aff8f8de81f51..6ef6e2ad344e1227dd7d125299f462ea7261a9cc 100644 (file)
@@ -449,6 +449,9 @@ void af_alg_complete(struct crypto_async_request *req, int err)
 {
        struct af_alg_completion *completion = req->data;
 
+       if (err == -EINPROGRESS)
+               return;
+
        completion->err = err;
        complete(&completion->completion);
 }
index 5a2c75499824f417a2e6bfd22f1b2c15e5135b1d..a79cbd6038f6ec5d79d5dc32b64f0933425f90f2 100644 (file)
@@ -883,7 +883,6 @@ void bitmap_unplug(struct bitmap *bitmap)
 {
        unsigned long i;
        int dirty, need_write;
-       int wait = 0;
 
        if (!bitmap || !bitmap->storage.filemap ||
            test_bit(BITMAP_STALE, &bitmap->flags))
@@ -901,16 +900,13 @@ void bitmap_unplug(struct bitmap *bitmap)
                        clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING);
                        write_page(bitmap, bitmap->storage.filemap[i], 0);
                }
-               if (dirty)
-                       wait = 1;
-       }
-       if (wait) { /* if any writes were performed, we need to wait on them */
-               if (bitmap->storage.file)
-                       wait_event(bitmap->write_wait,
-                                  atomic_read(&bitmap->pending_writes)==0);
-               else
-                       md_super_wait(bitmap->mddev);
        }
+       if (bitmap->storage.file)
+               wait_event(bitmap->write_wait,
+                          atomic_read(&bitmap->pending_writes)==0);
+       else
+               md_super_wait(bitmap->mddev);
+
        if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
                bitmap_file_kick(bitmap);
 }
index c9b4ca9e0696312d174b0122d62fe354fb0ab0a9..e855a190270d59bba68ae6851f45cf90ab7b0e17 100644 (file)
@@ -529,6 +529,19 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t block,
                end_io(&b->bio, r);
 }
 
+static void inline_endio(struct bio *bio, int error)
+{
+       bio_end_io_t *end_fn = bio->bi_private;
+
+       /*
+        * Reset the bio to free any attached resources
+        * (e.g. bio integrity profiles).
+        */
+       bio_reset(bio);
+
+       end_fn(bio, error);
+}
+
 static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block,
                           bio_end_io_t *end_io)
 {
@@ -540,7 +553,12 @@ static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block,
        b->bio.bi_max_vecs = DM_BUFIO_INLINE_VECS;
        b->bio.bi_sector = block << b->c->sectors_per_block_bits;
        b->bio.bi_bdev = b->c->bdev;
-       b->bio.bi_end_io = end_io;
+       b->bio.bi_end_io = inline_endio;
+       /*
+        * Use of .bi_private isn't a problem here because
+        * the dm_buffer's inline bio is local to bufio.
+        */
+       b->bio.bi_private = end_io;
 
        /*
         * We assume that if len >= PAGE_SIZE ptr is page-aligned.
index afb419e514bf53f2ed1f1173c31471c5fac3e1f5..056d09c33af14a104c8f16b95cc7c06e251f4bb0 100644 (file)
@@ -493,7 +493,9 @@ static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count
 {
        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 
-       return smm->ll.nr_blocks;
+       *count = smm->ll.nr_blocks;
+
+       return 0;
 }
 
 static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
index 15e1463e5e1334258a101b6b584d10fd4f3495a1..17fe83e81ea40744a8c1b7d2dadfbfaf4d44c092 100644 (file)
@@ -263,6 +263,17 @@ static int tc6393xb_ohci_disable(struct platform_device *dev)
        return 0;
 }
 
+static int tc6393xb_ohci_suspend(struct platform_device *dev)
+{
+       struct tc6393xb_platform_data *tcpd = dev_get_platdata(dev->dev.parent);
+
+       /* We can't properly store/restore OHCI state, so fail here */
+       if (tcpd->resume_restore)
+               return -EBUSY;
+
+       return tc6393xb_ohci_disable(dev);
+}
+
 static int tc6393xb_fb_enable(struct platform_device *dev)
 {
        struct tc6393xb *tc6393xb = dev_get_drvdata(dev->dev.parent);
@@ -403,7 +414,7 @@ static struct mfd_cell tc6393xb_cells[] = {
                .num_resources = ARRAY_SIZE(tc6393xb_ohci_resources),
                .resources = tc6393xb_ohci_resources,
                .enable = tc6393xb_ohci_enable,
-               .suspend = tc6393xb_ohci_disable,
+               .suspend = tc6393xb_ohci_suspend,
                .resume = tc6393xb_ohci_enable,
                .disable = tc6393xb_ohci_disable,
        },
index c3c9dbd0aaace8a26b14297f5d04b57dc11a0792..ff313df8d6b8cef52795fa19d17169b3b2b992c1 100644 (file)
@@ -611,7 +611,7 @@ static ssize_t force_ro_show(struct device *dev, struct device_attribute *attr,
        int ret;
        struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
 
-       ret = snprintf(buf, PAGE_SIZE, "%d",
+       ret = snprintf(buf, PAGE_SIZE, "%d\n",
                       get_disk_ro(dev_to_disk(dev)) ^
                       md->read_only);
        mmc_blk_put(md);
index 4956c99ed90e5f5bcd9fcedb651e22450ee196a1..78b4fe84524587fca74d5bc38c06c58816de8ecf 100644 (file)
@@ -933,7 +933,7 @@ megasas_issue_blocked_abort_cmd(struct megasas_instance *instance,
        abort_fr->abort_mfi_phys_addr_hi = 0;
 
        cmd->sync_cmd = 1;
-       cmd->cmd_status = 0xFF;
+       cmd->cmd_status = ENODATA;
 
        instance->instancet->issue_dcmd(instance, cmd);
 
index c0fc047e6bfce2cbf781f4389daf36a0764c9768..d70ee41d08de58bc90960e183a9661ef5d09b64e 100644 (file)
@@ -3861,12 +3861,6 @@ again:
                if (ret)
                        break;
 
-               /* opt_discard */
-               if (btrfs_test_opt(root, DISCARD))
-                       ret = btrfs_error_discard_extent(root, start,
-                                                        end + 1 - start,
-                                                        NULL);
-
                clear_extent_dirty(unpin, start, end, GFP_NOFS);
                btrfs_error_unpin_extent_range(root, start, end);
                cond_resched();
index bbafa05519daa69b0fb05dc5d5583cbfb4c6fa56..f99c71e40f8b86a91a73c4ace7a17f933c4de464 100644 (file)
@@ -5277,7 +5277,8 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
        update_global_block_rsv(fs_info);
 }
 
-static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
+static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
+                             const bool return_free_space)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_block_group_cache *cache = NULL;
@@ -5301,7 +5302,8 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
 
                if (start < cache->last_byte_to_unpin) {
                        len = min(len, cache->last_byte_to_unpin - start);
-                       btrfs_add_free_space(cache, start, len);
+                       if (return_free_space)
+                               btrfs_add_free_space(cache, start, len);
                }
 
                start += len;
@@ -5364,7 +5366,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
                                                   end + 1 - start, NULL);
 
                clear_extent_dirty(unpin, start, end, GFP_NOFS);
-               unpin_extent_range(root, start, end);
+               unpin_extent_range(root, start, end, true);
                cond_resched();
        }
 
@@ -8564,7 +8566,7 @@ out:
 
 int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
 {
-       return unpin_extent_range(root, start, end);
+       return unpin_extent_range(root, start, end, false);
 }
 
 int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
index a4a7a1a8da95c4c1e7571d99e0d58a7b5209f4ee..0a3809500599e8cd71671ab46ca2f0f5e06c0eaa 100644 (file)
@@ -263,8 +263,6 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len,
        if (!em)
                goto out;
 
-       if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
-               list_move(&em->list, &tree->modified_extents);
        em->generation = gen;
        clear_bit(EXTENT_FLAG_PINNED, &em->flags);
        em->mod_start = em->start;
index f71ec125290db7da87355f444f7308826ee1c034..1da2446bf6b003a1d636b852e107b908ed50ac1f 100644 (file)
@@ -2102,7 +2102,6 @@ ecryptfs_decode_from_filename(unsigned char *dst, size_t *dst_size,
                        break;
                case 2:
                        dst[dst_byte_offset++] |= (src_byte);
-                       dst[dst_byte_offset] = 0;
                        current_bit_offset = 0;
                        break;
                }
index a7abbea2c09638ef8c190555ec466834c0c06edf..9ff3664bb3ea460d139982f49d65378b4f16bf58 100644 (file)
@@ -196,23 +196,11 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 {
        int rc = 0;
        struct ecryptfs_crypt_stat *crypt_stat = NULL;
-       struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
        struct dentry *ecryptfs_dentry = file->f_path.dentry;
        /* Private value of ecryptfs_dentry allocated in
         * ecryptfs_lookup() */
        struct ecryptfs_file_info *file_info;
 
-       mount_crypt_stat = &ecryptfs_superblock_to_private(
-               ecryptfs_dentry->d_sb)->mount_crypt_stat;
-       if ((mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
-           && ((file->f_flags & O_WRONLY) || (file->f_flags & O_RDWR)
-               || (file->f_flags & O_CREAT) || (file->f_flags & O_TRUNC)
-               || (file->f_flags & O_APPEND))) {
-               printk(KERN_WARNING "Mount has encrypted view enabled; "
-                      "files may only be read\n");
-               rc = -EPERM;
-               goto out;
-       }
        /* Released in ecryptfs_release or end of function if failure */
        file_info = kmem_cache_zalloc(ecryptfs_file_info_cache, GFP_KERNEL);
        ecryptfs_set_file_private(file, file_info);
index 753b99301a7aa6013e4ac007f9c3b756672494d8..a178201113df2bdaa0f0058f99cb2a6f63da4bfb 100644 (file)
@@ -494,6 +494,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
 {
        struct super_block *s;
        struct ecryptfs_sb_info *sbi;
+       struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
        struct ecryptfs_dentry_info *root_info;
        const char *err = "Getting sb failed";
        struct inode *inode;
@@ -512,6 +513,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
                err = "Error parsing options";
                goto out;
        }
+       mount_crypt_stat = &sbi->mount_crypt_stat;
 
        s = sget(fs_type, NULL, set_anon_super, flags, NULL);
        if (IS_ERR(s)) {
@@ -558,11 +560,19 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
 
        /**
         * Set the POSIX ACL flag based on whether they're enabled in the lower
-        * mount. Force a read-only eCryptfs mount if the lower mount is ro.
-        * Allow a ro eCryptfs mount even when the lower mount is rw.
+        * mount.
         */
        s->s_flags = flags & ~MS_POSIXACL;
-       s->s_flags |= path.dentry->d_sb->s_flags & (MS_RDONLY | MS_POSIXACL);
+       s->s_flags |= path.dentry->d_sb->s_flags & MS_POSIXACL;
+
+       /**
+        * Force a read-only eCryptfs mount when:
+        *   1) The lower mount is ro
+        *   2) The ecryptfs_encrypted_view mount option is specified
+        */
+       if (path.dentry->d_sb->s_flags & MS_RDONLY ||
+           mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
+               s->s_flags |= MS_RDONLY;
 
        s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
        s->s_blocksize = path.dentry->d_sb->s_blocksize;
index f488bbae541ac8d5db4eb7e963c33452ebb3e937..735d7522a3a911f19af593d6b5f7d366d6cf448d 100644 (file)
@@ -30,6 +30,7 @@ struct rock_state {
        int cont_size;
        int cont_extent;
        int cont_offset;
+       int cont_loops;
        struct inode *inode;
 };
 
@@ -73,6 +74,9 @@ static void init_rock_state(struct rock_state *rs, struct inode *inode)
        rs->inode = inode;
 }
 
+/* Maximum number of Rock Ridge continuation entries */
+#define RR_MAX_CE_ENTRIES 32
+
 /*
  * Returns 0 if the caller should continue scanning, 1 if the scan must end
  * and -ve on error.
@@ -105,6 +109,8 @@ static int rock_continue(struct rock_state *rs)
                        goto out;
                }
                ret = -EIO;
+               if (++rs->cont_loops >= RR_MAX_CE_ENTRIES)
+                       goto out;
                bh = sb_bread(rs->inode->i_sb, rs->cont_extent);
                if (bh) {
                        memcpy(rs->buffer, bh->b_data + rs->cont_offset,
@@ -356,6 +362,9 @@ repeat:
                        rs.cont_size = isonum_733(rr->u.CE.size);
                        break;
                case SIG('E', 'R'):
+                       /* Invalid length of ER tag id? */
+                       if (rr->u.ER.len_id + offsetof(struct rock_ridge, u.ER.data) > rr->len)
+                               goto out;
                        ISOFS_SB(inode->i_sb)->s_rock = 1;
                        printk(KERN_DEBUG "ISO 9660 Extensions: ");
                        {
index 44d4218a4a6affaa061b2b3da067ad13e82ee1af..a2c0b962e5d282993faf3a849278581df46a27e0 100644 (file)
@@ -1474,6 +1474,9 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
                goto dput_and_out;
        if (!check_mnt(mnt))
                goto dput_and_out;
+       retval = -EPERM;
+       if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
+               goto dput_and_out;
 
        retval = do_umount(mnt, flags);
 #ifdef UMOUNT_LOG
@@ -1974,7 +1977,13 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
        }
        if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
            !(mnt_flags & MNT_NODEV)) {
-               return -EPERM;
+               /* Was the nodev implicitly added in mount? */
+               if ((mnt->mnt_ns->user_ns != &init_user_ns) &&
+                   !(sb->s_type->fs_flags & FS_USERNS_DEV_MOUNT)) {
+                       mnt_flags |= MNT_NODEV;
+               } else {
+                       return -EPERM;
+               }
        }
        if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
            !(mnt_flags & MNT_NOSUID)) {
index 60426ccb3b6561e25b050f50139df908a6b251fe..2f970de02b1629c649f591281bc24e5810ebd870 100644 (file)
@@ -448,7 +448,6 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
                                                result = -EIO;
                                        }
                                }
-                               result = 0;
                        }
                        mutex_unlock(&server->root_setup_lock);
 
index a4eaa40e7bdb50f9e1eadc87d006736fb20bfc6d..86390c3a95dbb1c99c1f7f8f14ff7557b5f36a14 100644 (file)
@@ -6418,6 +6418,9 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
 
        dprintk("--> %s\n", __func__);
 
+       /* nfs4_layoutget_release calls pnfs_put_layout_hdr */
+       pnfs_get_layout_hdr(NFS_I(inode)->layout);
+
        lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags);
        if (!lgp->args.layout.pages) {
                nfs4_layoutget_release(lgp);
@@ -6430,9 +6433,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
        lgp->res.seq_res.sr_slot = NULL;
        nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0);
 
-       /* nfs4_layoutget_release calls pnfs_put_layout_hdr */
-       pnfs_get_layout_hdr(NFS_I(inode)->layout);
-
        task = rpc_run_task(&task_setup_data);
        if (IS_ERR(task))
                return ERR_CAST(task);
index 1caa60e5271e76f687f88faae669ad74b0198169..4b6f38b759c66bc687424ee73c4fabff2535a604 100644 (file)
@@ -2648,6 +2648,57 @@ static const struct file_operations proc_projid_map_operations = {
        .llseek         = seq_lseek,
        .release        = proc_id_map_release,
 };
+
+static int proc_setgroups_open(struct inode *inode, struct file *file)
+{
+       struct user_namespace *ns = NULL;
+       struct task_struct *task;
+       int ret;
+
+       ret = -ESRCH;
+       task = get_proc_task(inode);
+       if (task) {
+               rcu_read_lock();
+               ns = get_user_ns(task_cred_xxx(task, user_ns));
+               rcu_read_unlock();
+               put_task_struct(task);
+       }
+       if (!ns)
+               goto err;
+
+       if (file->f_mode & FMODE_WRITE) {
+               ret = -EACCES;
+               if (!ns_capable(ns, CAP_SYS_ADMIN))
+                       goto err_put_ns;
+       }
+
+       ret = single_open(file, &proc_setgroups_show, ns);
+       if (ret)
+               goto err_put_ns;
+
+       return 0;
+err_put_ns:
+       put_user_ns(ns);
+err:
+       return ret;
+}
+
+static int proc_setgroups_release(struct inode *inode, struct file *file)
+{
+       struct seq_file *seq = file->private_data;
+       struct user_namespace *ns = seq->private;
+       int ret = single_release(inode, file);
+       put_user_ns(ns);
+       return ret;
+}
+
+static const struct file_operations proc_setgroups_operations = {
+       .open           = proc_setgroups_open,
+       .write          = proc_setgroups_write,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = proc_setgroups_release,
+};
 #endif /* CONFIG_USER_NS */
 
 static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
@@ -2756,6 +2807,7 @@ static const struct pid_entry tgid_base_stuff[] = {
        REG("uid_map",    S_IRUGO|S_IWUSR, proc_uid_map_operations),
        REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
        REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
+       REG("setgroups",  S_IRUGO|S_IWUSR, proc_setgroups_operations),
 #endif
 #ifdef CONFIG_CHECKPOINT_RESTORE
        REG("timers",     S_IRUGO, proc_timers_operations),
@@ -3109,6 +3161,7 @@ static const struct pid_entry tid_base_stuff[] = {
        REG("uid_map",    S_IRUGO|S_IWUSR, proc_uid_map_operations),
        REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
        REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
+       REG("setgroups",  S_IRUGO|S_IWUSR, proc_setgroups_operations),
 #endif
 };
 
index d7c6dbe4194bb33bbe37930c63cf23377e5d9343..d89f324bc38797220f6afcd09962e63d2c70c1e4 100644 (file)
@@ -80,11 +80,17 @@ static int udf_symlink_filler(struct file *file, struct page *page)
        struct inode *inode = page->mapping->host;
        struct buffer_head *bh = NULL;
        unsigned char *symlink;
-       int err = -EIO;
+       int err;
        unsigned char *p = kmap(page);
        struct udf_inode_info *iinfo;
        uint32_t pos;
 
+       /* We don't support symlinks longer than one block */
+       if (inode->i_size > inode->i_sb->s_blocksize) {
+               err = -ENAMETOOLONG;
+               goto out_unmap;
+       }
+
        iinfo = UDF_I(inode);
        pos = udf_block_map(inode, 0);
 
@@ -94,8 +100,10 @@ static int udf_symlink_filler(struct file *file, struct page *page)
        } else {
                bh = sb_bread(inode->i_sb, pos);
 
-               if (!bh)
-                       goto out;
+               if (!bh) {
+                       err = -EIO;
+                       goto out_unlock_inode;
+               }
 
                symlink = bh->b_data;
        }
@@ -109,9 +117,10 @@ static int udf_symlink_filler(struct file *file, struct page *page)
        unlock_page(page);
        return 0;
 
-out:
+out_unlock_inode:
        up_read(&iinfo->i_data_sem);
        SetPageError(page);
+out_unmap:
        kunmap(page);
        unlock_page(page);
        return err;
index 04421e82536596c5aeb7a28b35e142556c821598..6c58dd7cb9ace20c8a30a429490cac03df022fcf 100644 (file)
@@ -68,6 +68,7 @@ extern void groups_free(struct group_info *);
 extern int set_current_groups(struct group_info *);
 extern int set_groups(struct cred *, struct group_info *);
 extern int groups_search(const struct group_info *, kgid_t);
+extern bool may_setgroups(void);
 
 /* access the groups "array" with this macro */
 #define GROUP_AT(gi, i) \
index 14105c26a83618da5d91ae09331fb83ba8f8ad6b..a37081cf59da637dd6b8d81ead7073485c475d73 100644 (file)
@@ -17,6 +17,10 @@ struct uid_gid_map { /* 64 bytes -- 1 cache line */
        } extent[UID_GID_MAP_MAX_EXTENTS];
 };
 
+#define USERNS_SETGROUPS_ALLOWED 1UL
+
+#define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED
+
 struct user_namespace {
        struct uid_gid_map      uid_map;
        struct uid_gid_map      gid_map;
@@ -27,6 +31,7 @@ struct user_namespace {
        kuid_t                  owner;
        kgid_t                  group;
        unsigned int            proc_inum;
+       unsigned long           flags;
        bool                    may_mount_sysfs;
        bool                    may_mount_proc;
 };
@@ -59,6 +64,9 @@ extern struct seq_operations proc_projid_seq_operations;
 extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *);
 extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *);
 extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *);
+extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *);
+extern int proc_setgroups_show(struct seq_file *m, void *v);
+extern bool userns_may_setgroups(const struct user_namespace *ns);
 #else
 
 static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
@@ -83,6 +91,10 @@ static inline void put_user_ns(struct user_namespace *ns)
 {
 }
 
+/*
+ * Stub used in the #else branch of this header (presumably the build
+ * without user namespace support -- confirm against the enclosing #if).
+ * It ignores @ns and always reports that setgroups is allowed.
+ */
+static inline bool userns_may_setgroups(const struct user_namespace *ns)
+{
+       return true;
+}
 #endif
 
 void update_mnt_policy(struct user_namespace *userns);
index 6b2588dd04ff20fb89995394f9c530a2613fbb83..67b4ba30475fbc2b902e5a6226aeac2d2aaa5803 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/slab.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
+#include <linux/user_namespace.h>
 #include <asm/uaccess.h>
 
 /* init to 2 - one for init_task, one to ensure it is never freed */
@@ -223,6 +224,14 @@ out:
        return i;
 }
 
+/*
+ * may_setgroups - may the current task change its supplementary groups?
+ *
+ * Returns true only when the caller holds CAP_SETGID in its own user
+ * namespace and userns_may_setgroups() also agrees for that namespace.
+ */
+bool may_setgroups(void)
+{
+       struct user_namespace *ns = current_user_ns();
+
+       /* Without the capability the namespace state is never consulted. */
+       if (!ns_capable(ns, CAP_SETGID))
+               return false;
+
+       return userns_may_setgroups(ns);
+}
+
 /*
  *     SMP: Our groups are copy-on-write. We can set them safely
  *     without another task interfering.
@@ -233,7 +242,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
        struct group_info *group_info;
        int retval;
 
-       if (!nsown_capable(CAP_SETGID))
+       if (!may_setgroups())
                return -EPERM;
        if ((unsigned)gidsetsize > NGROUPS_MAX)
                return -EINVAL;
index b6a88e3633f13a9b0c9c609ba2bbb0fbc03f286d..60af6a023c0e5830103f2916d5b81ae302ca003a 100644 (file)
@@ -335,6 +335,8 @@ out:
 
 out_unlock:
        spin_unlock_irq(&pidmap_lock);
+       put_pid_ns(ns);
+
 out_free:
        while (++i <= ns->level)
                free_pidmap(pid->numbers + i);
index f6c83d7ef0006fffe3cc9811b736c513c1b8f6f1..d58cc4d8f0d1fa95c7ec0120cb408a9b4ad859e5 100644 (file)
@@ -176,7 +176,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist)
        struct group_info *group_info;
        int retval;
 
-       if (!nsown_capable(CAP_SETGID))
+       if (!may_setgroups())
                return -EPERM;
        if ((unsigned)gidsetsize > NGROUPS_MAX)
                return -EINVAL;
index 69b4c3d48cdee20fc94be4d2869e2e330ba0a7b2..6bbef5604101cf52f6d052f7de96e373920847b5 100644 (file)
@@ -51,6 +51,7 @@ struct user_namespace init_user_ns = {
        .owner = GLOBAL_ROOT_UID,
        .group = GLOBAL_ROOT_GID,
        .proc_inum = PROC_USER_INIT_INO,
+       .flags = USERNS_INIT_FLAGS,
        .may_mount_sysfs = true,
        .may_mount_proc = true,
 };
index 9bea1d7dd21fac61c2084cb5bc295c1980c2573e..3f2fb33d291aa8739ab09a2f63d090695787c765 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/fs_struct.h>
 
 static struct kmem_cache *user_ns_cachep __read_mostly;
+static DEFINE_MUTEX(userns_state_mutex);
 
 static bool new_idmap_permitted(const struct file *file,
                                struct user_namespace *ns, int cap_setid,
@@ -99,6 +100,11 @@ int create_user_ns(struct cred *new)
        ns->owner = owner;
        ns->group = group;
 
+       /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
+       mutex_lock(&userns_state_mutex);
+       ns->flags = parent_ns->flags;
+       mutex_unlock(&userns_state_mutex);
+
        set_cred_user_ns(new, ns);
 
        update_mnt_policy(ns);
@@ -577,9 +583,6 @@ static bool mappings_overlap(struct uid_gid_map *new_map, struct uid_gid_extent
        return false;
 }
 
-
-static DEFINE_MUTEX(id_map_mutex);
-
 static ssize_t map_write(struct file *file, const char __user *buf,
                         size_t count, loff_t *ppos,
                         int cap_setid,
@@ -596,7 +599,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
        ssize_t ret = -EINVAL;
 
        /*
-        * The id_map_mutex serializes all writes to any given map.
+        * The userns_state_mutex serializes all writes to any given map.
         *
         * Any map is only ever written once.
         *
@@ -614,7 +617,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
         * order and smp_rmb() is guaranteed that we don't have crazy
         * architectures returning stale data.
         */
-       mutex_lock(&id_map_mutex);
+       mutex_lock(&userns_state_mutex);
 
        ret = -EPERM;
        /* Only allow one successful write to the map */
@@ -741,7 +744,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
        *ppos = count;
        ret = count;
 out:
-       mutex_unlock(&id_map_mutex);
+       mutex_unlock(&userns_state_mutex);
        if (page)
                free_page(page);
        return ret;
@@ -800,17 +803,21 @@ static bool new_idmap_permitted(const struct file *file,
                                struct user_namespace *ns, int cap_setid,
                                struct uid_gid_map *new_map)
 {
-       /* Allow mapping to your own filesystem ids */
-       if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) {
+       const struct cred *cred = file->f_cred;
+       /* Don't allow mappings that would allow anything that wouldn't
+        * be allowed without the establishment of unprivileged mappings.
+        */
+       if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
+           uid_eq(ns->owner, cred->euid)) {
                u32 id = new_map->extent[0].lower_first;
                if (cap_setid == CAP_SETUID) {
                        kuid_t uid = make_kuid(ns->parent, id);
-                       if (uid_eq(uid, file->f_cred->fsuid))
+                       if (uid_eq(uid, cred->euid))
                                return true;
-               }
-               else if (cap_setid == CAP_SETGID) {
+               } else if (cap_setid == CAP_SETGID) {
                        kgid_t gid = make_kgid(ns->parent, id);
-                       if (gid_eq(gid, file->f_cred->fsgid))
+                       if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) &&
+                           gid_eq(gid, cred->egid))
                                return true;
                }
        }
@@ -830,6 +837,100 @@ static bool new_idmap_permitted(const struct file *file,
        return false;
 }
 
+/*
+ * seq_file show callback: print "allow" or "deny" followed by a newline,
+ * depending on whether USERNS_SETGROUPS_ALLOWED is set in the flags of the
+ * user namespace stored in seq->private.  Always returns 0.
+ */
+int proc_setgroups_show(struct seq_file *seq, void *v)
+{
+       struct user_namespace *ns = seq->private;
+       /* Single snapshot of the flags; no lock is taken here. */
+       unsigned long userns_flags = ACCESS_ONCE(ns->flags);
+       const char *state = "deny";
+
+       if (userns_flags & USERNS_SETGROUPS_ALLOWED)
+               state = "allow";
+
+       seq_printf(seq, "%s\n", state);
+       return 0;
+}
+
+/*
+ * proc_setgroups_write - write handler taking "allow" or "deny".
+ *
+ * The user namespace is taken from the seq_file in file->private_data.
+ * The write must start at offset 0 and be shorter than sizeof(kbuf); the
+ * keyword may be followed only by whitespace.
+ *
+ * "deny" clears USERNS_SETGROUPS_ALLOWED, but only while the gid_map has
+ * no extents.  "allow" never sets the flag; it merely succeeds if the
+ * flag is still set.
+ *
+ * Returns @count on success, -EINVAL for a malformed write, -EFAULT if
+ * the user buffer cannot be copied, and -EPERM if the requested state is
+ * refused.
+ */
+ssize_t proc_setgroups_write(struct file *file, const char __user *buf,
+                            size_t count, loff_t *ppos)
+{
+       struct seq_file *seq = file->private_data;
+       struct user_namespace *ns = seq->private;
+       char kbuf[8], *pos;
+       bool setgroups_allowed;
+       ssize_t ret;
+
+       /* Only allow a very narrow range of strings to be written */
+       ret = -EINVAL;
+       if ((*ppos != 0) || (count >= sizeof(kbuf)))
+               goto out;
+
+       /* What was written? */
+       ret = -EFAULT;
+       if (copy_from_user(kbuf, buf, count))
+               goto out;
+       /* count < sizeof(kbuf) was checked above, so this stays in bounds */
+       kbuf[count] = '\0';
+       pos = kbuf;
+
+       /* What is being requested? */
+       ret = -EINVAL;
+       if (strncmp(pos, "allow", 5) == 0) {
+               pos += 5;
+               setgroups_allowed = true;
+       }
+       else if (strncmp(pos, "deny", 4) == 0) {
+               pos += 4;
+               setgroups_allowed = false;
+       }
+       else
+               goto out;
+
+       /* Verify there is no trailing junk on the line */
+       pos = skip_spaces(pos);
+       if (*pos != '\0')
+               goto out;
+
+       /* The flag and the gid_map are examined under userns_state_mutex */
+       ret = -EPERM;
+       mutex_lock(&userns_state_mutex);
+       if (setgroups_allowed) {
+               /* Enabling setgroups after setgroups has been disabled
+                * is not allowed.
+                */
+               if (!(ns->flags & USERNS_SETGROUPS_ALLOWED))
+                       goto out_unlock;
+       } else {
+               /* Permanently disabling setgroups after setgroups has
+                * been enabled by writing the gid_map is not allowed.
+                */
+               if (ns->gid_map.nr_extents != 0)
+                       goto out_unlock;
+               ns->flags &= ~USERNS_SETGROUPS_ALLOWED;
+       }
+       mutex_unlock(&userns_state_mutex);
+
+       /* Report a successful write */
+       *ppos = count;
+       ret = count;
+out:
+       return ret;
+out_unlock:
+       /* Error exit for paths that still hold userns_state_mutex */
+       mutex_unlock(&userns_state_mutex);
+       goto out;
+}
+
+/*
+ * userns_may_setgroups - is setgroups currently usable in @ns?
+ *
+ * Returns true only when @ns already has at least one gid_map extent and
+ * USERNS_SETGROUPS_ALLOWED is still set.  Both fields are read while
+ * holding userns_state_mutex.
+ */
+bool userns_may_setgroups(const struct user_namespace *ns)
+{
+       bool mapped, permitted;
+
+       mutex_lock(&userns_state_mutex);
+       /* It is not safe to use setgroups until a gid mapping in
+        * the user namespace has been established.
+        */
+       mapped = ns->gid_map.nr_extents != 0;
+       /* Has setgroups been left enabled for this namespace? */
+       permitted = (ns->flags & USERNS_SETGROUPS_ALLOWED) != 0;
+       mutex_unlock(&userns_state_mutex);
+
+       return mapped && permitted;
+}
+
 static void *userns_get(struct task_struct *task)
 {
        struct user_namespace *user_ns;
index 67059b88fea5f28619c97ad2ca8c150840045055..635d0972b688c3a08c2b79fc1099dc27d041140e 100644 (file)
@@ -607,7 +607,7 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local,
        int i;
 
        mutex_lock(&local->key_mtx);
-       for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
+       for (i = 0; i < ARRAY_SIZE(sta->gtk); i++) {
                key = key_mtx_dereference(local, sta->gtk[i]);
                if (!key)
                        continue;
index 85bc6d498b46f59b76658881cf8e55aac2df6e34..9299a38c372e803332788d8e73a2a20e9602ab49 100644 (file)
@@ -1585,14 +1585,14 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
        sc = le16_to_cpu(hdr->seq_ctrl);
        frag = sc & IEEE80211_SCTL_FRAG;
 
-       if (likely(!ieee80211_has_morefrags(fc) && frag == 0))
-               goto out;
-
        if (is_multicast_ether_addr(hdr->addr1)) {
                rx->local->dot11MulticastReceivedFrameCount++;
-               goto out;
+               goto out_no_led;
        }
 
+       if (likely(!ieee80211_has_morefrags(fc) && frag == 0))
+               goto out;
+
        I802_DEBUG_INC(rx->local->rx_handlers_fragments);
 
        if (skb_linearize(rx->skb))
@@ -1683,9 +1683,10 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
        status->rx_flags |= IEEE80211_RX_FRAGMENTED;
 
  out:
+       ieee80211_led_rx(rx->local);
+ out_no_led:
        if (rx->sta)
                rx->sta->rx_packets++;
-       ieee80211_led_rx(rx->local);
        return RX_CONTINUE;
 }
 
index 9e1e005c75967d497831fb67146020aeed4bc4a8..c4c8df4b214d9f0bc2f941e0144526b3260a7a1f 100644 (file)
@@ -1018,10 +1018,13 @@ static int __init init_encrypted(void)
        ret = encrypted_shash_alloc();
        if (ret < 0)
                return ret;
+       ret = aes_get_sizes();
+       if (ret < 0)
+               goto out;
        ret = register_key_type(&key_type_encrypted);
        if (ret < 0)
                goto out;
-       return aes_get_sizes();
+       return 0;
 out:
        encrypted_shash_release();
        return ret;
index 1b3ff2fda4d0e410102b0ec663ec360d37fe3e97..517785052f1c37a4c368810c5c88d35119c9cab0 100644 (file)
@@ -6,6 +6,8 @@
 #include <sys/types.h>
 #include <sys/mount.h>
 #include <sys/wait.h>
+#include <sys/vfs.h>
+#include <sys/statvfs.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <fcntl.h>
 # define CLONE_NEWPID 0x20000000
 #endif
 
+#ifndef MS_REC
+# define MS_REC 16384
+#endif
 #ifndef MS_RELATIME
-#define MS_RELATIME (1 << 21)
+# define MS_RELATIME (1 << 21)
 #endif
 #ifndef MS_STRICTATIME
-#define MS_STRICTATIME (1 << 24)
+# define MS_STRICTATIME (1 << 24)
 #endif
 
 static void die(char *fmt, ...)
@@ -48,17 +53,14 @@ static void die(char *fmt, ...)
        exit(EXIT_FAILURE);
 }
 
-static void write_file(char *filename, char *fmt, ...)
+static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap)
 {
        char buf[4096];
        int fd;
        ssize_t written;
        int buf_len;
-       va_list ap;
 
-       va_start(ap, fmt);
        buf_len = vsnprintf(buf, sizeof(buf), fmt, ap);
-       va_end(ap);
        if (buf_len < 0) {
                die("vsnprintf failed: %s\n",
                    strerror(errno));
@@ -69,6 +71,8 @@ static void write_file(char *filename, char *fmt, ...)
 
        fd = open(filename, O_WRONLY);
        if (fd < 0) {
+               if ((errno == ENOENT) && enoent_ok)
+                       return;
                die("open of %s failed: %s\n",
                    filename, strerror(errno));
        }
@@ -87,6 +91,65 @@ static void write_file(char *filename, char *fmt, ...)
        }
 }
 
+/*
+ * printf-style write to @filename via vmaybe_write_file() with
+ * enoent_ok set, so an open() that fails with ENOENT is silently ignored.
+ */
+static void maybe_write_file(char *filename, char *fmt, ...)
+{
+       va_list args;
+
+       va_start(args, fmt);
+       vmaybe_write_file(true, filename, fmt, args);
+       va_end(args);
+}
+
+/*
+ * printf-style write to @filename via vmaybe_write_file() with
+ * enoent_ok cleared, so a missing file is fatal like any other open error.
+ */
+static void write_file(char *filename, char *fmt, ...)
+{
+       va_list args;
+
+       va_start(args, fmt);
+       vmaybe_write_file(false, filename, fmt, args);
+       va_end(args);
+}
+
+/*
+ * Translate the statvfs() flags of the filesystem containing @path into
+ * the corresponding MS_* mount flags.
+ *
+ * Dies if statvfs() fails, or if it reports any flag outside the set
+ * handled below, so an unknown flag is never silently dropped.
+ *
+ * Returns the MS_* mask.
+ */
+static int read_mnt_flags(const char *path)
+{
+       int ret;
+       struct statvfs stat;
+       int mnt_flags;
+
+       ret = statvfs(path, &stat);
+       if (ret != 0) {
+               die("statvfs of %s failed: %s\n",
+                       path, strerror(errno));
+       }
+       /* Refuse to continue if a flag we cannot translate is present */
+       if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV |
+                       ST_NOEXEC | ST_NOATIME | ST_NODIRATIME | ST_RELATIME |
+                       ST_SYNCHRONOUS | ST_MANDLOCK)) {
+               die("Unrecognized mount flags\n");
+       }
+       mnt_flags = 0;
+       if (stat.f_flag & ST_RDONLY)
+               mnt_flags |= MS_RDONLY;
+       if (stat.f_flag & ST_NOSUID)
+               mnt_flags |= MS_NOSUID;
+       if (stat.f_flag & ST_NODEV)
+               mnt_flags |= MS_NODEV;
+       if (stat.f_flag & ST_NOEXEC)
+               mnt_flags |= MS_NOEXEC;
+       if (stat.f_flag & ST_NOATIME)
+               mnt_flags |= MS_NOATIME;
+       if (stat.f_flag & ST_NODIRATIME)
+               mnt_flags |= MS_NODIRATIME;
+       if (stat.f_flag & ST_RELATIME)
+               mnt_flags |= MS_RELATIME;
+       if (stat.f_flag & ST_SYNCHRONOUS)
+               mnt_flags |= MS_SYNCHRONOUS;
+       /* The result is an MS_* mask, so use the MS_ name, not the ST_ one */
+       if (stat.f_flag & ST_MANDLOCK)
+               mnt_flags |= MS_MANDLOCK;
+
+       return mnt_flags;
+}
+
 static void create_and_enter_userns(void)
 {
        uid_t uid;
@@ -100,13 +163,10 @@ static void create_and_enter_userns(void)
                        strerror(errno));
        }
 
+       maybe_write_file("/proc/self/setgroups", "deny");
        write_file("/proc/self/uid_map", "0 %d 1", uid);
        write_file("/proc/self/gid_map", "0 %d 1", gid);
 
-       if (setgroups(0, NULL) != 0) {
-               die("setgroups failed: %s\n",
-                       strerror(errno));
-       }
        if (setgid(0) != 0) {
                die ("setgid(0) failed %s\n",
                        strerror(errno));
@@ -118,7 +178,8 @@ static void create_and_enter_userns(void)
 }
 
 static
-bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags)
+bool test_unpriv_remount(const char *fstype, const char *mount_options,
+                        int mount_flags, int remount_flags, int invalid_flags)
 {
        pid_t child;
 
@@ -151,9 +212,11 @@ bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags)
                        strerror(errno));
        }
 
-       if (mount("testing", "/tmp", "ramfs", mount_flags, NULL) != 0) {
-               die("mount of /tmp failed: %s\n",
-                       strerror(errno));
+       if (mount("testing", "/tmp", fstype, mount_flags, mount_options) != 0) {
+               die("mount of %s with options '%s' on /tmp failed: %s\n",
+                   fstype,
+                   mount_options? mount_options : "",
+                   strerror(errno));
        }
 
        create_and_enter_userns();
@@ -181,62 +244,127 @@ bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags)
 
 static bool test_unpriv_remount_simple(int mount_flags)
 {
-       return test_unpriv_remount(mount_flags, mount_flags, 0);
+       return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags, 0);
 }
 
 static bool test_unpriv_remount_atime(int mount_flags, int invalid_flags)
 {
-       return test_unpriv_remount(mount_flags, mount_flags, invalid_flags);
+       return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags,
+                                  invalid_flags);
+}
+
+/*
+ * Check that a bind remount performed inside a new user namespace, using
+ * the flags the original mount already had, succeeds and leaves the
+ * mount flags unchanged.
+ *
+ * The test body runs in a forked child, which never returns: it exits
+ * with EXIT_SUCCESS or dies.  The parent returns true only if the child
+ * exited with EXIT_SUCCESS.
+ */
+static bool test_priv_mount_unpriv_remount(void)
+{
+       pid_t child;
+       int ret;
+       const char *orig_path = "/dev";
+       const char *dest_path = "/tmp";
+       int orig_mnt_flags, remount_mnt_flags;
+
+       child = fork();
+       if (child == -1) {
+               die("fork failed: %s\n",
+                       strerror(errno));
+       }
+       if (child != 0) { /* parent */
+               pid_t pid;
+               int status;
+               pid = waitpid(child, &status, 0);
+               if (pid == -1) {
+                       die("waitpid failed: %s\n",
+                               strerror(errno));
+               }
+               if (pid != child) {
+                       die("waited for %d got %d\n",
+                               child, pid);
+               }
+               if (!WIFEXITED(status)) {
+                       die("child did not terminate cleanly\n");
+               }
+               return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false;
+       }
+
+       /* child: record the flags before entering the user namespace */
+       orig_mnt_flags = read_mnt_flags(orig_path);
+
+       create_and_enter_userns();
+       ret = unshare(CLONE_NEWNS);
+       if (ret != 0) {
+               die("unshare(CLONE_NEWNS) failed: %s\n",
+                       strerror(errno));
+       }
+
+       ret = mount(orig_path, dest_path, "bind", MS_BIND | MS_REC, NULL);
+       if (ret != 0) {
+               die("recursive bind mount of %s onto %s failed: %s\n",
+                       orig_path, dest_path, strerror(errno));
+       }
+
+       /* Remount the bind mount asking for exactly the original flags */
+       ret = mount(dest_path, dest_path, "none",
+                   MS_REMOUNT | MS_BIND | orig_mnt_flags , NULL);
+       if (ret != 0) {
+               /* system("cat /proc/self/mounts"); */
+               die("remount of /tmp failed: %s\n",
+                   strerror(errno));
+       }
+
+       remount_mnt_flags = read_mnt_flags(dest_path);
+       if (orig_mnt_flags != remount_mnt_flags) {
+               die("Mount flags unexpectedly changed during remount of %s originally mounted on %s\n",
+                       dest_path, orig_path);
+       }
+       exit(EXIT_SUCCESS);
 }
 
 int main(int argc, char **argv)
 {
-       if (!test_unpriv_remount_simple(MS_RDONLY|MS_NODEV)) {
+       if (!test_unpriv_remount_simple(MS_RDONLY)) {
                die("MS_RDONLY malfunctions\n");
        }
-       if (!test_unpriv_remount_simple(MS_NODEV)) {
+       if (!test_unpriv_remount("devpts", "newinstance", MS_NODEV, MS_NODEV, 0)) {
                die("MS_NODEV malfunctions\n");
        }
-       if (!test_unpriv_remount_simple(MS_NOSUID|MS_NODEV)) {
+       if (!test_unpriv_remount_simple(MS_NOSUID)) {
                die("MS_NOSUID malfunctions\n");
        }
-       if (!test_unpriv_remount_simple(MS_NOEXEC|MS_NODEV)) {
+       if (!test_unpriv_remount_simple(MS_NOEXEC)) {
                die("MS_NOEXEC malfunctions\n");
        }
-       if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODEV,
-                                      MS_NOATIME|MS_NODEV))
+       if (!test_unpriv_remount_atime(MS_RELATIME,
+                                      MS_NOATIME))
        {
                die("MS_RELATIME malfunctions\n");
        }
-       if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODEV,
-                                      MS_NOATIME|MS_NODEV))
+       if (!test_unpriv_remount_atime(MS_STRICTATIME,
+                                      MS_NOATIME))
        {
                die("MS_STRICTATIME malfunctions\n");
        }
-       if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODEV,
-                                      MS_STRICTATIME|MS_NODEV))
+       if (!test_unpriv_remount_atime(MS_NOATIME,
+                                      MS_STRICTATIME))
        {
-               die("MS_RELATIME malfunctions\n");
+               die("MS_NOATIME malfunctions\n");
        }
-       if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME|MS_NODEV,
-                                      MS_NOATIME|MS_NODEV))
+       if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME,
+                                      MS_NOATIME))
        {
-               die("MS_RELATIME malfunctions\n");
+               die("MS_RELATIME|MS_NODIRATIME malfunctions\n");
        }
-       if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME|MS_NODEV,
-                                      MS_NOATIME|MS_NODEV))
+       if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME,
+                                      MS_NOATIME))
        {
-               die("MS_RELATIME malfunctions\n");
+               die("MS_STRICTATIME|MS_NODIRATIME malfunctions\n");
        }
-       if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME|MS_NODEV,
-                                      MS_STRICTATIME|MS_NODEV))
+       if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME,
+                                      MS_STRICTATIME))
        {
-               die("MS_RELATIME malfunctions\n");
+               die("MS_NOATIME|MS_DIRATIME malfunctions\n");
        }
-       if (!test_unpriv_remount(MS_STRICTATIME|MS_NODEV, MS_NODEV,
-                                MS_NOATIME|MS_NODEV))
+       if (!test_unpriv_remount("ramfs", NULL, MS_STRICTATIME, 0, MS_NOATIME))
        {
                die("Default atime malfunctions\n");
        }
+       if (!test_priv_mount_unpriv_remount()) {
+               die("Mount flags unexpectedly changed after remount\n");
+       }
        return EXIT_SUCCESS;
 }