brcm80211: pointless current->files passed to filp_close()
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / fs / namei.c
CommitLineData
1da177e4
LT
1/*
2 * linux/fs/namei.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
6
7/*
8 * Some corrections by tytso.
9 */
10
11/* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
12 * lookup logic.
13 */
14/* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
15 */
16
17#include <linux/init.h>
630d9c47 18#include <linux/export.h>
44696908 19#include <linux/kernel.h>
1da177e4
LT
20#include <linux/slab.h>
21#include <linux/fs.h>
22#include <linux/namei.h>
1da177e4 23#include <linux/pagemap.h>
0eeca283 24#include <linux/fsnotify.h>
1da177e4
LT
25#include <linux/personality.h>
26#include <linux/security.h>
6146f0d5 27#include <linux/ima.h>
1da177e4
LT
28#include <linux/syscalls.h>
29#include <linux/mount.h>
30#include <linux/audit.h>
16f7e0fe 31#include <linux/capability.h>
834f2a4a 32#include <linux/file.h>
5590ff0d 33#include <linux/fcntl.h>
08ce5f16 34#include <linux/device_cgroup.h>
5ad4e53b 35#include <linux/fs_struct.h>
e77819e5 36#include <linux/posix_acl.h>
1da177e4
LT
37#include <asm/uaccess.h>
38
e81e3f4d 39#include "internal.h"
c7105365 40#include "mount.h"
e81e3f4d 41
1da177e4
LT
42/* [Feb-1997 T. Schoebel-Theuer]
43 * Fundamental changes in the pathname lookup mechanisms (namei)
44 * were necessary because of omirr. The reason is that omirr needs
45 * to know the _real_ pathname, not the user-supplied one, in case
46 * of symlinks (and also when transname replacements occur).
47 *
48 * The new code replaces the old recursive symlink resolution with
49 * an iterative one (in case of non-nested symlink chains). It does
50 * this with calls to <fs>_follow_link().
51 * As a side effect, dir_namei(), _namei() and follow_link() are now
52 * replaced with a single function lookup_dentry() that can handle all
53 * the special cases of the former code.
54 *
55 * With the new dcache, the pathname is stored at each inode, at least as
56 * long as the refcount of the inode is positive. As a side effect, the
57 * size of the dcache depends on the inode cache and thus is dynamic.
58 *
59 * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
60 * resolution to correspond with current state of the code.
61 *
62 * Note that the symlink resolution is not *completely* iterative.
63 * There is still a significant amount of tail- and mid- recursion in
64 * the algorithm. Also, note that <fs>_readlink() is not used in
65 * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
66 * may return different results than <fs>_follow_link(). Many virtual
67 * filesystems (including /proc) exhibit this behavior.
68 */
69
70/* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
71 * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
72 * and the name already exists in form of a symlink, try to create the new
73 * name indicated by the symlink. The old code always complained that the
74 * name already exists, due to not following the symlink even if its target
75 * is nonexistent. The new semantics affects also mknod() and link() when
25985edc 76 * the name is a symlink pointing to a non-existent name.
1da177e4
LT
77 *
78 * I don't know which semantics is the right one, since I have no access
79 * to standards. But I found by trial that HP-UX 9.0 has the full "new"
80 * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
81 * "old" one. Personally, I think the new semantics is much more logical.
82 * Note that "ln old new" where "new" is a symlink pointing to a non-existing
83 * file does succeed in both HP-UX and SunOS, but not in Solaris
84 * and in the old Linux semantics.
85 */
86
87/* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
88 * semantics. See the comments in "open_namei" and "do_link" below.
89 *
90 * [10-Sep-98 Alan Modra] Another symlink change.
91 */
92
93/* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
94 * inside the path - always follow.
95 * in the last component in creation/removal/renaming - never follow.
96 * if LOOKUP_FOLLOW passed - follow.
97 * if the pathname has trailing slashes - follow.
98 * otherwise - don't follow.
99 * (applied in that order).
100 *
101 * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT
102 * restored for 2.4. This is the last surviving part of old 4.2BSD bug.
103 * During the 2.4 we need to fix the userland stuff depending on it -
104 * hopefully we will be able to get rid of that wart in 2.5. So far only
105 * XEmacs seems to be relying on it...
106 */
107/*
108 * [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland)
a11f3a05 109 * implemented. Let's see if raised priority of ->s_vfs_rename_mutex gives
1da177e4
LT
110 * any extra contention...
111 */
112
113/* In order to reduce some races, while at the same time doing additional
114 * checking and hopefully speeding things up, we copy filenames to the
115 * kernel data space before using them..
116 *
117 * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
118 * PATH_MAX includes the nul terminator --RR.
119 */
1fa1e7f6 120static char *getname_flags(const char __user *filename, int flags, int *empty)
1da177e4 121{
3f9f0aa6
LT
122 char *result = __getname(), *err;
123 int len;
4043cde8 124
3f9f0aa6 125 if (unlikely(!result))
4043cde8
EP
126 return ERR_PTR(-ENOMEM);
127
3f9f0aa6
LT
128 len = strncpy_from_user(result, filename, PATH_MAX);
129 err = ERR_PTR(len);
130 if (unlikely(len < 0))
131 goto error;
132
133 /* The empty path is special. */
134 if (unlikely(!len)) {
135 if (empty)
4043cde8 136 *empty = 1;
3f9f0aa6
LT
137 err = ERR_PTR(-ENOENT);
138 if (!(flags & LOOKUP_EMPTY))
139 goto error;
1da177e4 140 }
3f9f0aa6
LT
141
142 err = ERR_PTR(-ENAMETOOLONG);
143 if (likely(len < PATH_MAX)) {
144 audit_getname(result);
145 return result;
146 }
147
148error:
149 __putname(result);
150 return err;
1da177e4
LT
151}
152
f52e0c11
AV
153char *getname(const char __user * filename)
154{
f7493e5d 155 return getname_flags(filename, 0, NULL);
f52e0c11
AV
156}
157
1da177e4
LT
158#ifdef CONFIG_AUDITSYSCALL
/*
 * putname - release a name buffer.
 *
 * When audit_dummy_context() reports a dummy context the buffer goes
 * straight to __putname(); otherwise it is handed to audit_putname().
 */
void putname(const char *name)
{
	if (likely(audit_dummy_context())) {
		__putname(name);
		return;
	}
	audit_putname(name);
}
166EXPORT_SYMBOL(putname);
167#endif
168
e77819e5
LT
169static int check_acl(struct inode *inode, int mask)
170{
84635d68 171#ifdef CONFIG_FS_POSIX_ACL
e77819e5
LT
172 struct posix_acl *acl;
173
e77819e5 174 if (mask & MAY_NOT_BLOCK) {
3567866b
AV
175 acl = get_cached_acl_rcu(inode, ACL_TYPE_ACCESS);
176 if (!acl)
e77819e5 177 return -EAGAIN;
3567866b
AV
178 /* no ->get_acl() calls in RCU mode... */
179 if (acl == ACL_NOT_CACHED)
180 return -ECHILD;
206b1d09 181 return posix_acl_permission(inode, acl, mask & ~MAY_NOT_BLOCK);
e77819e5
LT
182 }
183
184 acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
185
186 /*
4e34e719
CH
187 * A filesystem can force a ACL callback by just never filling the
188 * ACL cache. But normally you'd fill the cache either at inode
189 * instantiation time, or on the first ->get_acl call.
e77819e5 190 *
4e34e719
CH
191 * If the filesystem doesn't have a get_acl() function at all, we'll
192 * just create the negative cache entry.
e77819e5
LT
193 */
194 if (acl == ACL_NOT_CACHED) {
4e34e719
CH
195 if (inode->i_op->get_acl) {
196 acl = inode->i_op->get_acl(inode, ACL_TYPE_ACCESS);
197 if (IS_ERR(acl))
198 return PTR_ERR(acl);
199 } else {
200 set_cached_acl(inode, ACL_TYPE_ACCESS, NULL);
201 return -EAGAIN;
202 }
e77819e5
LT
203 }
204
205 if (acl) {
206 int error = posix_acl_permission(inode, acl, mask);
207 posix_acl_release(acl);
208 return error;
209 }
84635d68 210#endif
e77819e5
LT
211
212 return -EAGAIN;
213}
214
5909ccaa 215/*
948409c7 216 * This does the basic permission checking
1da177e4 217 */
7e40145e 218static int acl_permission_check(struct inode *inode, int mask)
1da177e4 219{
26cf46be 220 unsigned int mode = inode->i_mode;
1da177e4 221
8e96e3b7 222 if (likely(uid_eq(current_fsuid(), inode->i_uid)))
1da177e4
LT
223 mode >>= 6;
224 else {
e77819e5 225 if (IS_POSIXACL(inode) && (mode & S_IRWXG)) {
7e40145e 226 int error = check_acl(inode, mask);
b74c79e9
NP
227 if (error != -EAGAIN)
228 return error;
1da177e4
LT
229 }
230
231 if (in_group_p(inode->i_gid))
232 mode >>= 3;
233 }
234
235 /*
236 * If the DACs are ok we don't need any capability check.
237 */
9c2c7039 238 if ((mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
1da177e4 239 return 0;
5909ccaa
LT
240 return -EACCES;
241}
242
243/**
b74c79e9 244 * generic_permission - check for access rights on a Posix-like filesystem
5909ccaa 245 * @inode: inode to check access rights for
8fd90c8d 246 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC, ...)
5909ccaa
LT
247 *
248 * Used to check for read/write/execute permissions on a file.
249 * We use "fsuid" for this, letting us set arbitrary permissions
250 * for filesystem access without changing the "normal" uids which
b74c79e9
NP
251 * are used for other things.
252 *
253 * generic_permission is rcu-walk aware. It returns -ECHILD in case an rcu-walk
254 * request cannot be satisfied (eg. requires blocking or too much complexity).
255 * It would then be called again in ref-walk mode.
5909ccaa 256 */
2830ba7f 257int generic_permission(struct inode *inode, int mask)
5909ccaa
LT
258{
259 int ret;
260
261 /*
948409c7 262 * Do the basic permission checks.
5909ccaa 263 */
7e40145e 264 ret = acl_permission_check(inode, mask);
5909ccaa
LT
265 if (ret != -EACCES)
266 return ret;
1da177e4 267
d594e7ec
AV
268 if (S_ISDIR(inode->i_mode)) {
269 /* DACs are overridable for directories */
1a48e2ac 270 if (inode_capable(inode, CAP_DAC_OVERRIDE))
d594e7ec
AV
271 return 0;
272 if (!(mask & MAY_WRITE))
1a48e2ac 273 if (inode_capable(inode, CAP_DAC_READ_SEARCH))
d594e7ec
AV
274 return 0;
275 return -EACCES;
276 }
1da177e4
LT
277 /*
278 * Read/write DACs are always overridable.
d594e7ec
AV
279 * Executable DACs are overridable when there is
280 * at least one exec bit set.
1da177e4 281 */
d594e7ec 282 if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO))
1a48e2ac 283 if (inode_capable(inode, CAP_DAC_OVERRIDE))
1da177e4
LT
284 return 0;
285
286 /*
287 * Searching includes executable on directories, else just read.
288 */
7ea66001 289 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
d594e7ec 290 if (mask == MAY_READ)
1a48e2ac 291 if (inode_capable(inode, CAP_DAC_READ_SEARCH))
1da177e4
LT
292 return 0;
293
294 return -EACCES;
295}
296
3ddcd056
LT
297/*
298 * We _really_ want to just do "generic_permission()" without
299 * even looking at the inode->i_op values. So we keep a cache
300 * flag in inode->i_opflags, that says "this has not special
301 * permission function, use the fast case".
302 */
303static inline int do_inode_permission(struct inode *inode, int mask)
304{
305 if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) {
306 if (likely(inode->i_op->permission))
307 return inode->i_op->permission(inode, mask);
308
309 /* This gets set once for the inode lifetime */
310 spin_lock(&inode->i_lock);
311 inode->i_opflags |= IOP_FASTPERM;
312 spin_unlock(&inode->i_lock);
313 }
314 return generic_permission(inode, mask);
315}
316
cb23beb5 317/**
0bdaea90
DH
318 * __inode_permission - Check for access rights to a given inode
319 * @inode: Inode to check permission on
320 * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
cb23beb5 321 *
0bdaea90 322 * Check for read/write/execute permissions on an inode.
948409c7
AG
323 *
324 * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask.
0bdaea90
DH
325 *
326 * This does not check for a read-only file system. You probably want
327 * inode_permission().
cb23beb5 328 */
0bdaea90 329int __inode_permission(struct inode *inode, int mask)
1da177e4 330{
e6305c43 331 int retval;
1da177e4 332
3ddcd056 333 if (unlikely(mask & MAY_WRITE)) {
1da177e4
LT
334 /*
335 * Nobody gets write access to an immutable file.
336 */
337 if (IS_IMMUTABLE(inode))
338 return -EACCES;
339 }
340
3ddcd056 341 retval = do_inode_permission(inode, mask);
1da177e4
LT
342 if (retval)
343 return retval;
344
08ce5f16
SH
345 retval = devcgroup_inode_permission(inode, mask);
346 if (retval)
347 return retval;
348
d09ca739 349 return security_inode_permission(inode, mask);
1da177e4
LT
350}
351
0bdaea90
DH
352/**
353 * sb_permission - Check superblock-level permissions
354 * @sb: Superblock of inode to check permission on
355 * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
356 *
357 * Separate out file-system wide checks from inode-specific permission checks.
358 */
359static int sb_permission(struct super_block *sb, struct inode *inode, int mask)
360{
361 if (unlikely(mask & MAY_WRITE)) {
362 umode_t mode = inode->i_mode;
363
364 /* Nobody gets write access to a read-only fs. */
365 if ((sb->s_flags & MS_RDONLY) &&
366 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
367 return -EROFS;
368 }
369 return 0;
370}
371
372/**
373 * inode_permission - Check for access rights to a given inode
374 * @inode: Inode to check permission on
375 * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
376 *
377 * Check for read/write/execute permissions on an inode. We use fs[ug]id for
378 * this, letting us set arbitrary permissions for filesystem access without
379 * changing the "normal" UIDs which are used for other things.
380 *
381 * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask.
382 */
383int inode_permission(struct inode *inode, int mask)
384{
385 int retval;
386
387 retval = sb_permission(inode->i_sb, inode, mask);
388 if (retval)
389 return retval;
390 return __inode_permission(inode, mask);
391}
392
5dd784d0
JB
393/**
394 * path_get - get a reference to a path
395 * @path: path to get the reference to
396 *
397 * Given a path increment the reference count to the dentry and the vfsmount.
398 */
399void path_get(struct path *path)
400{
401 mntget(path->mnt);
402 dget(path->dentry);
403}
404EXPORT_SYMBOL(path_get);
405
1d957f9b
JB
406/**
407 * path_put - put a reference to a path
408 * @path: path to put the reference to
409 *
410 * Given a path decrement the reference count to the dentry and the vfsmount.
411 */
412void path_put(struct path *path)
1da177e4 413{
1d957f9b
JB
414 dput(path->dentry);
415 mntput(path->mnt);
1da177e4 416}
1d957f9b 417EXPORT_SYMBOL(path_put);
1da177e4 418
19660af7 419/*
31e6b01f 420 * Path walking has 2 modes, rcu-walk and ref-walk (see
19660af7
AV
421 * Documentation/filesystems/path-lookup.txt). In situations when we can't
422 * continue in RCU mode, we attempt to drop out of rcu-walk mode and grab
423 * normal reference counts on dentries and vfsmounts to transition to ref-walk
424 * mode. Refcounts are grabbed at the last known good point before rcu-walk
425 * got stuck, so ref-walk may continue from there. If this is not successful
426 * (eg. a seqcount has changed), then failure is returned and it's up to caller
427 * to restart the path walk from the beginning in ref-walk mode.
31e6b01f 428 */
31e6b01f 429
32a7991b
AV
430static inline void lock_rcu_walk(void)
431{
432 br_read_lock(&vfsmount_lock);
433 rcu_read_lock();
434}
435
436static inline void unlock_rcu_walk(void)
437{
438 rcu_read_unlock();
439 br_read_unlock(&vfsmount_lock);
440}
441
31e6b01f 442/**
19660af7
AV
443 * unlazy_walk - try to switch to ref-walk mode.
444 * @nd: nameidata pathwalk data
445 * @dentry: child of nd->path.dentry or NULL
39191628 446 * Returns: 0 on success, -ECHILD on failure
31e6b01f 447 *
19660af7
AV
448 * unlazy_walk attempts to legitimize the current nd->path, nd->root and dentry
449 * for ref-walk mode. @dentry must be a path found by a do_lookup call on
450 * @nd or NULL. Must be called from rcu-walk context.
31e6b01f 451 */
19660af7 452static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
31e6b01f
NP
453{
454 struct fs_struct *fs = current->fs;
455 struct dentry *parent = nd->path.dentry;
5b6ca027 456 int want_root = 0;
31e6b01f
NP
457
458 BUG_ON(!(nd->flags & LOOKUP_RCU));
5b6ca027
AV
459 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
460 want_root = 1;
31e6b01f
NP
461 spin_lock(&fs->lock);
462 if (nd->root.mnt != fs->root.mnt ||
463 nd->root.dentry != fs->root.dentry)
464 goto err_root;
465 }
466 spin_lock(&parent->d_lock);
19660af7
AV
467 if (!dentry) {
468 if (!__d_rcu_to_refcount(parent, nd->seq))
469 goto err_parent;
470 BUG_ON(nd->inode != parent->d_inode);
471 } else {
94c0d4ec
AV
472 if (dentry->d_parent != parent)
473 goto err_parent;
19660af7
AV
474 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
475 if (!__d_rcu_to_refcount(dentry, nd->seq))
476 goto err_child;
477 /*
478 * If the sequence check on the child dentry passed, then
479 * the child has not been removed from its parent. This
480 * means the parent dentry must be valid and able to take
481 * a reference at this point.
482 */
483 BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
484 BUG_ON(!parent->d_count);
485 parent->d_count++;
486 spin_unlock(&dentry->d_lock);
487 }
31e6b01f 488 spin_unlock(&parent->d_lock);
5b6ca027 489 if (want_root) {
31e6b01f
NP
490 path_get(&nd->root);
491 spin_unlock(&fs->lock);
492 }
493 mntget(nd->path.mnt);
494
32a7991b 495 unlock_rcu_walk();
31e6b01f
NP
496 nd->flags &= ~LOOKUP_RCU;
497 return 0;
19660af7
AV
498
499err_child:
31e6b01f 500 spin_unlock(&dentry->d_lock);
19660af7 501err_parent:
31e6b01f
NP
502 spin_unlock(&parent->d_lock);
503err_root:
5b6ca027 504 if (want_root)
31e6b01f
NP
505 spin_unlock(&fs->lock);
506 return -ECHILD;
507}
508
4ce16ef3 509static inline int d_revalidate(struct dentry *dentry, unsigned int flags)
34286d66 510{
4ce16ef3 511 return dentry->d_op->d_revalidate(dentry, flags);
34286d66
NP
512}
513
9f1fafee
AV
514/**
515 * complete_walk - successful completion of path walk
516 * @nd: pointer nameidata
39159de2 517 *
9f1fafee
AV
518 * If we had been in RCU mode, drop out of it and legitimize nd->path.
519 * Revalidate the final result, unless we'd already done that during
520 * the path walk or the filesystem doesn't ask for it. Return 0 on
521 * success, -error on failure. In case of failure caller does not
522 * need to drop nd->path.
39159de2 523 */
9f1fafee 524static int complete_walk(struct nameidata *nd)
39159de2 525{
16c2cd71 526 struct dentry *dentry = nd->path.dentry;
39159de2 527 int status;
39159de2 528
9f1fafee
AV
529 if (nd->flags & LOOKUP_RCU) {
530 nd->flags &= ~LOOKUP_RCU;
531 if (!(nd->flags & LOOKUP_ROOT))
532 nd->root.mnt = NULL;
533 spin_lock(&dentry->d_lock);
534 if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) {
535 spin_unlock(&dentry->d_lock);
32a7991b 536 unlock_rcu_walk();
9f1fafee
AV
537 return -ECHILD;
538 }
539 BUG_ON(nd->inode != dentry->d_inode);
540 spin_unlock(&dentry->d_lock);
541 mntget(nd->path.mnt);
32a7991b 542 unlock_rcu_walk();
9f1fafee
AV
543 }
544
16c2cd71
AV
545 if (likely(!(nd->flags & LOOKUP_JUMPED)))
546 return 0;
547
548 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
39159de2
JL
549 return 0;
550
16c2cd71
AV
551 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
552 return 0;
553
554 /* Note: we do not d_invalidate() */
4ce16ef3 555 status = d_revalidate(dentry, nd->flags);
39159de2
JL
556 if (status > 0)
557 return 0;
558
16c2cd71 559 if (!status)
39159de2 560 status = -ESTALE;
16c2cd71 561
9f1fafee 562 path_put(&nd->path);
39159de2
JL
563 return status;
564}
565
2a737871
AV
566static __always_inline void set_root(struct nameidata *nd)
567{
f7ad3c6b
MS
568 if (!nd->root.mnt)
569 get_fs_root(current->fs, &nd->root);
2a737871
AV
570}
571
6de88d72
AV
572static int link_path_walk(const char *, struct nameidata *);
573
31e6b01f
NP
574static __always_inline void set_root_rcu(struct nameidata *nd)
575{
576 if (!nd->root.mnt) {
577 struct fs_struct *fs = current->fs;
c28cc364
NP
578 unsigned seq;
579
580 do {
581 seq = read_seqcount_begin(&fs->seq);
582 nd->root = fs->root;
c1530019 583 nd->seq = __read_seqcount_begin(&nd->root.dentry->d_seq);
c28cc364 584 } while (read_seqcount_retry(&fs->seq, seq));
31e6b01f
NP
585 }
586}
587
f1662356 588static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
1da177e4 589{
31e6b01f
NP
590 int ret;
591
1da177e4
LT
592 if (IS_ERR(link))
593 goto fail;
594
595 if (*link == '/') {
2a737871 596 set_root(nd);
1d957f9b 597 path_put(&nd->path);
2a737871
AV
598 nd->path = nd->root;
599 path_get(&nd->root);
16c2cd71 600 nd->flags |= LOOKUP_JUMPED;
1da177e4 601 }
31e6b01f 602 nd->inode = nd->path.dentry->d_inode;
b4091d5f 603
31e6b01f
NP
604 ret = link_path_walk(link, nd);
605 return ret;
1da177e4 606fail:
1d957f9b 607 path_put(&nd->path);
1da177e4
LT
608 return PTR_ERR(link);
609}
610
1d957f9b 611static void path_put_conditional(struct path *path, struct nameidata *nd)
051d3812
IK
612{
613 dput(path->dentry);
4ac91378 614 if (path->mnt != nd->path.mnt)
051d3812
IK
615 mntput(path->mnt);
616}
617
7b9337aa
NP
618static inline void path_to_nameidata(const struct path *path,
619 struct nameidata *nd)
051d3812 620{
31e6b01f
NP
621 if (!(nd->flags & LOOKUP_RCU)) {
622 dput(nd->path.dentry);
623 if (nd->path.mnt != path->mnt)
624 mntput(nd->path.mnt);
9a229683 625 }
31e6b01f 626 nd->path.mnt = path->mnt;
4ac91378 627 nd->path.dentry = path->dentry;
051d3812
IK
628}
629
b5fb63c1
CH
630/*
631 * Helper to directly jump to a known parsed path from ->follow_link,
632 * caller must have taken a reference to path beforehand.
633 */
634void nd_jump_link(struct nameidata *nd, struct path *path)
635{
636 path_put(&nd->path);
637
638 nd->path = *path;
639 nd->inode = nd->path.dentry->d_inode;
640 nd->flags |= LOOKUP_JUMPED;
641
642 BUG_ON(nd->inode->i_op->follow_link);
643}
644
574197e0
AV
645static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
646{
647 struct inode *inode = link->dentry->d_inode;
6d7b5aae 648 if (inode->i_op->put_link)
574197e0
AV
649 inode->i_op->put_link(link->dentry, nd, cookie);
650 path_put(link);
651}
652
def4af30 653static __always_inline int
574197e0 654follow_link(struct path *link, struct nameidata *nd, void **p)
1da177e4 655{
7b9337aa 656 struct dentry *dentry = link->dentry;
6d7b5aae
AV
657 int error;
658 char *s;
1da177e4 659
844a3917
AV
660 BUG_ON(nd->flags & LOOKUP_RCU);
661
0e794589
AV
662 if (link->mnt == nd->path.mnt)
663 mntget(link->mnt);
664
6d7b5aae
AV
665 error = -ELOOP;
666 if (unlikely(current->total_link_count >= 40))
667 goto out_put_nd_path;
668
574197e0
AV
669 cond_resched();
670 current->total_link_count++;
671
68ac1234 672 touch_atime(link);
1da177e4 673 nd_set_link(nd, NULL);
cd4e91d3 674
36f3b4f6 675 error = security_inode_follow_link(link->dentry, nd);
6d7b5aae
AV
676 if (error)
677 goto out_put_nd_path;
36f3b4f6 678
86acdca1 679 nd->last_type = LAST_BIND;
def4af30
AV
680 *p = dentry->d_inode->i_op->follow_link(dentry, nd);
681 error = PTR_ERR(*p);
6d7b5aae 682 if (IS_ERR(*p))
408ef013 683 goto out_put_nd_path;
6d7b5aae
AV
684
685 error = 0;
686 s = nd_get_link(nd);
687 if (s) {
688 error = __vfs_follow_link(nd, s);
b5fb63c1
CH
689 if (unlikely(error))
690 put_link(nd, link, *p);
1da177e4 691 }
6d7b5aae
AV
692
693 return error;
694
695out_put_nd_path:
696 path_put(&nd->path);
6d7b5aae 697 path_put(link);
1da177e4
LT
698 return error;
699}
700
31e6b01f
NP
701static int follow_up_rcu(struct path *path)
702{
0714a533
AV
703 struct mount *mnt = real_mount(path->mnt);
704 struct mount *parent;
31e6b01f
NP
705 struct dentry *mountpoint;
706
0714a533
AV
707 parent = mnt->mnt_parent;
708 if (&parent->mnt == path->mnt)
31e6b01f 709 return 0;
a73324da 710 mountpoint = mnt->mnt_mountpoint;
31e6b01f 711 path->dentry = mountpoint;
0714a533 712 path->mnt = &parent->mnt;
31e6b01f
NP
713 return 1;
714}
715
f015f126
DH
716/*
717 * follow_up - Find the mountpoint of path's vfsmount
718 *
719 * Given a path, find the mountpoint of its source file system.
720 * Replace @path with the path of the mountpoint in the parent mount.
721 * Up is towards /.
722 *
723 * Return 1 if we went up a level and 0 if we were already at the
724 * root.
725 */
bab77ebf 726int follow_up(struct path *path)
1da177e4 727{
0714a533
AV
728 struct mount *mnt = real_mount(path->mnt);
729 struct mount *parent;
1da177e4 730 struct dentry *mountpoint;
99b7db7b 731
962830df 732 br_read_lock(&vfsmount_lock);
0714a533 733 parent = mnt->mnt_parent;
3c0a6163 734 if (parent == mnt) {
962830df 735 br_read_unlock(&vfsmount_lock);
1da177e4
LT
736 return 0;
737 }
0714a533 738 mntget(&parent->mnt);
a73324da 739 mountpoint = dget(mnt->mnt_mountpoint);
962830df 740 br_read_unlock(&vfsmount_lock);
bab77ebf
AV
741 dput(path->dentry);
742 path->dentry = mountpoint;
743 mntput(path->mnt);
0714a533 744 path->mnt = &parent->mnt;
1da177e4
LT
745 return 1;
746}
747
b5c84bf6 748/*
9875cf80
DH
749 * Perform an automount
750 * - return -EISDIR to tell follow_managed() to stop and return the path we
751 * were called with.
1da177e4 752 */
9875cf80
DH
753static int follow_automount(struct path *path, unsigned flags,
754 bool *need_mntput)
31e6b01f 755{
9875cf80 756 struct vfsmount *mnt;
ea5b778a 757 int err;
9875cf80
DH
758
759 if (!path->dentry->d_op || !path->dentry->d_op->d_automount)
760 return -EREMOTE;
761
0ec26fd0
MS
762 /* We don't want to mount if someone's just doing a stat -
763 * unless they're stat'ing a directory and appended a '/' to
764 * the name.
765 *
766 * We do, however, want to mount if someone wants to open or
767 * create a file of any type under the mountpoint, wants to
768 * traverse through the mountpoint or wants to open the
769 * mounted directory. Also, autofs may mark negative dentries
770 * as being automount points. These will need the attentions
771 * of the daemon to instantiate them before they can be used.
9875cf80 772 */
0ec26fd0 773 if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
d94c177b 774 LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
0ec26fd0
MS
775 path->dentry->d_inode)
776 return -EISDIR;
777
9875cf80
DH
778 current->total_link_count++;
779 if (current->total_link_count >= 40)
780 return -ELOOP;
781
782 mnt = path->dentry->d_op->d_automount(path);
783 if (IS_ERR(mnt)) {
784 /*
785 * The filesystem is allowed to return -EISDIR here to indicate
786 * it doesn't want to automount. For instance, autofs would do
787 * this so that its userspace daemon can mount on this dentry.
788 *
789 * However, we can only permit this if it's a terminal point in
790 * the path being looked up; if it wasn't then the remainder of
791 * the path is inaccessible and we should say so.
792 */
49084c3b 793 if (PTR_ERR(mnt) == -EISDIR && (flags & LOOKUP_PARENT))
9875cf80
DH
794 return -EREMOTE;
795 return PTR_ERR(mnt);
31e6b01f 796 }
ea5b778a 797
9875cf80
DH
798 if (!mnt) /* mount collision */
799 return 0;
31e6b01f 800
8aef1884
AV
801 if (!*need_mntput) {
802 /* lock_mount() may release path->mnt on error */
803 mntget(path->mnt);
804 *need_mntput = true;
805 }
19a167af 806 err = finish_automount(mnt, path);
9875cf80 807
ea5b778a
DH
808 switch (err) {
809 case -EBUSY:
810 /* Someone else made a mount here whilst we were busy */
19a167af 811 return 0;
ea5b778a 812 case 0:
8aef1884 813 path_put(path);
ea5b778a
DH
814 path->mnt = mnt;
815 path->dentry = dget(mnt->mnt_root);
ea5b778a 816 return 0;
19a167af
AV
817 default:
818 return err;
ea5b778a 819 }
19a167af 820
463ffb2e
AV
821}
822
9875cf80
DH
823/*
824 * Handle a dentry that is managed in some way.
cc53ce53 825 * - Flagged for transit management (autofs)
9875cf80
DH
826 * - Flagged as mountpoint
827 * - Flagged as automount point
828 *
829 * This may only be called in refwalk mode.
830 *
831 * Serialization is taken care of in namespace.c
832 */
833static int follow_managed(struct path *path, unsigned flags)
1da177e4 834{
8aef1884 835 struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */
9875cf80
DH
836 unsigned managed;
837 bool need_mntput = false;
8aef1884 838 int ret = 0;
9875cf80
DH
839
840 /* Given that we're not holding a lock here, we retain the value in a
841 * local variable for each dentry as we look at it so that we don't see
842 * the components of that value change under us */
843 while (managed = ACCESS_ONCE(path->dentry->d_flags),
844 managed &= DCACHE_MANAGED_DENTRY,
845 unlikely(managed != 0)) {
cc53ce53
DH
846 /* Allow the filesystem to manage the transit without i_mutex
847 * being held. */
848 if (managed & DCACHE_MANAGE_TRANSIT) {
849 BUG_ON(!path->dentry->d_op);
850 BUG_ON(!path->dentry->d_op->d_manage);
1aed3e42 851 ret = path->dentry->d_op->d_manage(path->dentry, false);
cc53ce53 852 if (ret < 0)
8aef1884 853 break;
cc53ce53
DH
854 }
855
9875cf80
DH
856 /* Transit to a mounted filesystem. */
857 if (managed & DCACHE_MOUNTED) {
858 struct vfsmount *mounted = lookup_mnt(path);
859 if (mounted) {
860 dput(path->dentry);
861 if (need_mntput)
862 mntput(path->mnt);
863 path->mnt = mounted;
864 path->dentry = dget(mounted->mnt_root);
865 need_mntput = true;
866 continue;
867 }
868
869 /* Something is mounted on this dentry in another
870 * namespace and/or whatever was mounted there in this
871 * namespace got unmounted before we managed to get the
872 * vfsmount_lock */
873 }
874
875 /* Handle an automount point */
876 if (managed & DCACHE_NEED_AUTOMOUNT) {
877 ret = follow_automount(path, flags, &need_mntput);
878 if (ret < 0)
8aef1884 879 break;
9875cf80
DH
880 continue;
881 }
882
883 /* We didn't change the current path point */
884 break;
1da177e4 885 }
8aef1884
AV
886
887 if (need_mntput && path->mnt == mnt)
888 mntput(path->mnt);
889 if (ret == -EISDIR)
890 ret = 0;
a3fbbde7 891 return ret < 0 ? ret : need_mntput;
1da177e4
LT
892}
893
cc53ce53 894int follow_down_one(struct path *path)
1da177e4
LT
895{
896 struct vfsmount *mounted;
897
1c755af4 898 mounted = lookup_mnt(path);
1da177e4 899 if (mounted) {
9393bd07
AV
900 dput(path->dentry);
901 mntput(path->mnt);
902 path->mnt = mounted;
903 path->dentry = dget(mounted->mnt_root);
1da177e4
LT
904 return 1;
905 }
906 return 0;
907}
908
62a7375e
IK
909static inline bool managed_dentry_might_block(struct dentry *dentry)
910{
911 return (dentry->d_flags & DCACHE_MANAGE_TRANSIT &&
912 dentry->d_op->d_manage(dentry, true) < 0);
913}
914
9875cf80 915/*
287548e4
AV
916 * Try to skip to top of mountpoint pile in rcuwalk mode. Fail if
917 * we meet a managed dentry that would need blocking.
9875cf80
DH
918 */
919static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
287548e4 920 struct inode **inode)
9875cf80 921{
62a7375e 922 for (;;) {
c7105365 923 struct mount *mounted;
62a7375e
IK
924 /*
925 * Don't forget we might have a non-mountpoint managed dentry
926 * that wants to block transit.
927 */
287548e4 928 if (unlikely(managed_dentry_might_block(path->dentry)))
ab90911f 929 return false;
62a7375e
IK
930
931 if (!d_mountpoint(path->dentry))
932 break;
933
9875cf80
DH
934 mounted = __lookup_mnt(path->mnt, path->dentry, 1);
935 if (!mounted)
936 break;
c7105365
AV
937 path->mnt = &mounted->mnt;
938 path->dentry = mounted->mnt.mnt_root;
a3fbbde7 939 nd->flags |= LOOKUP_JUMPED;
9875cf80 940 nd->seq = read_seqcount_begin(&path->dentry->d_seq);
59430262
LT
941 /*
942 * Update the inode too. We don't need to re-check the
943 * dentry sequence number here after this d_inode read,
944 * because a mount-point is always pinned.
945 */
946 *inode = path->dentry->d_inode;
9875cf80 947 }
9875cf80
DH
948 return true;
949}
950
dea39376 951static void follow_mount_rcu(struct nameidata *nd)
287548e4 952{
dea39376 953 while (d_mountpoint(nd->path.dentry)) {
c7105365 954 struct mount *mounted;
dea39376 955 mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry, 1);
287548e4
AV
956 if (!mounted)
957 break;
c7105365
AV
958 nd->path.mnt = &mounted->mnt;
959 nd->path.dentry = mounted->mnt.mnt_root;
dea39376 960 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
287548e4
AV
961 }
962}
963
31e6b01f
NP
964static int follow_dotdot_rcu(struct nameidata *nd)
965{
31e6b01f
NP
966 set_root_rcu(nd);
967
9875cf80 968 while (1) {
31e6b01f
NP
969 if (nd->path.dentry == nd->root.dentry &&
970 nd->path.mnt == nd->root.mnt) {
971 break;
972 }
973 if (nd->path.dentry != nd->path.mnt->mnt_root) {
974 struct dentry *old = nd->path.dentry;
975 struct dentry *parent = old->d_parent;
976 unsigned seq;
977
978 seq = read_seqcount_begin(&parent->d_seq);
979 if (read_seqcount_retry(&old->d_seq, nd->seq))
ef7562d5 980 goto failed;
31e6b01f
NP
981 nd->path.dentry = parent;
982 nd->seq = seq;
983 break;
984 }
985 if (!follow_up_rcu(&nd->path))
986 break;
987 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
31e6b01f 988 }
dea39376
AV
989 follow_mount_rcu(nd);
990 nd->inode = nd->path.dentry->d_inode;
31e6b01f 991 return 0;
ef7562d5
AV
992
993failed:
994 nd->flags &= ~LOOKUP_RCU;
5b6ca027
AV
995 if (!(nd->flags & LOOKUP_ROOT))
996 nd->root.mnt = NULL;
32a7991b 997 unlock_rcu_walk();
ef7562d5 998 return -ECHILD;
31e6b01f
NP
999}
1000
cc53ce53
DH
1001/*
1002 * Follow down to the covering mount currently visible to userspace. At each
1003 * point, the filesystem owning that dentry may be queried as to whether the
1004 * caller is permitted to proceed or not.
cc53ce53 1005 */
7cc90cc3 1006int follow_down(struct path *path)
cc53ce53
DH
1007{
1008 unsigned managed;
1009 int ret;
1010
1011 while (managed = ACCESS_ONCE(path->dentry->d_flags),
1012 unlikely(managed & DCACHE_MANAGED_DENTRY)) {
1013 /* Allow the filesystem to manage the transit without i_mutex
1014 * being held.
1015 *
1016 * We indicate to the filesystem if someone is trying to mount
1017 * something here. This gives autofs the chance to deny anyone
1018 * other than its daemon the right to mount on its
1019 * superstructure.
1020 *
1021 * The filesystem may sleep at this point.
1022 */
1023 if (managed & DCACHE_MANAGE_TRANSIT) {
1024 BUG_ON(!path->dentry->d_op);
1025 BUG_ON(!path->dentry->d_op->d_manage);
ab90911f 1026 ret = path->dentry->d_op->d_manage(
1aed3e42 1027 path->dentry, false);
cc53ce53
DH
1028 if (ret < 0)
1029 return ret == -EISDIR ? 0 : ret;
1030 }
1031
1032 /* Transit to a mounted filesystem. */
1033 if (managed & DCACHE_MOUNTED) {
1034 struct vfsmount *mounted = lookup_mnt(path);
1035 if (!mounted)
1036 break;
1037 dput(path->dentry);
1038 mntput(path->mnt);
1039 path->mnt = mounted;
1040 path->dentry = dget(mounted->mnt_root);
1041 continue;
1042 }
1043
1044 /* Don't handle automount points here */
1045 break;
1046 }
1047 return 0;
1048}
1049
9875cf80
DH
1050/*
1051 * Skip to top of mountpoint pile in refwalk mode for follow_dotdot()
1052 */
1053static void follow_mount(struct path *path)
1054{
1055 while (d_mountpoint(path->dentry)) {
1056 struct vfsmount *mounted = lookup_mnt(path);
1057 if (!mounted)
1058 break;
1059 dput(path->dentry);
1060 mntput(path->mnt);
1061 path->mnt = mounted;
1062 path->dentry = dget(mounted->mnt_root);
1063 }
1064}
1065
31e6b01f 1066static void follow_dotdot(struct nameidata *nd)
1da177e4 1067{
2a737871 1068 set_root(nd);
e518ddb7 1069
1da177e4 1070 while(1) {
4ac91378 1071 struct dentry *old = nd->path.dentry;
1da177e4 1072
2a737871
AV
1073 if (nd->path.dentry == nd->root.dentry &&
1074 nd->path.mnt == nd->root.mnt) {
1da177e4
LT
1075 break;
1076 }
4ac91378 1077 if (nd->path.dentry != nd->path.mnt->mnt_root) {
3088dd70
AV
1078 /* rare case of legitimate dget_parent()... */
1079 nd->path.dentry = dget_parent(nd->path.dentry);
1da177e4
LT
1080 dput(old);
1081 break;
1082 }
3088dd70 1083 if (!follow_up(&nd->path))
1da177e4 1084 break;
1da177e4 1085 }
79ed0226 1086 follow_mount(&nd->path);
31e6b01f 1087 nd->inode = nd->path.dentry->d_inode;
1da177e4
LT
1088}
1089
baa03890 1090/*
bad61189
MS
1091 * This looks up the name in dcache, possibly revalidates the old dentry and
1092 * allocates a new one if not found or not valid. In the need_lookup argument
1093 * returns whether i_op->lookup is necessary.
1094 *
1095 * dir->d_inode->i_mutex must be held
baa03890 1096 */
bad61189 1097static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
201f956e 1098 unsigned int flags, bool *need_lookup)
baa03890 1099{
baa03890 1100 struct dentry *dentry;
bad61189 1101 int error;
baa03890 1102
bad61189
MS
1103 *need_lookup = false;
1104 dentry = d_lookup(dir, name);
1105 if (dentry) {
1106 if (d_need_lookup(dentry)) {
1107 *need_lookup = true;
1108 } else if (dentry->d_flags & DCACHE_OP_REVALIDATE) {
201f956e 1109 error = d_revalidate(dentry, flags);
bad61189
MS
1110 if (unlikely(error <= 0)) {
1111 if (error < 0) {
1112 dput(dentry);
1113 return ERR_PTR(error);
1114 } else if (!d_invalidate(dentry)) {
1115 dput(dentry);
1116 dentry = NULL;
1117 }
1118 }
1119 }
1120 }
baa03890 1121
bad61189
MS
1122 if (!dentry) {
1123 dentry = d_alloc(dir, name);
1124 if (unlikely(!dentry))
1125 return ERR_PTR(-ENOMEM);
baa03890 1126
bad61189 1127 *need_lookup = true;
baa03890
NP
1128 }
1129 return dentry;
1130}
1131
44396f4b 1132/*
bad61189
MS
1133 * Call i_op->lookup on the dentry. The dentry must be negative but may be
1134 * hashed if it was pouplated with DCACHE_NEED_LOOKUP.
1135 *
1136 * dir->d_inode->i_mutex must be held
44396f4b 1137 */
bad61189 1138static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry,
72bd866a 1139 unsigned int flags)
44396f4b 1140{
44396f4b
JB
1141 struct dentry *old;
1142
1143 /* Don't create child dentry for a dead directory. */
bad61189 1144 if (unlikely(IS_DEADDIR(dir))) {
e188dc02 1145 dput(dentry);
44396f4b 1146 return ERR_PTR(-ENOENT);
e188dc02 1147 }
44396f4b 1148
72bd866a 1149 old = dir->i_op->lookup(dir, dentry, flags);
44396f4b
JB
1150 if (unlikely(old)) {
1151 dput(dentry);
1152 dentry = old;
1153 }
1154 return dentry;
1155}
1156
a3255546 1157static struct dentry *__lookup_hash(struct qstr *name,
72bd866a 1158 struct dentry *base, unsigned int flags)
a3255546 1159{
bad61189 1160 bool need_lookup;
a3255546
AV
1161 struct dentry *dentry;
1162
72bd866a 1163 dentry = lookup_dcache(name, base, flags, &need_lookup);
bad61189
MS
1164 if (!need_lookup)
1165 return dentry;
a3255546 1166
72bd866a 1167 return lookup_real(base->d_inode, dentry, flags);
a3255546
AV
1168}
1169
1da177e4
LT
1170/*
1171 * It's more convoluted than I'd like it to be, but... it's still fairly
1172 * small and for now I'd prefer to have fast path as straight as possible.
1173 * It _is_ time-critical.
1174 */
697f514d
MS
1175static int lookup_fast(struct nameidata *nd, struct qstr *name,
1176 struct path *path, struct inode **inode)
1da177e4 1177{
4ac91378 1178 struct vfsmount *mnt = nd->path.mnt;
31e6b01f 1179 struct dentry *dentry, *parent = nd->path.dentry;
5a18fff2
AV
1180 int need_reval = 1;
1181 int status = 1;
9875cf80
DH
1182 int err;
1183
b04f784e
NP
1184 /*
1185 * Rename seqlock is not required here because in the off chance
1186 * of a false negative due to a concurrent rename, we're going to
1187 * do the non-racy lookup, below.
1188 */
31e6b01f
NP
1189 if (nd->flags & LOOKUP_RCU) {
1190 unsigned seq;
12f8ad4b 1191 dentry = __d_lookup_rcu(parent, name, &seq, nd->inode);
5a18fff2
AV
1192 if (!dentry)
1193 goto unlazy;
1194
12f8ad4b
LT
1195 /*
1196 * This sequence count validates that the inode matches
1197 * the dentry name information from lookup.
1198 */
1199 *inode = dentry->d_inode;
1200 if (read_seqcount_retry(&dentry->d_seq, seq))
1201 return -ECHILD;
1202
1203 /*
1204 * This sequence count validates that the parent had no
1205 * changes while we did the lookup of the dentry above.
1206 *
1207 * The memory barrier in read_seqcount_begin of child is
1208 * enough, we can use __read_seqcount_retry here.
1209 */
31e6b01f
NP
1210 if (__read_seqcount_retry(&parent->d_seq, nd->seq))
1211 return -ECHILD;
31e6b01f 1212 nd->seq = seq;
5a18fff2 1213
fa4ee159
MS
1214 if (unlikely(d_need_lookup(dentry)))
1215 goto unlazy;
24643087 1216 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
4ce16ef3 1217 status = d_revalidate(dentry, nd->flags);
5a18fff2
AV
1218 if (unlikely(status <= 0)) {
1219 if (status != -ECHILD)
1220 need_reval = 0;
1221 goto unlazy;
1222 }
24643087 1223 }
31e6b01f
NP
1224 path->mnt = mnt;
1225 path->dentry = dentry;
d6e9bd25
AV
1226 if (unlikely(!__follow_mount_rcu(nd, path, inode)))
1227 goto unlazy;
1228 if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
1229 goto unlazy;
1230 return 0;
5a18fff2 1231unlazy:
19660af7
AV
1232 if (unlazy_walk(nd, dentry))
1233 return -ECHILD;
5a18fff2
AV
1234 } else {
1235 dentry = __d_lookup(parent, name);
9875cf80 1236 }
5a18fff2 1237
81e6f520
AV
1238 if (unlikely(!dentry))
1239 goto need_lookup;
1240
1241 if (unlikely(d_need_lookup(dentry))) {
44396f4b 1242 dput(dentry);
81e6f520 1243 goto need_lookup;
5a18fff2 1244 }
81e6f520 1245
5a18fff2 1246 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
4ce16ef3 1247 status = d_revalidate(dentry, nd->flags);
5a18fff2
AV
1248 if (unlikely(status <= 0)) {
1249 if (status < 0) {
1250 dput(dentry);
1251 return status;
1252 }
1253 if (!d_invalidate(dentry)) {
1254 dput(dentry);
81e6f520 1255 goto need_lookup;
5a18fff2 1256 }
24643087 1257 }
697f514d 1258
9875cf80
DH
1259 path->mnt = mnt;
1260 path->dentry = dentry;
1261 err = follow_managed(path, nd->flags);
89312214
IK
1262 if (unlikely(err < 0)) {
1263 path_put_conditional(path, nd);
9875cf80 1264 return err;
89312214 1265 }
a3fbbde7
AV
1266 if (err)
1267 nd->flags |= LOOKUP_JUMPED;
9875cf80 1268 *inode = path->dentry->d_inode;
1da177e4 1269 return 0;
81e6f520
AV
1270
1271need_lookup:
697f514d
MS
1272 return 1;
1273}
1274
1275/* Fast lookup failed, do it the slow way */
1276static int lookup_slow(struct nameidata *nd, struct qstr *name,
1277 struct path *path)
1278{
1279 struct dentry *dentry, *parent;
1280 int err;
1281
1282 parent = nd->path.dentry;
81e6f520
AV
1283 BUG_ON(nd->inode != parent->d_inode);
1284
1285 mutex_lock(&parent->d_inode->i_mutex);
72bd866a 1286 dentry = __lookup_hash(name, parent, nd->flags);
81e6f520
AV
1287 mutex_unlock(&parent->d_inode->i_mutex);
1288 if (IS_ERR(dentry))
1289 return PTR_ERR(dentry);
697f514d
MS
1290 path->mnt = nd->path.mnt;
1291 path->dentry = dentry;
1292 err = follow_managed(path, nd->flags);
1293 if (unlikely(err < 0)) {
1294 path_put_conditional(path, nd);
1295 return err;
1296 }
1297 if (err)
1298 nd->flags |= LOOKUP_JUMPED;
1299 return 0;
1da177e4
LT
1300}
1301
52094c8a
AV
1302static inline int may_lookup(struct nameidata *nd)
1303{
1304 if (nd->flags & LOOKUP_RCU) {
4ad5abb3 1305 int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
52094c8a
AV
1306 if (err != -ECHILD)
1307 return err;
19660af7 1308 if (unlazy_walk(nd, NULL))
52094c8a
AV
1309 return -ECHILD;
1310 }
4ad5abb3 1311 return inode_permission(nd->inode, MAY_EXEC);
52094c8a
AV
1312}
1313
9856fa1b
AV
1314static inline int handle_dots(struct nameidata *nd, int type)
1315{
1316 if (type == LAST_DOTDOT) {
1317 if (nd->flags & LOOKUP_RCU) {
1318 if (follow_dotdot_rcu(nd))
1319 return -ECHILD;
1320 } else
1321 follow_dotdot(nd);
1322 }
1323 return 0;
1324}
1325
951361f9
AV
1326static void terminate_walk(struct nameidata *nd)
1327{
1328 if (!(nd->flags & LOOKUP_RCU)) {
1329 path_put(&nd->path);
1330 } else {
1331 nd->flags &= ~LOOKUP_RCU;
5b6ca027
AV
1332 if (!(nd->flags & LOOKUP_ROOT))
1333 nd->root.mnt = NULL;
32a7991b 1334 unlock_rcu_walk();
951361f9
AV
1335 }
1336}
1337
3ddcd056
LT
1338/*
1339 * Do we need to follow links? We _really_ want to be able
1340 * to do this check without having to look at inode->i_op,
1341 * so we keep a cache of "no, this doesn't need follow_link"
1342 * for the common case.
1343 */
7813b94a 1344static inline int should_follow_link(struct inode *inode, int follow)
3ddcd056
LT
1345{
1346 if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
1347 if (likely(inode->i_op->follow_link))
1348 return follow;
1349
1350 /* This gets set once for the inode lifetime */
1351 spin_lock(&inode->i_lock);
1352 inode->i_opflags |= IOP_NOFOLLOW;
1353 spin_unlock(&inode->i_lock);
1354 }
1355 return 0;
1356}
1357
ce57dfc1
AV
1358static inline int walk_component(struct nameidata *nd, struct path *path,
1359 struct qstr *name, int type, int follow)
1360{
1361 struct inode *inode;
1362 int err;
1363 /*
1364 * "." and ".." are special - ".." especially so because it has
1365 * to be able to know about the current root directory and
1366 * parent relationships.
1367 */
1368 if (unlikely(type != LAST_NORM))
1369 return handle_dots(nd, type);
697f514d 1370 err = lookup_fast(nd, name, path, &inode);
ce57dfc1 1371 if (unlikely(err)) {
697f514d
MS
1372 if (err < 0)
1373 goto out_err;
1374
1375 err = lookup_slow(nd, name, path);
1376 if (err < 0)
1377 goto out_err;
1378
1379 inode = path->dentry->d_inode;
ce57dfc1 1380 }
697f514d
MS
1381 err = -ENOENT;
1382 if (!inode)
1383 goto out_path_put;
1384
7813b94a 1385 if (should_follow_link(inode, follow)) {
19660af7
AV
1386 if (nd->flags & LOOKUP_RCU) {
1387 if (unlikely(unlazy_walk(nd, path->dentry))) {
697f514d
MS
1388 err = -ECHILD;
1389 goto out_err;
19660af7
AV
1390 }
1391 }
ce57dfc1
AV
1392 BUG_ON(inode != path->dentry->d_inode);
1393 return 1;
1394 }
1395 path_to_nameidata(path, nd);
1396 nd->inode = inode;
1397 return 0;
697f514d
MS
1398
1399out_path_put:
1400 path_to_nameidata(path, nd);
1401out_err:
1402 terminate_walk(nd);
1403 return err;
ce57dfc1
AV
1404}
1405
b356379a
AV
1406/*
1407 * This limits recursive symlink follows to 8, while
1408 * limiting consecutive symlinks to 40.
1409 *
1410 * Without that kind of total limit, nasty chains of consecutive
1411 * symlinks can cause almost arbitrarily long lookups.
1412 */
1413static inline int nested_symlink(struct path *path, struct nameidata *nd)
1414{
1415 int res;
1416
b356379a
AV
1417 if (unlikely(current->link_count >= MAX_NESTED_LINKS)) {
1418 path_put_conditional(path, nd);
1419 path_put(&nd->path);
1420 return -ELOOP;
1421 }
1a4022f8 1422 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
b356379a
AV
1423
1424 nd->depth++;
1425 current->link_count++;
1426
1427 do {
1428 struct path link = *path;
1429 void *cookie;
574197e0
AV
1430
1431 res = follow_link(&link, nd, &cookie);
6d7b5aae
AV
1432 if (res)
1433 break;
1434 res = walk_component(nd, path, &nd->last,
1435 nd->last_type, LOOKUP_FOLLOW);
574197e0 1436 put_link(nd, &link, cookie);
b356379a
AV
1437 } while (res > 0);
1438
1439 current->link_count--;
1440 nd->depth--;
1441 return res;
1442}
1443
3ddcd056
LT
1444/*
1445 * We really don't want to look at inode->i_op->lookup
1446 * when we don't have to. So we keep a cache bit in
1447 * the inode ->i_opflags field that says "yes, we can
1448 * do lookup on this inode".
1449 */
1450static inline int can_lookup(struct inode *inode)
1451{
1452 if (likely(inode->i_opflags & IOP_LOOKUP))
1453 return 1;
1454 if (likely(!inode->i_op->lookup))
1455 return 0;
1456
1457 /* We do this once for the lifetime of the inode */
1458 spin_lock(&inode->i_lock);
1459 inode->i_opflags |= IOP_LOOKUP;
1460 spin_unlock(&inode->i_lock);
1461 return 1;
1462}
1463
bfcfaa77
LT
1464/*
1465 * We can do the critical dentry name comparison and hashing
1466 * operations one word at a time, but we are limited to:
1467 *
1468 * - Architectures with fast unaligned word accesses. We could
1469 * do a "get_unaligned()" if this helps and is sufficiently
1470 * fast.
1471 *
1472 * - Little-endian machines (so that we can generate the mask
1473 * of low bytes efficiently). Again, we *could* do a byte
1474 * swapping load on big-endian architectures if that is not
1475 * expensive enough to make the optimization worthless.
1476 *
1477 * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we
1478 * do not trap on the (extremely unlikely) case of a page
1479 * crossing operation).
1480 *
1481 * - Furthermore, we need an efficient 64-bit compile for the
1482 * 64-bit case in order to generate the "number of bytes in
1483 * the final mask". Again, that could be replaced with an
1484 * efficient population count instruction or similar.
1485 */
1486#ifdef CONFIG_DCACHE_WORD_ACCESS
1487
f68e556e 1488#include <asm/word-at-a-time.h>
bfcfaa77 1489
f68e556e 1490#ifdef CONFIG_64BIT
bfcfaa77
LT
1491
/*
 * Fold a word-sized hash into an unsigned int: the bits above the width of
 * an int are added onto the value, and the return type truncates the sum.
 */
static inline unsigned int fold_hash(unsigned long hash)
{
	return hash + (hash >> (8 * sizeof(int)));
}
1497
1498#else /* 32-bit case */
1499
bfcfaa77
LT
1500#define fold_hash(x) (x)
1501
1502#endif
1503
/*
 * Word-at-a-time hash of the first @len bytes of @name.
 *
 * Each full word is added to the hash, which is then multiplied by 9; a
 * trailing partial word is masked down to its low @len bytes before being
 * added.  The result is passed through fold_hash().
 */
unsigned int full_name_hash(const unsigned char *name, unsigned int len)
{
	unsigned long hash = 0;
	unsigned long word = load_unaligned_zeropad(name);

	while (len >= sizeof(unsigned long)) {
		hash = (hash + word) * 9;
		name += sizeof(unsigned long);
		len -= sizeof(unsigned long);
		/* Nothing left: do not load past the end of the name. */
		if (!len)
			return fold_hash(hash);
		word = load_unaligned_zeropad(name);
	}

	/* Keep only the low len bytes of the final word. */
	hash += word & ~(~0ul << len*8);
	return fold_hash(hash);
}
EXPORT_SYMBOL(full_name_hash);
1526
bfcfaa77
LT
1527/*
1528 * Calculate the length and hash of the path component, and
1529 * return the length of the component;
1530 */
1531static inline unsigned long hash_name(const char *name, unsigned int *hashp)
1532{
36126f8f
LT
1533 unsigned long a, b, adata, bdata, mask, hash, len;
1534 const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
bfcfaa77
LT
1535
1536 hash = a = 0;
1537 len = -sizeof(unsigned long);
1538 do {
1539 hash = (hash + a) * 9;
1540 len += sizeof(unsigned long);
e419b4cc 1541 a = load_unaligned_zeropad(name+len);
36126f8f
LT
1542 b = a ^ REPEAT_BYTE('/');
1543 } while (!(has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants)));
1544
1545 adata = prep_zero_mask(a, adata, &constants);
1546 bdata = prep_zero_mask(b, bdata, &constants);
1547
1548 mask = create_zero_mask(adata | bdata);
1549
1550 hash += a & zero_bytemask(mask);
bfcfaa77
LT
1551 *hashp = fold_hash(hash);
1552
36126f8f 1553 return len + find_zero(mask);
bfcfaa77
LT
1554}
1555
1556#else
1557
0145acc2
LT
/*
 * Byte-at-a-time hash of the first @len bytes of @name, built from
 * init_name_hash(), partial_name_hash() and end_name_hash().
 */
unsigned int full_name_hash(const unsigned char *name, unsigned int len)
{
	unsigned long hash = init_name_hash();
	unsigned int i;

	for (i = 0; i < len; i++)
		hash = partial_name_hash(name[i], hash);

	return end_name_hash(hash);
}
EXPORT_SYMBOL(full_name_hash);
0145acc2 1566
200e9ef7
LT
/*
 * Byte-at-a-time variant: hash the path component starting at @name into
 * *hashp and return its length.
 *
 * We know there's a real path component here of at least one character,
 * so the first byte is hashed unconditionally; the component ends at the
 * first NUL or '/' after it.
 */
static inline unsigned long hash_name(const char *name, unsigned int *hashp)
{
	const unsigned char *p = (const unsigned char *)name;
	unsigned long hash = init_name_hash();
	unsigned long n = 0;

	do {
		hash = partial_name_hash(p[n], hash);
		n++;
	} while (p[n] != '\0' && p[n] != '/');

	*hashp = end_name_hash(hash);
	return n;
}
1585
bfcfaa77
LT
1586#endif
1587
1da177e4
LT
1588/*
1589 * Name resolution.
ea3834d9
PM
1590 * This is the basic name resolution function, turning a pathname into
1591 * the final dentry. We expect 'base' to be positive and a directory.
1da177e4 1592 *
ea3834d9
PM
1593 * Returns 0 and nd will have valid dentry and mnt on success.
1594 * Returns error and drops reference to input namei data on failure.
1da177e4 1595 */
6de88d72 1596static int link_path_walk(const char *name, struct nameidata *nd)
1da177e4
LT
1597{
1598 struct path next;
1da177e4 1599 int err;
1da177e4
LT
1600
1601 while (*name=='/')
1602 name++;
1603 if (!*name)
086e183a 1604 return 0;
1da177e4 1605
1da177e4
LT
1606 /* At this point we know we have a real path component. */
1607 for(;;) {
1da177e4 1608 struct qstr this;
200e9ef7 1609 long len;
fe479a58 1610 int type;
1da177e4 1611
52094c8a 1612 err = may_lookup(nd);
1da177e4
LT
1613 if (err)
1614 break;
1615
200e9ef7 1616 len = hash_name(name, &this.hash);
1da177e4 1617 this.name = name;
200e9ef7 1618 this.len = len;
1da177e4 1619
fe479a58 1620 type = LAST_NORM;
200e9ef7 1621 if (name[0] == '.') switch (len) {
fe479a58 1622 case 2:
200e9ef7 1623 if (name[1] == '.') {
fe479a58 1624 type = LAST_DOTDOT;
16c2cd71
AV
1625 nd->flags |= LOOKUP_JUMPED;
1626 }
fe479a58
AV
1627 break;
1628 case 1:
1629 type = LAST_DOT;
1630 }
5a202bcd
AV
1631 if (likely(type == LAST_NORM)) {
1632 struct dentry *parent = nd->path.dentry;
16c2cd71 1633 nd->flags &= ~LOOKUP_JUMPED;
5a202bcd
AV
1634 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
1635 err = parent->d_op->d_hash(parent, nd->inode,
1636 &this);
1637 if (err < 0)
1638 break;
1639 }
1640 }
fe479a58 1641
200e9ef7 1642 if (!name[len])
1da177e4 1643 goto last_component;
200e9ef7
LT
1644 /*
1645 * If it wasn't NUL, we know it was '/'. Skip that
1646 * slash, and continue until no more slashes.
1647 */
1648 do {
1649 len++;
1650 } while (unlikely(name[len] == '/'));
1651 if (!name[len])
b356379a 1652 goto last_component;
200e9ef7 1653 name += len;
1da177e4 1654
ce57dfc1
AV
1655 err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW);
1656 if (err < 0)
1657 return err;
1da177e4 1658
ce57dfc1 1659 if (err) {
b356379a 1660 err = nested_symlink(&next, nd);
1da177e4 1661 if (err)
a7472bab 1662 return err;
31e6b01f 1663 }
3ddcd056
LT
1664 if (can_lookup(nd->inode))
1665 continue;
1da177e4 1666 err = -ENOTDIR;
3ddcd056 1667 break;
1da177e4
LT
1668 /* here ends the main loop */
1669
1da177e4 1670last_component:
b356379a
AV
1671 nd->last = this;
1672 nd->last_type = type;
086e183a 1673 return 0;
1da177e4 1674 }
951361f9 1675 terminate_walk(nd);
1da177e4
LT
1676 return err;
1677}
1678
70e9b357
AV
1679static int path_init(int dfd, const char *name, unsigned int flags,
1680 struct nameidata *nd, struct file **fp)
31e6b01f
NP
1681{
1682 int retval = 0;
1683 int fput_needed;
1684 struct file *file;
1685
1686 nd->last_type = LAST_ROOT; /* if there are only slashes... */
16c2cd71 1687 nd->flags = flags | LOOKUP_JUMPED;
31e6b01f 1688 nd->depth = 0;
5b6ca027
AV
1689 if (flags & LOOKUP_ROOT) {
1690 struct inode *inode = nd->root.dentry->d_inode;
73d049a4
AV
1691 if (*name) {
1692 if (!inode->i_op->lookup)
1693 return -ENOTDIR;
1694 retval = inode_permission(inode, MAY_EXEC);
1695 if (retval)
1696 return retval;
1697 }
5b6ca027
AV
1698 nd->path = nd->root;
1699 nd->inode = inode;
1700 if (flags & LOOKUP_RCU) {
32a7991b 1701 lock_rcu_walk();
5b6ca027
AV
1702 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1703 } else {
1704 path_get(&nd->path);
1705 }
1706 return 0;
1707 }
1708
31e6b01f 1709 nd->root.mnt = NULL;
31e6b01f
NP
1710
1711 if (*name=='/') {
e41f7d4e 1712 if (flags & LOOKUP_RCU) {
32a7991b 1713 lock_rcu_walk();
e41f7d4e
AV
1714 set_root_rcu(nd);
1715 } else {
1716 set_root(nd);
1717 path_get(&nd->root);
1718 }
1719 nd->path = nd->root;
31e6b01f 1720 } else if (dfd == AT_FDCWD) {
e41f7d4e
AV
1721 if (flags & LOOKUP_RCU) {
1722 struct fs_struct *fs = current->fs;
1723 unsigned seq;
31e6b01f 1724
32a7991b 1725 lock_rcu_walk();
c28cc364 1726
e41f7d4e
AV
1727 do {
1728 seq = read_seqcount_begin(&fs->seq);
1729 nd->path = fs->pwd;
1730 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1731 } while (read_seqcount_retry(&fs->seq, seq));
1732 } else {
1733 get_fs_pwd(current->fs, &nd->path);
1734 }
31e6b01f
NP
1735 } else {
1736 struct dentry *dentry;
1737
1abf0c71 1738 file = fget_raw_light(dfd, &fput_needed);
31e6b01f
NP
1739 retval = -EBADF;
1740 if (!file)
1741 goto out_fail;
1742
1743 dentry = file->f_path.dentry;
1744
f52e0c11
AV
1745 if (*name) {
1746 retval = -ENOTDIR;
1747 if (!S_ISDIR(dentry->d_inode->i_mode))
1748 goto fput_fail;
31e6b01f 1749
4ad5abb3 1750 retval = inode_permission(dentry->d_inode, MAY_EXEC);
f52e0c11
AV
1751 if (retval)
1752 goto fput_fail;
1753 }
31e6b01f
NP
1754
1755 nd->path = file->f_path;
e41f7d4e
AV
1756 if (flags & LOOKUP_RCU) {
1757 if (fput_needed)
70e9b357 1758 *fp = file;
e41f7d4e 1759 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
32a7991b 1760 lock_rcu_walk();
e41f7d4e
AV
1761 } else {
1762 path_get(&file->f_path);
1763 fput_light(file, fput_needed);
1764 }
31e6b01f 1765 }
31e6b01f 1766
31e6b01f 1767 nd->inode = nd->path.dentry->d_inode;
9b4a9b14 1768 return 0;
2dfdd266 1769
9b4a9b14
AV
1770fput_fail:
1771 fput_light(file, fput_needed);
1772out_fail:
1773 return retval;
1774}
1775
bd92d7fe
AV
1776static inline int lookup_last(struct nameidata *nd, struct path *path)
1777{
1778 if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len])
1779 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
1780
1781 nd->flags &= ~LOOKUP_PARENT;
1782 return walk_component(nd, path, &nd->last, nd->last_type,
1783 nd->flags & LOOKUP_FOLLOW);
1784}
1785
9b4a9b14 1786/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
ee0827cd 1787static int path_lookupat(int dfd, const char *name,
9b4a9b14
AV
1788 unsigned int flags, struct nameidata *nd)
1789{
70e9b357 1790 struct file *base = NULL;
bd92d7fe
AV
1791 struct path path;
1792 int err;
31e6b01f
NP
1793
1794 /*
1795 * Path walking is largely split up into 2 different synchronisation
1796 * schemes, rcu-walk and ref-walk (explained in
1797 * Documentation/filesystems/path-lookup.txt). These share much of the
1798 * path walk code, but some things particularly setup, cleanup, and
1799 * following mounts are sufficiently divergent that functions are
1800 * duplicated. Typically there is a function foo(), and its RCU
1801 * analogue, foo_rcu().
1802 *
1803 * -ECHILD is the error number of choice (just to avoid clashes) that
1804 * is returned if some aspect of an rcu-walk fails. Such an error must
1805 * be handled by restarting a traditional ref-walk (which will always
1806 * be able to complete).
1807 */
bd92d7fe 1808 err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base);
ee0827cd 1809
bd92d7fe
AV
1810 if (unlikely(err))
1811 return err;
ee0827cd
AV
1812
1813 current->total_link_count = 0;
bd92d7fe
AV
1814 err = link_path_walk(name, nd);
1815
1816 if (!err && !(flags & LOOKUP_PARENT)) {
bd92d7fe
AV
1817 err = lookup_last(nd, &path);
1818 while (err > 0) {
1819 void *cookie;
1820 struct path link = path;
bd92d7fe 1821 nd->flags |= LOOKUP_PARENT;
574197e0 1822 err = follow_link(&link, nd, &cookie);
6d7b5aae
AV
1823 if (err)
1824 break;
1825 err = lookup_last(nd, &path);
574197e0 1826 put_link(nd, &link, cookie);
bd92d7fe
AV
1827 }
1828 }
ee0827cd 1829
9f1fafee
AV
1830 if (!err)
1831 err = complete_walk(nd);
bd92d7fe
AV
1832
1833 if (!err && nd->flags & LOOKUP_DIRECTORY) {
1834 if (!nd->inode->i_op->lookup) {
1835 path_put(&nd->path);
bd23a539 1836 err = -ENOTDIR;
bd92d7fe
AV
1837 }
1838 }
16c2cd71 1839
70e9b357
AV
1840 if (base)
1841 fput(base);
ee0827cd 1842
5b6ca027 1843 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
2a737871
AV
1844 path_put(&nd->root);
1845 nd->root.mnt = NULL;
1846 }
bd92d7fe 1847 return err;
ee0827cd 1848}
31e6b01f 1849
ee0827cd
AV
1850static int do_path_lookup(int dfd, const char *name,
1851 unsigned int flags, struct nameidata *nd)
1852{
1853 int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
1854 if (unlikely(retval == -ECHILD))
1855 retval = path_lookupat(dfd, name, flags, nd);
1856 if (unlikely(retval == -ESTALE))
1857 retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);
31e6b01f
NP
1858
1859 if (likely(!retval)) {
1860 if (unlikely(!audit_dummy_context())) {
1861 if (nd->path.dentry && nd->inode)
1862 audit_inode(name, nd->path.dentry);
1863 }
1864 }
170aa3d0 1865 return retval;
1da177e4
LT
1866}
1867
79714f72
AV
1868/* does lookup, returns the object with parent locked */
1869struct dentry *kern_path_locked(const char *name, struct path *path)
5590ff0d 1870{
79714f72
AV
1871 struct nameidata nd;
1872 struct dentry *d;
1873 int err = do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, &nd);
1874 if (err)
1875 return ERR_PTR(err);
1876 if (nd.last_type != LAST_NORM) {
1877 path_put(&nd.path);
1878 return ERR_PTR(-EINVAL);
1879 }
1880 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
1e0ea001 1881 d = __lookup_hash(&nd.last, nd.path.dentry, 0);
79714f72
AV
1882 if (IS_ERR(d)) {
1883 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
1884 path_put(&nd.path);
1885 return d;
1886 }
1887 *path = nd.path;
1888 return d;
5590ff0d
UD
1889}
1890
d1811465
AV
1891int kern_path(const char *name, unsigned int flags, struct path *path)
1892{
1893 struct nameidata nd;
1894 int res = do_path_lookup(AT_FDCWD, name, flags, &nd);
1895 if (!res)
1896 *path = nd.path;
1897 return res;
1898}
1899
16f18200
JJS
1900/**
1901 * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair
1902 * @dentry: pointer to dentry of the base directory
1903 * @mnt: pointer to vfs mount of the base directory
1904 * @name: pointer to file name
1905 * @flags: lookup flags
e0a01249 1906 * @path: pointer to struct path to fill
16f18200
JJS
1907 */
1908int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1909 const char *name, unsigned int flags,
e0a01249 1910 struct path *path)
16f18200 1911{
e0a01249
AV
1912 struct nameidata nd;
1913 int err;
1914 nd.root.dentry = dentry;
1915 nd.root.mnt = mnt;
1916 BUG_ON(flags & LOOKUP_PARENT);
5b6ca027 1917 /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */
e0a01249
AV
1918 err = do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, &nd);
1919 if (!err)
1920 *path = nd.path;
1921 return err;
16f18200
JJS
1922}
1923
057f6c01
JM
1924/*
1925 * Restricted form of lookup. Doesn't follow links, single-component only,
1926 * needs parent already locked. Doesn't follow mounts.
1927 * SMP-safe.
1928 */
eead1911 1929static struct dentry *lookup_hash(struct nameidata *nd)
057f6c01 1930{
72bd866a 1931 return __lookup_hash(&nd->last, nd->path.dentry, nd->flags);
1da177e4
LT
1932}
1933
eead1911 1934/**
a6b91919 1935 * lookup_one_len - filesystem helper to lookup single pathname component
eead1911
CH
1936 * @name: pathname component to lookup
1937 * @base: base directory to lookup from
1938 * @len: maximum length @len should be interpreted to
1939 *
a6b91919
RD
1940 * Note that this routine is purely a helper for filesystem usage and should
1941 * not be called by generic code. Also note that by using this function the
eead1911
CH
1942 * nameidata argument is passed to the filesystem methods and a filesystem
1943 * using this helper needs to be prepared for that.
1944 */
057f6c01
JM
1945struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1946{
057f6c01 1947 struct qstr this;
6a96ba54 1948 unsigned int c;
cda309de 1949 int err;
057f6c01 1950
2f9092e1
DW
1951 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
1952
6a96ba54
AV
1953 this.name = name;
1954 this.len = len;
0145acc2 1955 this.hash = full_name_hash(name, len);
6a96ba54
AV
1956 if (!len)
1957 return ERR_PTR(-EACCES);
1958
6a96ba54
AV
1959 while (len--) {
1960 c = *(const unsigned char *)name++;
1961 if (c == '/' || c == '\0')
1962 return ERR_PTR(-EACCES);
6a96ba54 1963 }
5a202bcd
AV
1964 /*
1965 * See if the low-level filesystem might want
1966 * to use its own hash..
1967 */
1968 if (base->d_flags & DCACHE_OP_HASH) {
1969 int err = base->d_op->d_hash(base, base->d_inode, &this);
1970 if (err < 0)
1971 return ERR_PTR(err);
1972 }
eead1911 1973
cda309de
MS
1974 err = inode_permission(base->d_inode, MAY_EXEC);
1975 if (err)
1976 return ERR_PTR(err);
1977
72bd866a 1978 return __lookup_hash(&this, base, 0);
057f6c01
JM
1979}
1980
1fa1e7f6
AW
1981int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
1982 struct path *path, int *empty)
1da177e4 1983{
2d8f3038 1984 struct nameidata nd;
1fa1e7f6 1985 char *tmp = getname_flags(name, flags, empty);
1da177e4 1986 int err = PTR_ERR(tmp);
1da177e4 1987 if (!IS_ERR(tmp)) {
2d8f3038
AV
1988
1989 BUG_ON(flags & LOOKUP_PARENT);
1990
1991 err = do_path_lookup(dfd, tmp, flags, &nd);
1da177e4 1992 putname(tmp);
2d8f3038
AV
1993 if (!err)
1994 *path = nd.path;
1da177e4
LT
1995 }
1996 return err;
1997}
1998
1fa1e7f6
AW
1999int user_path_at(int dfd, const char __user *name, unsigned flags,
2000 struct path *path)
2001{
f7493e5d 2002 return user_path_at_empty(dfd, name, flags, path, NULL);
1fa1e7f6
AW
2003}
2004
2ad94ae6
AV
2005static int user_path_parent(int dfd, const char __user *path,
2006 struct nameidata *nd, char **name)
2007{
2008 char *s = getname(path);
2009 int error;
2010
2011 if (IS_ERR(s))
2012 return PTR_ERR(s);
2013
2014 error = do_path_lookup(dfd, s, LOOKUP_PARENT, nd);
2015 if (error)
2016 putname(s);
2017 else
2018 *name = s;
2019
2020 return error;
2021}
2022
1da177e4
LT
2023/*
2024 * It's inline, so penalty for filesystems that don't use sticky bit is
2025 * minimal.
2026 */
2027static inline int check_sticky(struct inode *dir, struct inode *inode)
2028{
8e96e3b7 2029 kuid_t fsuid = current_fsuid();
da9592ed 2030
1da177e4
LT
2031 if (!(dir->i_mode & S_ISVTX))
2032 return 0;
8e96e3b7 2033 if (uid_eq(inode->i_uid, fsuid))
1da177e4 2034 return 0;
8e96e3b7 2035 if (uid_eq(dir->i_uid, fsuid))
1da177e4 2036 return 0;
1a48e2ac 2037 return !inode_capable(inode, CAP_FOWNER);
1da177e4
LT
2038}
2039
2040/*
2041 * Check whether we can remove a link victim from directory dir, check
2042 * whether the type of victim is right.
2043 * 1. We can't do it if dir is read-only (done in permission())
2044 * 2. We should have write and exec permissions on dir
2045 * 3. We can't remove anything from append-only dir
2046 * 4. We can't do anything with immutable dir (done in permission())
2047 * 5. If the sticky bit on dir is set we should either
2048 * a. be owner of dir, or
2049 * b. be owner of victim, or
2050 * c. have CAP_FOWNER capability
2051 * 6. If the victim is append-only or immutable we can't do antyhing with
2052 * links pointing to it.
2053 * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
2054 * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
2055 * 9. We can't remove a root or mountpoint.
2056 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
2057 * nfs_async_unlink().
2058 */
858119e1 2059static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
1da177e4
LT
2060{
2061 int error;
2062
2063 if (!victim->d_inode)
2064 return -ENOENT;
2065
2066 BUG_ON(victim->d_parent->d_inode != dir);
cccc6bba 2067 audit_inode_child(victim, dir);
1da177e4 2068
f419a2e3 2069 error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
1da177e4
LT
2070 if (error)
2071 return error;
2072 if (IS_APPEND(dir))
2073 return -EPERM;
2074 if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
f9454548 2075 IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
1da177e4
LT
2076 return -EPERM;
2077 if (isdir) {
2078 if (!S_ISDIR(victim->d_inode->i_mode))
2079 return -ENOTDIR;
2080 if (IS_ROOT(victim))
2081 return -EBUSY;
2082 } else if (S_ISDIR(victim->d_inode->i_mode))
2083 return -EISDIR;
2084 if (IS_DEADDIR(dir))
2085 return -ENOENT;
2086 if (victim->d_flags & DCACHE_NFSFS_RENAMED)
2087 return -EBUSY;
2088 return 0;
2089}
2090
2091/* Check whether we can create an object with dentry child in directory
2092 * dir.
2093 * 1. We can't do it if child already exists (open has special treatment for
2094 * this case, but since we are inlined it's OK)
2095 * 2. We can't do it if dir is read-only (done in permission())
2096 * 3. We should have write and exec permissions on dir
2097 * 4. We can't do it if dir is immutable (done in permission())
2098 */
a95164d9 2099static inline int may_create(struct inode *dir, struct dentry *child)
1da177e4
LT
2100{
2101 if (child->d_inode)
2102 return -EEXIST;
2103 if (IS_DEADDIR(dir))
2104 return -ENOENT;
f419a2e3 2105 return inode_permission(dir, MAY_WRITE | MAY_EXEC);
1da177e4
LT
2106}
2107
1da177e4
LT
2108/*
2109 * p1 and p2 should be directories on the same fs.
2110 */
2111struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
2112{
2113 struct dentry *p;
2114
2115 if (p1 == p2) {
f2eace23 2116 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
1da177e4
LT
2117 return NULL;
2118 }
2119
a11f3a05 2120 mutex_lock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
1da177e4 2121
e2761a11
OH
2122 p = d_ancestor(p2, p1);
2123 if (p) {
2124 mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT);
2125 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD);
2126 return p;
1da177e4
LT
2127 }
2128
e2761a11
OH
2129 p = d_ancestor(p1, p2);
2130 if (p) {
2131 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
2132 mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
2133 return p;
1da177e4
LT
2134 }
2135
f2eace23
IM
2136 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
2137 mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
1da177e4
LT
2138 return NULL;
2139}
2140
2141void unlock_rename(struct dentry *p1, struct dentry *p2)
2142{
1b1dcc1b 2143 mutex_unlock(&p1->d_inode->i_mutex);
1da177e4 2144 if (p1 != p2) {
1b1dcc1b 2145 mutex_unlock(&p2->d_inode->i_mutex);
a11f3a05 2146 mutex_unlock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
1da177e4
LT
2147 }
2148}
2149
4acdaf27 2150int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
312b63fb 2151 bool want_excl)
1da177e4 2152{
a95164d9 2153 int error = may_create(dir, dentry);
1da177e4
LT
2154 if (error)
2155 return error;
2156
acfa4380 2157 if (!dir->i_op->create)
1da177e4
LT
2158 return -EACCES; /* shouldn't it be ENOSYS? */
2159 mode &= S_IALLUGO;
2160 mode |= S_IFREG;
2161 error = security_inode_create(dir, dentry, mode);
2162 if (error)
2163 return error;
312b63fb 2164 error = dir->i_op->create(dir, dentry, mode, want_excl);
a74574aa 2165 if (!error)
f38aa942 2166 fsnotify_create(dir, dentry);
1da177e4
LT
2167 return error;
2168}
2169
73d049a4 2170static int may_open(struct path *path, int acc_mode, int flag)
1da177e4 2171{
3fb64190 2172 struct dentry *dentry = path->dentry;
1da177e4
LT
2173 struct inode *inode = dentry->d_inode;
2174 int error;
2175
bcda7652
AV
2176 /* O_PATH? */
2177 if (!acc_mode)
2178 return 0;
2179
1da177e4
LT
2180 if (!inode)
2181 return -ENOENT;
2182
c8fe8f30
CH
2183 switch (inode->i_mode & S_IFMT) {
2184 case S_IFLNK:
1da177e4 2185 return -ELOOP;
c8fe8f30
CH
2186 case S_IFDIR:
2187 if (acc_mode & MAY_WRITE)
2188 return -EISDIR;
2189 break;
2190 case S_IFBLK:
2191 case S_IFCHR:
3fb64190 2192 if (path->mnt->mnt_flags & MNT_NODEV)
1da177e4 2193 return -EACCES;
c8fe8f30
CH
2194 /*FALLTHRU*/
2195 case S_IFIFO:
2196 case S_IFSOCK:
1da177e4 2197 flag &= ~O_TRUNC;
c8fe8f30 2198 break;
4a3fd211 2199 }
b41572e9 2200
3fb64190 2201 error = inode_permission(inode, acc_mode);
b41572e9
DH
2202 if (error)
2203 return error;
6146f0d5 2204
1da177e4
LT
2205 /*
2206 * An append-only file must be opened in append mode for writing.
2207 */
2208 if (IS_APPEND(inode)) {
8737c930 2209 if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
7715b521 2210 return -EPERM;
1da177e4 2211 if (flag & O_TRUNC)
7715b521 2212 return -EPERM;
1da177e4
LT
2213 }
2214
2215 /* O_NOATIME can only be set by the owner or superuser */
2e149670 2216 if (flag & O_NOATIME && !inode_owner_or_capable(inode))
7715b521 2217 return -EPERM;
1da177e4 2218
f3c7691e 2219 return 0;
7715b521 2220}
1da177e4 2221
e1181ee6 2222static int handle_truncate(struct file *filp)
7715b521 2223{
e1181ee6 2224 struct path *path = &filp->f_path;
7715b521
AV
2225 struct inode *inode = path->dentry->d_inode;
2226 int error = get_write_access(inode);
2227 if (error)
2228 return error;
2229 /*
2230 * Refuse to truncate files with mandatory locks held on them.
2231 */
2232 error = locks_verify_locked(inode);
2233 if (!error)
ea0d3ab2 2234 error = security_path_truncate(path);
7715b521
AV
2235 if (!error) {
2236 error = do_truncate(path->dentry, 0,
2237 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
e1181ee6 2238 filp);
7715b521
AV
2239 }
2240 put_write_access(inode);
acd0c935 2241 return error;
1da177e4
LT
2242}
2243
d57999e1
DH
/*
 * Map open(2) flags to the form used inside namei: an access mode of 3
 * is turned into 2 (the value is decremented by one); any other flag
 * word is returned unchanged.
 */
static inline int open_to_namei_flags(int flag)
{
	return ((flag & O_ACCMODE) == 3) ? flag - 1 : flag;
}
2250
d18e9008
MS
2251static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode)
2252{
2253 int error = security_path_mknod(dir, dentry, mode, 0);
2254 if (error)
2255 return error;
2256
2257 error = inode_permission(dir->dentry->d_inode, MAY_WRITE | MAY_EXEC);
2258 if (error)
2259 return error;
2260
2261 return security_inode_create(dir->dentry->d_inode, dentry, mode);
2262}
2263
1acf0af9
DH
2264/*
2265 * Attempt to atomically look up, create and open a file from a negative
2266 * dentry.
2267 *
2268 * Returns 0 if successful. The file will have been created and attached to
2269 * @file by the filesystem calling finish_open().
2270 *
2271 * Returns 1 if the file was looked up only or didn't need creating. The
2272 * caller will need to perform the open themselves. @path will have been
2273 * updated to point to the new dentry. This may be negative.
2274 *
2275 * Returns an error code otherwise.
2276 */
2675a4eb
AV
2277static int atomic_open(struct nameidata *nd, struct dentry *dentry,
2278 struct path *path, struct file *file,
2279 const struct open_flags *op,
2280 bool *want_write, bool need_lookup,
2281 int *opened)
d18e9008
MS
2282{
2283 struct inode *dir = nd->path.dentry->d_inode;
2284 unsigned open_flag = open_to_namei_flags(op->open_flag);
2285 umode_t mode;
2286 int error;
2287 int acc_mode;
d18e9008
MS
2288 int create_error = 0;
2289 struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
2290
2291 BUG_ON(dentry->d_inode);
2292
2293 /* Don't create child dentry for a dead directory. */
2294 if (unlikely(IS_DEADDIR(dir))) {
2675a4eb 2295 error = -ENOENT;
d18e9008
MS
2296 goto out;
2297 }
2298
2299 mode = op->mode & S_IALLUGO;
2300 if ((open_flag & O_CREAT) && !IS_POSIXACL(dir))
2301 mode &= ~current_umask();
2302
2303 if (open_flag & O_EXCL) {
2304 open_flag &= ~O_TRUNC;
47237687 2305 *opened |= FILE_CREATED;
d18e9008
MS
2306 }
2307
2308 /*
2309 * Checking write permission is tricky, bacuse we don't know if we are
2310 * going to actually need it: O_CREAT opens should work as long as the
2311 * file exists. But checking existence breaks atomicity. The trick is
2312 * to check access and if not granted clear O_CREAT from the flags.
2313 *
2314 * Another problem is returing the "right" error value (e.g. for an
2315 * O_EXCL open we want to return EEXIST not EROFS).
2316 */
2317 if ((open_flag & (O_CREAT | O_TRUNC)) ||
2318 (open_flag & O_ACCMODE) != O_RDONLY) {
2319 error = mnt_want_write(nd->path.mnt);
2320 if (!error) {
77d660a8 2321 *want_write = true;
d18e9008
MS
2322 } else if (!(open_flag & O_CREAT)) {
2323 /*
2324 * No O_CREATE -> atomicity not a requirement -> fall
2325 * back to lookup + open
2326 */
2327 goto no_open;
2328 } else if (open_flag & (O_EXCL | O_TRUNC)) {
2329 /* Fall back and fail with the right error */
2330 create_error = error;
2331 goto no_open;
2332 } else {
2333 /* No side effects, safe to clear O_CREAT */
2334 create_error = error;
2335 open_flag &= ~O_CREAT;
2336 }
2337 }
2338
2339 if (open_flag & O_CREAT) {
2340 error = may_o_create(&nd->path, dentry, op->mode);
2341 if (error) {
2342 create_error = error;
2343 if (open_flag & O_EXCL)
2344 goto no_open;
2345 open_flag &= ~O_CREAT;
2346 }
2347 }
2348
2349 if (nd->flags & LOOKUP_DIRECTORY)
2350 open_flag |= O_DIRECTORY;
2351
30d90494
AV
2352 file->f_path.dentry = DENTRY_NOT_SET;
2353 file->f_path.mnt = nd->path.mnt;
2354 error = dir->i_op->atomic_open(dir, dentry, file, open_flag, mode,
47237687 2355 opened);
d9585277 2356 if (error < 0) {
d9585277
AV
2357 if (create_error && error == -ENOENT)
2358 error = create_error;
d18e9008
MS
2359 goto out;
2360 }
2361
2362 acc_mode = op->acc_mode;
47237687 2363 if (*opened & FILE_CREATED) {
d18e9008
MS
2364 fsnotify_create(dir, dentry);
2365 acc_mode = MAY_OPEN;
2366 }
2367
d9585277 2368 if (error) { /* returned 1, that is */
30d90494 2369 if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) {
2675a4eb 2370 error = -EIO;
d18e9008
MS
2371 goto out;
2372 }
30d90494 2373 if (file->f_path.dentry) {
d18e9008 2374 dput(dentry);
30d90494 2375 dentry = file->f_path.dentry;
d18e9008
MS
2376 }
2377 goto looked_up;
2378 }
2379
2380 /*
2381 * We didn't have the inode before the open, so check open permission
2382 * here.
2383 */
2675a4eb
AV
2384 error = may_open(&file->f_path, acc_mode, open_flag);
2385 if (error)
2386 fput(file);
d18e9008
MS
2387
2388out:
2389 dput(dentry);
2675a4eb 2390 return error;
d18e9008 2391
d18e9008
MS
2392no_open:
2393 if (need_lookup) {
72bd866a 2394 dentry = lookup_real(dir, dentry, nd->flags);
d18e9008 2395 if (IS_ERR(dentry))
2675a4eb 2396 return PTR_ERR(dentry);
d18e9008
MS
2397
2398 if (create_error) {
2399 int open_flag = op->open_flag;
2400
2675a4eb 2401 error = create_error;
d18e9008
MS
2402 if ((open_flag & O_EXCL)) {
2403 if (!dentry->d_inode)
2404 goto out;
2405 } else if (!dentry->d_inode) {
2406 goto out;
2407 } else if ((open_flag & O_TRUNC) &&
2408 S_ISREG(dentry->d_inode->i_mode)) {
2409 goto out;
2410 }
2411 /* will fail later, go on to get the right error */
2412 }
2413 }
2414looked_up:
2415 path->dentry = dentry;
2416 path->mnt = nd->path.mnt;
2675a4eb 2417 return 1;
d18e9008
MS
2418}
2419
/*
 * Look up and maybe create and open the last component.
 *
 * Must be called with i_mutex held on parent.
 *
 * Returns 0 if the file was successfully atomically created (if necessary) and
 * opened.  In this case the file will be returned attached to @file.
 *
 * Returns 1 if the file was not completely opened at this time, though lookups
 * and creations will have been performed and the dentry returned in @path will
 * be positive upon return if O_CREAT was specified.  If O_CREAT wasn't
 * specified then a negative dentry may be returned.
 *
 * An error code is returned otherwise.
 *
 * FILE_CREATED is cleared in @*opened on entry and set again when this
 * function (or atomic_open()) goes on to create the file.
 *
 * *@want_write is set to true when write access to the mount has been taken
 * here; the caller is expected to drop it.
 */
static int lookup_open(struct nameidata *nd, struct path *path,
			struct file *file,
			const struct open_flags *op,
			bool *want_write, int *opened)
{
	struct dentry *dir = nd->path.dentry;
	struct inode *dir_inode = dir->d_inode;
	struct dentry *dentry;
	int error;
	bool need_lookup;

	*opened &= ~FILE_CREATED;
	/* Try the dcache first; need_lookup reports whether ->lookup is due. */
	dentry = lookup_dcache(&nd->last, dir, nd->flags, &need_lookup);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);

	/* Cached positive dentry: will open in f_op->open */
	if (!need_lookup && dentry->d_inode)
		goto out_no_open;

	/* Let the filesystem do lookup+create+open in one go if it can. */
	if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) {
		return atomic_open(nd, dentry, path, file, op, want_write,
				   need_lookup, opened);
	}

	if (need_lookup) {
		BUG_ON(dentry->d_inode);

		dentry = lookup_real(dir_inode, dentry, nd->flags);
		if (IS_ERR(dentry))
			return PTR_ERR(dentry);
	}

	/* Negative dentry, just create the file */
	if (!dentry->d_inode && (op->open_flag & O_CREAT)) {
		umode_t mode = op->mode;
		/* The umask is applied here unless the directory uses POSIX ACLs. */
		if (!IS_POSIXACL(dir->d_inode))
			mode &= ~current_umask();
		/*
		 * This write is needed to ensure that a
		 * rw->ro transition does not occur between
		 * the time when the file is created and when
		 * a permanent write count is taken through
		 * the 'struct file' in finish_open().
		 */
		error = mnt_want_write(nd->path.mnt);
		if (error)
			goto out_dput;
		*want_write = true;
		*opened |= FILE_CREATED;
		error = security_path_mknod(&nd->path, dentry, mode, 0);
		if (error)
			goto out_dput;
		error = vfs_create(dir->d_inode, dentry, mode,
				   nd->flags & LOOKUP_EXCL);
		if (error)
			goto out_dput;
	}
out_no_open:
	/* Hand the (possibly negative) dentry back; the caller opens it. */
	path->dentry = dentry;
	path->mnt = nd->path.mnt;
	return 1;

out_dput:
	dput(dentry);
	return error;
}
2505
/*
 * Handle the last step of open()
 *
 * Resolves the final path component held in @nd and, where possible, opens
 * it into @file.
 *
 * Returns 0 when @file has been opened, 1 when the last component is a
 * symlink the caller has to follow (the link is left in @path), and a
 * negative errno otherwise.  Apart from the early returns, every exit goes
 * through the "out" label, which drops write access if it is still held,
 * releases save_parent and calls terminate_walk().
 */
static int do_last(struct nameidata *nd, struct path *path,
		   struct file *file, const struct open_flags *op,
		   int *opened, const char *pathname)
{
	struct dentry *dir = nd->path.dentry;
	int open_flag = op->open_flag;
	bool will_truncate = (open_flag & O_TRUNC) != 0;
	bool want_write = false;
	int acc_mode = op->acc_mode;
	struct inode *inode;
	bool symlink_ok = false;
	/* Parent path kept around so that an -EOPENSTALE open can be retried. */
	struct path save_parent = { .dentry = NULL, .mnt = NULL };
	bool retried = false;
	int error;

	nd->flags &= ~LOOKUP_PARENT;
	nd->flags |= op->intent;

	/* Last components that are not a normal name need no lookup. */
	switch (nd->last_type) {
	case LAST_DOTDOT:
	case LAST_DOT:
		error = handle_dots(nd, nd->last_type);
		if (error)
			return error;
		/* fallthrough */
	case LAST_ROOT:
		error = complete_walk(nd);
		if (error)
			return error;
		audit_inode(pathname, nd->path.dentry);
		if (open_flag & O_CREAT) {
			error = -EISDIR;
			goto out;
		}
		goto finish_open;
	case LAST_BIND:
		error = complete_walk(nd);
		if (error)
			return error;
		audit_inode(pathname, dir);
		goto finish_open;
	}

	if (!(open_flag & O_CREAT)) {
		/* A trailing slash means: follow links, and expect a directory. */
		if (nd->last.name[nd->last.len])
			nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
		if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
			symlink_ok = true;
		/* we _can_ be in RCU mode here */
		error = lookup_fast(nd, &nd->last, path, &inode);
		if (likely(!error))
			goto finish_lookup;

		if (error < 0)
			goto out;

		BUG_ON(nd->inode != dir->d_inode);
	} else {
		/* create side of things */
		/*
		 * This will *only* deal with leaving RCU mode - LOOKUP_JUMPED
		 * has been cleared when we got to the last component we are
		 * about to look up
		 */
		error = complete_walk(nd);
		if (error)
			return error;

		audit_inode(pathname, dir);
		error = -EISDIR;
		/* trailing slashes? */
		if (nd->last.name[nd->last.len])
			goto out;
	}

retry_lookup:
	/* lookup_open() must be called with the parent's i_mutex held. */
	mutex_lock(&dir->d_inode->i_mutex);
	error = lookup_open(nd, path, file, op, &want_write, opened);
	mutex_unlock(&dir->d_inode->i_mutex);

	if (error <= 0) {
		if (error)
			goto out;

		/* 0: the file has already been opened by lookup_open(). */
		if ((*opened & FILE_CREATED) ||
		    !S_ISREG(file->f_path.dentry->d_inode->i_mode))
			will_truncate = false;

		audit_inode(pathname, file->f_path.dentry);
		goto opened;
	}

	if (*opened & FILE_CREATED) {
		/* Don't check for write permission, don't truncate */
		open_flag &= ~O_TRUNC;
		will_truncate = false;
		acc_mode = MAY_OPEN;
		path_to_nameidata(path, nd);
		goto finish_open_created;
	}

	/*
	 * It already exists.
	 */
	audit_inode(pathname, path->dentry);

	/*
	 * If atomic_open() acquired write access it is dropped now due to
	 * possible mount and symlink following (this might be optimized away if
	 * necessary...)
	 */
	if (want_write) {
		mnt_drop_write(nd->path.mnt);
		want_write = false;
	}

	error = -EEXIST;
	if (open_flag & O_EXCL)
		goto exit_dput;

	error = follow_managed(path, nd->flags);
	if (error < 0)
		goto exit_dput;

	/* A positive return from follow_managed() is recorded as a jump. */
	if (error)
		nd->flags |= LOOKUP_JUMPED;

	BUG_ON(nd->flags & LOOKUP_RCU);
	inode = path->dentry->d_inode;
finish_lookup:
	/* we _can_ be in RCU mode here */
	error = -ENOENT;
	if (!inode) {
		path_to_nameidata(path, nd);
		goto out;
	}

	if (should_follow_link(inode, !symlink_ok)) {
		/* Leave RCU mode before handing the symlink to the caller. */
		if (nd->flags & LOOKUP_RCU) {
			if (unlikely(unlazy_walk(nd, path->dentry))) {
				error = -ECHILD;
				goto out;
			}
		}
		BUG_ON(inode != path->dentry->d_inode);
		return 1;
	}

	if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path->mnt) {
		path_to_nameidata(path, nd);
	} else {
		/*
		 * Same mount and not in RCU mode: remember the parent (with
		 * an extra mount reference) and step nd onto the child.
		 */
		save_parent.dentry = nd->path.dentry;
		save_parent.mnt = mntget(path->mnt);
		nd->path.dentry = path->dentry;

	}
	nd->inode = inode;
	/* Why this, you ask?  _Now_ we might have grown LOOKUP_JUMPED... */
	error = complete_walk(nd);
	if (error) {
		path_put(&save_parent);
		return error;
	}
	error = -EISDIR;
	if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode))
		goto out;
	error = -ENOTDIR;
	if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup)
		goto out;
	audit_inode(pathname, nd->path.dentry);
finish_open:
	/* Only regular files are truncated. */
	if (!S_ISREG(nd->inode->i_mode))
		will_truncate = false;

	if (will_truncate) {
		error = mnt_want_write(nd->path.mnt);
		if (error)
			goto out;
		want_write = true;
	}
finish_open_created:
	error = may_open(&nd->path, acc_mode, open_flag);
	if (error)
		goto out;
	file->f_path.mnt = nd->path.mnt;
	error = finish_open(file, nd->path.dentry, NULL, opened);
	if (error) {
		if (error == -EOPENSTALE)
			goto stale_open;
		goto out;
	}
opened:
	/* From here on the file is open; failures must fput() it. */
	error = open_check_o_direct(file);
	if (error)
		goto exit_fput;
	error = ima_file_check(file, op->acc_mode);
	if (error)
		goto exit_fput;

	if (will_truncate) {
		error = handle_truncate(file);
		if (error)
			goto exit_fput;
	}
out:
	if (want_write)
		mnt_drop_write(nd->path.mnt);
	path_put(&save_parent);
	terminate_walk(nd);
	return error;

exit_dput:
	path_put_conditional(path, nd);
	goto out;
exit_fput:
	fput(file);
	goto out;

stale_open:
	/* If no saved parent or already retried then can't retry */
	if (!save_parent.dentry || retried)
		goto out;

	/* Step nd back onto the saved parent and redo the lookup once. */
	BUG_ON(save_parent.dentry != dir);
	path_put(&nd->path);
	nd->path = save_parent;
	nd->inode = dir->d_inode;
	save_parent.mnt = NULL;
	save_parent.dentry = NULL;
	if (want_write) {
		mnt_drop_write(nd->path.mnt);
		want_write = false;
	}
	retried = true;
	goto retry_lookup;
}
2745
/*
 * Perform one complete open attempt of @pathname relative to @dfd with the
 * given lookup @flags.
 *
 * Allocates an empty struct file, walks the path up to the last component
 * and lets do_last() deal with that component, following trailing symlinks
 * for as long as do_last() returns a positive value.
 *
 * Returns the opened file, or an ERR_PTR().  -EOPENSTALE is reported to the
 * caller as -ECHILD when LOOKUP_RCU was set in @flags and as -ESTALE
 * otherwise.
 */
static struct file *path_openat(int dfd, const char *pathname,
		struct nameidata *nd, const struct open_flags *op, int flags)
{
	struct file *base = NULL;
	struct file *file;
	struct path path;
	int opened = 0;
	int error;

	file = get_empty_filp();
	if (!file)
		return ERR_PTR(-ENFILE);

	file->f_flags = op->open_flag;

	error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base);
	if (unlikely(error))
		goto out;

	current->total_link_count = 0;
	error = link_path_walk(pathname, nd);
	if (unlikely(error))
		goto out;

	error = do_last(nd, &path, file, op, &opened, pathname);
	while (unlikely(error > 0)) { /* trailing symlink */
		struct path link = path;
		void *cookie;
		/* A symlink we were told not to follow: fail with -ELOOP. */
		if (!(nd->flags & LOOKUP_FOLLOW)) {
			path_put_conditional(&path, nd);
			path_put(&nd->path);
			error = -ELOOP;
			break;
		}
		/* Walk the link body as a parent lookup, without open intent. */
		nd->flags |= LOOKUP_PARENT;
		nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
		error = follow_link(&link, nd, &cookie);
		if (unlikely(error))
			break;
		error = do_last(nd, &path, file, op, &opened, pathname);
		put_link(nd, &link, cookie);
	}
out:
	/* nd->root is released here unless it was supplied via LOOKUP_ROOT. */
	if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT))
		path_put(&nd->root);
	if (base)
		fput(base);
	/* A file that never got opened is released with put_filp(). */
	if (!(opened & FILE_OPENED)) {
		BUG_ON(!error);
		put_filp(file);
	}
	if (unlikely(error)) {
		if (error == -EOPENSTALE) {
			if (flags & LOOKUP_RCU)
				error = -ECHILD;
			else
				error = -ESTALE;
		}
		file = ERR_PTR(error);
	}
	return file;
}
2808
13aab428
AV
2809struct file *do_filp_open(int dfd, const char *pathname,
2810 const struct open_flags *op, int flags)
2811{
73d049a4 2812 struct nameidata nd;
13aab428
AV
2813 struct file *filp;
2814
73d049a4 2815 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU);
13aab428 2816 if (unlikely(filp == ERR_PTR(-ECHILD)))
73d049a4 2817 filp = path_openat(dfd, pathname, &nd, op, flags);
13aab428 2818 if (unlikely(filp == ERR_PTR(-ESTALE)))
73d049a4 2819 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL);
13aab428
AV
2820 return filp;
2821}
2822
73d049a4
AV
2823struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
2824 const char *name, const struct open_flags *op, int flags)
2825{
2826 struct nameidata nd;
2827 struct file *file;
2828
2829 nd.root.mnt = mnt;
2830 nd.root.dentry = dentry;
2831
2832 flags |= LOOKUP_ROOT;
2833
bcda7652 2834 if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN)
73d049a4
AV
2835 return ERR_PTR(-ELOOP);
2836
2837 file = path_openat(-1, name, &nd, op, flags | LOOKUP_RCU);
2838 if (unlikely(file == ERR_PTR(-ECHILD)))
2839 file = path_openat(-1, name, &nd, op, flags);
2840 if (unlikely(file == ERR_PTR(-ESTALE)))
2841 file = path_openat(-1, name, &nd, op, flags | LOOKUP_REVAL);
2842 return file;
2843}
2844
ed75e95d 2845struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path, int is_dir)
1da177e4 2846{
c663e5d8 2847 struct dentry *dentry = ERR_PTR(-EEXIST);
ed75e95d
AV
2848 struct nameidata nd;
2849 int error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd);
2850 if (error)
2851 return ERR_PTR(error);
1da177e4 2852
c663e5d8
CH
2853 /*
2854 * Yucky last component or no last component at all?
2855 * (foo/., foo/.., /////)
2856 */
ed75e95d
AV
2857 if (nd.last_type != LAST_NORM)
2858 goto out;
2859 nd.flags &= ~LOOKUP_PARENT;
2860 nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL;
c663e5d8
CH
2861
2862 /*
2863 * Do the final lookup.
2864 */
ed75e95d
AV
2865 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
2866 dentry = lookup_hash(&nd);
1da177e4 2867 if (IS_ERR(dentry))
a8104a9f 2868 goto unlock;
c663e5d8 2869
a8104a9f 2870 error = -EEXIST;
e9baf6e5 2871 if (dentry->d_inode)
a8104a9f 2872 goto fail;
c663e5d8
CH
2873 /*
2874 * Special case - lookup gave negative, but... we had foo/bar/
2875 * From the vfs_mknod() POV we just have a negative dentry -
2876 * all is fine. Let's be bastards - you had / on the end, you've
2877 * been asking for (non-existent) directory. -ENOENT for you.
2878 */
ed75e95d 2879 if (unlikely(!is_dir && nd.last.name[nd.last.len])) {
a8104a9f 2880 error = -ENOENT;
ed75e95d 2881 goto fail;
e9baf6e5 2882 }
a8104a9f
AV
2883 error = mnt_want_write(nd.path.mnt);
2884 if (error)
2885 goto fail;
ed75e95d 2886 *path = nd.path;
1da177e4 2887 return dentry;
1da177e4 2888fail:
a8104a9f
AV
2889 dput(dentry);
2890 dentry = ERR_PTR(error);
2891unlock:
ed75e95d
AV
2892 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
2893out:
2894 path_put(&nd.path);
1da177e4
LT
2895 return dentry;
2896}
dae6ad8f
AV
2897EXPORT_SYMBOL(kern_path_create);
2898
921a1650
AV
2899void done_path_create(struct path *path, struct dentry *dentry)
2900{
2901 dput(dentry);
2902 mutex_unlock(&path->dentry->d_inode->i_mutex);
a8104a9f 2903 mnt_drop_write(path->mnt);
921a1650
AV
2904 path_put(path);
2905}
2906EXPORT_SYMBOL(done_path_create);
2907
dae6ad8f
AV
2908struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir)
2909{
2910 char *tmp = getname(pathname);
2911 struct dentry *res;
2912 if (IS_ERR(tmp))
2913 return ERR_CAST(tmp);
2914 res = kern_path_create(dfd, tmp, path, is_dir);
2915 putname(tmp);
2916 return res;
2917}
2918EXPORT_SYMBOL(user_path_create);
2919
1a67aafb 2920int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
1da177e4 2921{
a95164d9 2922 int error = may_create(dir, dentry);
1da177e4
LT
2923
2924 if (error)
2925 return error;
2926
975d6b39 2927 if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
1da177e4
LT
2928 return -EPERM;
2929
acfa4380 2930 if (!dir->i_op->mknod)
1da177e4
LT
2931 return -EPERM;
2932
08ce5f16
SH
2933 error = devcgroup_inode_mknod(mode, dev);
2934 if (error)
2935 return error;
2936
1da177e4
LT
2937 error = security_inode_mknod(dir, dentry, mode, dev);
2938 if (error)
2939 return error;
2940
1da177e4 2941 error = dir->i_op->mknod(dir, dentry, mode, dev);
a74574aa 2942 if (!error)
f38aa942 2943 fsnotify_create(dir, dentry);
1da177e4
LT
2944 return error;
2945}
2946
f69aac00 2947static int may_mknod(umode_t mode)
463c3197
DH
2948{
2949 switch (mode & S_IFMT) {
2950 case S_IFREG:
2951 case S_IFCHR:
2952 case S_IFBLK:
2953 case S_IFIFO:
2954 case S_IFSOCK:
2955 case 0: /* zero mode translates to S_IFREG */
2956 return 0;
2957 case S_IFDIR:
2958 return -EPERM;
2959 default:
2960 return -EINVAL;
2961 }
2962}
2963
8208a22b 2964SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
2e4d0924 2965 unsigned, dev)
1da177e4 2966{
2ad94ae6 2967 struct dentry *dentry;
dae6ad8f
AV
2968 struct path path;
2969 int error;
1da177e4 2970
8e4bfca1
AV
2971 error = may_mknod(mode);
2972 if (error)
2973 return error;
1da177e4 2974
dae6ad8f
AV
2975 dentry = user_path_create(dfd, filename, &path, 0);
2976 if (IS_ERR(dentry))
2977 return PTR_ERR(dentry);
2ad94ae6 2978
dae6ad8f 2979 if (!IS_POSIXACL(path.dentry->d_inode))
ce3b0f8d 2980 mode &= ~current_umask();
dae6ad8f 2981 error = security_path_mknod(&path, dentry, mode, dev);
be6d3e56 2982 if (error)
a8104a9f 2983 goto out;
463c3197 2984 switch (mode & S_IFMT) {
1da177e4 2985 case 0: case S_IFREG:
312b63fb 2986 error = vfs_create(path.dentry->d_inode,dentry,mode,true);
1da177e4
LT
2987 break;
2988 case S_IFCHR: case S_IFBLK:
dae6ad8f 2989 error = vfs_mknod(path.dentry->d_inode,dentry,mode,
1da177e4
LT
2990 new_decode_dev(dev));
2991 break;
2992 case S_IFIFO: case S_IFSOCK:
dae6ad8f 2993 error = vfs_mknod(path.dentry->d_inode,dentry,mode,0);
1da177e4 2994 break;
1da177e4 2995 }
a8104a9f 2996out:
921a1650 2997 done_path_create(&path, dentry);
1da177e4
LT
2998 return error;
2999}
3000
8208a22b 3001SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev)
5590ff0d
UD
3002{
3003 return sys_mknodat(AT_FDCWD, filename, mode, dev);
3004}
3005
18bb1db3 3006int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
1da177e4 3007{
a95164d9 3008 int error = may_create(dir, dentry);
8de52778 3009 unsigned max_links = dir->i_sb->s_max_links;
1da177e4
LT
3010
3011 if (error)
3012 return error;
3013
acfa4380 3014 if (!dir->i_op->mkdir)
1da177e4
LT
3015 return -EPERM;
3016
3017 mode &= (S_IRWXUGO|S_ISVTX);
3018 error = security_inode_mkdir(dir, dentry, mode);
3019 if (error)
3020 return error;
3021
8de52778
AV
3022 if (max_links && dir->i_nlink >= max_links)
3023 return -EMLINK;
3024
1da177e4 3025 error = dir->i_op->mkdir(dir, dentry, mode);
a74574aa 3026 if (!error)
f38aa942 3027 fsnotify_mkdir(dir, dentry);
1da177e4
LT
3028 return error;
3029}
3030
a218d0fd 3031SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
1da177e4 3032{
6902d925 3033 struct dentry *dentry;
dae6ad8f
AV
3034 struct path path;
3035 int error;
1da177e4 3036
dae6ad8f 3037 dentry = user_path_create(dfd, pathname, &path, 1);
6902d925 3038 if (IS_ERR(dentry))
dae6ad8f 3039 return PTR_ERR(dentry);
1da177e4 3040
dae6ad8f 3041 if (!IS_POSIXACL(path.dentry->d_inode))
ce3b0f8d 3042 mode &= ~current_umask();
dae6ad8f 3043 error = security_path_mkdir(&path, dentry, mode);
a8104a9f
AV
3044 if (!error)
3045 error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
921a1650 3046 done_path_create(&path, dentry);
1da177e4
LT
3047 return error;
3048}
3049
a218d0fd 3050SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
5590ff0d
UD
3051{
3052 return sys_mkdirat(AT_FDCWD, pathname, mode);
3053}
3054
/*
 * The dentry_unhash() helper will try to drop the dentry early: we
 * should have a usage count of 1 if we're the only user of this
 * dentry, and if that is true (possibly after pruning the dcache),
 * then we drop the dentry now.
 *
 * A low-level filesystem can, if it chooses, legally
 * do a
 *
 *	if (!d_unhashed(dentry))
 *		return -EBUSY;
 *
 * if it cannot handle the case of removing a directory
 * that is still in use by something else..
 */
3070void dentry_unhash(struct dentry *dentry)
3071{
dc168427 3072 shrink_dcache_parent(dentry);
1da177e4 3073 spin_lock(&dentry->d_lock);
64252c75 3074 if (dentry->d_count == 1)
1da177e4
LT
3075 __d_drop(dentry);
3076 spin_unlock(&dentry->d_lock);
1da177e4
LT
3077}
3078
3079int vfs_rmdir(struct inode *dir, struct dentry *dentry)
3080{
3081 int error = may_delete(dir, dentry, 1);
3082
3083 if (error)
3084 return error;
3085
acfa4380 3086 if (!dir->i_op->rmdir)
1da177e4
LT
3087 return -EPERM;
3088
1d2ef590 3089 dget(dentry);
1b1dcc1b 3090 mutex_lock(&dentry->d_inode->i_mutex);
912dbc15
SW
3091
3092 error = -EBUSY;
1da177e4 3093 if (d_mountpoint(dentry))
912dbc15
SW
3094 goto out;
3095
3096 error = security_inode_rmdir(dir, dentry);
3097 if (error)
3098 goto out;
3099
3cebde24 3100 shrink_dcache_parent(dentry);
912dbc15
SW
3101 error = dir->i_op->rmdir(dir, dentry);
3102 if (error)
3103 goto out;
3104
3105 dentry->d_inode->i_flags |= S_DEAD;
3106 dont_mount(dentry);
3107
3108out:
1b1dcc1b 3109 mutex_unlock(&dentry->d_inode->i_mutex);
1d2ef590 3110 dput(dentry);
912dbc15 3111 if (!error)
1da177e4 3112 d_delete(dentry);
1da177e4
LT
3113 return error;
3114}
3115
5590ff0d 3116static long do_rmdir(int dfd, const char __user *pathname)
1da177e4
LT
3117{
3118 int error = 0;
3119 char * name;
3120 struct dentry *dentry;
3121 struct nameidata nd;
3122
2ad94ae6 3123 error = user_path_parent(dfd, pathname, &nd, &name);
1da177e4 3124 if (error)
2ad94ae6 3125 return error;
1da177e4
LT
3126
3127 switch(nd.last_type) {
0612d9fb
OH
3128 case LAST_DOTDOT:
3129 error = -ENOTEMPTY;
3130 goto exit1;
3131 case LAST_DOT:
3132 error = -EINVAL;
3133 goto exit1;
3134 case LAST_ROOT:
3135 error = -EBUSY;
3136 goto exit1;
1da177e4 3137 }
0612d9fb
OH
3138
3139 nd.flags &= ~LOOKUP_PARENT;
3140
4ac91378 3141 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
49705b77 3142 dentry = lookup_hash(&nd);
1da177e4 3143 error = PTR_ERR(dentry);
6902d925
DH
3144 if (IS_ERR(dentry))
3145 goto exit2;
e6bc45d6
TT
3146 if (!dentry->d_inode) {
3147 error = -ENOENT;
3148 goto exit3;
3149 }
0622753b
DH
3150 error = mnt_want_write(nd.path.mnt);
3151 if (error)
3152 goto exit3;
be6d3e56
KT
3153 error = security_path_rmdir(&nd.path, dentry);
3154 if (error)
3155 goto exit4;
4ac91378 3156 error = vfs_rmdir(nd.path.dentry->d_inode, dentry);
be6d3e56 3157exit4:
0622753b
DH
3158 mnt_drop_write(nd.path.mnt);
3159exit3:
6902d925
DH
3160 dput(dentry);
3161exit2:
4ac91378 3162 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
1da177e4 3163exit1:
1d957f9b 3164 path_put(&nd.path);
1da177e4
LT
3165 putname(name);
3166 return error;
3167}
3168
3cdad428 3169SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
5590ff0d
UD
3170{
3171 return do_rmdir(AT_FDCWD, pathname);
3172}
3173
1da177e4
LT
3174int vfs_unlink(struct inode *dir, struct dentry *dentry)
3175{
3176 int error = may_delete(dir, dentry, 0);
3177
3178 if (error)
3179 return error;
3180
acfa4380 3181 if (!dir->i_op->unlink)
1da177e4
LT
3182 return -EPERM;
3183
1b1dcc1b 3184 mutex_lock(&dentry->d_inode->i_mutex);
1da177e4
LT
3185 if (d_mountpoint(dentry))
3186 error = -EBUSY;
3187 else {
3188 error = security_inode_unlink(dir, dentry);
bec1052e 3189 if (!error) {
1da177e4 3190 error = dir->i_op->unlink(dir, dentry);
bec1052e 3191 if (!error)
d83c49f3 3192 dont_mount(dentry);
bec1052e 3193 }
1da177e4 3194 }
1b1dcc1b 3195 mutex_unlock(&dentry->d_inode->i_mutex);
1da177e4
LT
3196
3197 /* We don't d_delete() NFS sillyrenamed files--they still exist. */
3198 if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
ece95912 3199 fsnotify_link_count(dentry->d_inode);
e234f35c 3200 d_delete(dentry);
1da177e4 3201 }
0eeca283 3202
1da177e4
LT
3203 return error;
3204}
3205
3206/*
3207 * Make sure that the actual truncation of the file will occur outside its
1b1dcc1b 3208 * directory's i_mutex. Truncate can take a long time if there is a lot of
1da177e4
LT
3209 * writeout happening, and we don't want to prevent access to the directory
3210 * while waiting on the I/O.
3211 */
5590ff0d 3212static long do_unlinkat(int dfd, const char __user *pathname)
1da177e4 3213{
2ad94ae6
AV
3214 int error;
3215 char *name;
1da177e4
LT
3216 struct dentry *dentry;
3217 struct nameidata nd;
3218 struct inode *inode = NULL;
3219
2ad94ae6 3220 error = user_path_parent(dfd, pathname, &nd, &name);
1da177e4 3221 if (error)
2ad94ae6
AV
3222 return error;
3223
1da177e4
LT
3224 error = -EISDIR;
3225 if (nd.last_type != LAST_NORM)
3226 goto exit1;
0612d9fb
OH
3227
3228 nd.flags &= ~LOOKUP_PARENT;
3229
4ac91378 3230 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
49705b77 3231 dentry = lookup_hash(&nd);
1da177e4
LT
3232 error = PTR_ERR(dentry);
3233 if (!IS_ERR(dentry)) {
3234 /* Why not before? Because we want correct error value */
50338b88
TE
3235 if (nd.last.name[nd.last.len])
3236 goto slashes;
1da177e4 3237 inode = dentry->d_inode;
50338b88 3238 if (!inode)
e6bc45d6
TT
3239 goto slashes;
3240 ihold(inode);
0622753b
DH
3241 error = mnt_want_write(nd.path.mnt);
3242 if (error)
3243 goto exit2;
be6d3e56
KT
3244 error = security_path_unlink(&nd.path, dentry);
3245 if (error)
3246 goto exit3;
4ac91378 3247 error = vfs_unlink(nd.path.dentry->d_inode, dentry);
be6d3e56 3248exit3:
0622753b 3249 mnt_drop_write(nd.path.mnt);
1da177e4
LT
3250 exit2:
3251 dput(dentry);
3252 }
4ac91378 3253 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
1da177e4
LT
3254 if (inode)
3255 iput(inode); /* truncate the inode here */
3256exit1:
1d957f9b 3257 path_put(&nd.path);
1da177e4
LT
3258 putname(name);
3259 return error;
3260
3261slashes:
3262 error = !dentry->d_inode ? -ENOENT :
3263 S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
3264 goto exit2;
3265}
3266
2e4d0924 3267SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag)
5590ff0d
UD
3268{
3269 if ((flag & ~AT_REMOVEDIR) != 0)
3270 return -EINVAL;
3271
3272 if (flag & AT_REMOVEDIR)
3273 return do_rmdir(dfd, pathname);
3274
3275 return do_unlinkat(dfd, pathname);
3276}
3277
3480b257 3278SYSCALL_DEFINE1(unlink, const char __user *, pathname)
5590ff0d
UD
3279{
3280 return do_unlinkat(AT_FDCWD, pathname);
3281}
3282
db2e747b 3283int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
1da177e4 3284{
a95164d9 3285 int error = may_create(dir, dentry);
1da177e4
LT
3286
3287 if (error)
3288 return error;
3289
acfa4380 3290 if (!dir->i_op->symlink)
1da177e4
LT
3291 return -EPERM;
3292
3293 error = security_inode_symlink(dir, dentry, oldname);
3294 if (error)
3295 return error;
3296
1da177e4 3297 error = dir->i_op->symlink(dir, dentry, oldname);
a74574aa 3298 if (!error)
f38aa942 3299 fsnotify_create(dir, dentry);
1da177e4
LT
3300 return error;
3301}
3302
2e4d0924
HC
3303SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
3304 int, newdfd, const char __user *, newname)
1da177e4 3305{
2ad94ae6
AV
3306 int error;
3307 char *from;
6902d925 3308 struct dentry *dentry;
dae6ad8f 3309 struct path path;
1da177e4
LT
3310
3311 from = getname(oldname);
2ad94ae6 3312 if (IS_ERR(from))
1da177e4 3313 return PTR_ERR(from);
1da177e4 3314
dae6ad8f 3315 dentry = user_path_create(newdfd, newname, &path, 0);
6902d925
DH
3316 error = PTR_ERR(dentry);
3317 if (IS_ERR(dentry))
dae6ad8f 3318 goto out_putname;
6902d925 3319
dae6ad8f 3320 error = security_path_symlink(&path, dentry, from);
a8104a9f
AV
3321 if (!error)
3322 error = vfs_symlink(path.dentry->d_inode, dentry, from);
921a1650 3323 done_path_create(&path, dentry);
6902d925 3324out_putname:
1da177e4
LT
3325 putname(from);
3326 return error;
3327}
3328
3480b257 3329SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newname)
5590ff0d
UD
3330{
3331 return sys_symlinkat(oldname, AT_FDCWD, newname);
3332}
3333
1da177e4
LT
3334int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
3335{
3336 struct inode *inode = old_dentry->d_inode;
8de52778 3337 unsigned max_links = dir->i_sb->s_max_links;
1da177e4
LT
3338 int error;
3339
3340 if (!inode)
3341 return -ENOENT;
3342
a95164d9 3343 error = may_create(dir, new_dentry);
1da177e4
LT
3344 if (error)
3345 return error;
3346
3347 if (dir->i_sb != inode->i_sb)
3348 return -EXDEV;
3349
3350 /*
3351 * A link to an append-only or immutable file cannot be created.
3352 */
3353 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
3354 return -EPERM;
acfa4380 3355 if (!dir->i_op->link)
1da177e4 3356 return -EPERM;
7e79eedb 3357 if (S_ISDIR(inode->i_mode))
1da177e4
LT
3358 return -EPERM;
3359
3360 error = security_inode_link(old_dentry, dir, new_dentry);
3361 if (error)
3362 return error;
3363
7e79eedb 3364 mutex_lock(&inode->i_mutex);
aae8a97d
AK
3365 /* Make sure we don't allow creating hardlink to an unlinked file */
3366 if (inode->i_nlink == 0)
3367 error = -ENOENT;
8de52778
AV
3368 else if (max_links && inode->i_nlink >= max_links)
3369 error = -EMLINK;
aae8a97d
AK
3370 else
3371 error = dir->i_op->link(old_dentry, dir, new_dentry);
7e79eedb 3372 mutex_unlock(&inode->i_mutex);
e31e14ec 3373 if (!error)
7e79eedb 3374 fsnotify_link(dir, inode, new_dentry);
1da177e4
LT
3375 return error;
3376}
3377
3378/*
3379 * Hardlinks are often used in delicate situations. We avoid
3380 * security-related surprises by not following symlinks on the
3381 * newname. --KAB
3382 *
3383 * We don't follow them on the oldname either to be compatible
3384 * with linux 2.0, and to avoid hard-linking to directories
3385 * and other special files. --ADM
3386 */
2e4d0924
HC
3387SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
3388 int, newdfd, const char __user *, newname, int, flags)
1da177e4
LT
3389{
3390 struct dentry *new_dentry;
dae6ad8f 3391 struct path old_path, new_path;
11a7b371 3392 int how = 0;
1da177e4 3393 int error;
1da177e4 3394
11a7b371 3395 if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
c04030e1 3396 return -EINVAL;
11a7b371
AK
3397 /*
3398 * To use null names we require CAP_DAC_READ_SEARCH
3399 * This ensures that not everyone will be able to create
3400 * handlink using the passed filedescriptor.
3401 */
3402 if (flags & AT_EMPTY_PATH) {
3403 if (!capable(CAP_DAC_READ_SEARCH))
3404 return -ENOENT;
3405 how = LOOKUP_EMPTY;
3406 }
3407
3408 if (flags & AT_SYMLINK_FOLLOW)
3409 how |= LOOKUP_FOLLOW;
c04030e1 3410
11a7b371 3411 error = user_path_at(olddfd, oldname, how, &old_path);
1da177e4 3412 if (error)
2ad94ae6
AV
3413 return error;
3414
dae6ad8f 3415 new_dentry = user_path_create(newdfd, newname, &new_path, 0);
1da177e4 3416 error = PTR_ERR(new_dentry);
6902d925 3417 if (IS_ERR(new_dentry))
dae6ad8f
AV
3418 goto out;
3419
3420 error = -EXDEV;
3421 if (old_path.mnt != new_path.mnt)
3422 goto out_dput;
dae6ad8f 3423 error = security_path_link(old_path.dentry, &new_path, new_dentry);
be6d3e56 3424 if (error)
a8104a9f 3425 goto out_dput;
dae6ad8f 3426 error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry);
75c3f29d 3427out_dput:
921a1650 3428 done_path_create(&new_path, new_dentry);
1da177e4 3429out:
2d8f3038 3430 path_put(&old_path);
1da177e4
LT
3431
3432 return error;
3433}
3434
3480b257 3435SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname)
5590ff0d 3436{
c04030e1 3437 return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
5590ff0d
UD
3438}
3439
1da177e4
LT
/*
 * The worst of all namespace operations - renaming directory. "Perverted"
 * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
 * Problems:
 *	a) we can get into loop creation. Check is done in is_subdir().
 *	b) race potential - two innocent renames can create a loop together.
 *	   That's where 4.4 screws up. Current fix: serialization on
 *	   sb->s_vfs_rename_mutex. We might be more accurate, but that's another
 *	   story.
 *	c) we have to lock _three_ objects - parents and victim (if it exists).
 *	   And that - after we got ->i_mutex on parents (until then we don't know
 *	   whether the target exists).  Solution: try to be smart with locking
 *	   order for inodes.  We rely on the fact that tree topology may change
 *	   only under ->s_vfs_rename_mutex _and_ that parent of the object we
 *	   move will be locked.  Thus we can rank directories by the tree
 *	   (ancestors first) and rank all non-directories after them.
 *	   That works since everybody except rename does "lock parent, lookup,
 *	   lock child" and rename is under ->s_vfs_rename_mutex.
 *	   HOWEVER, it relies on the assumption that any object with ->lookup()
 *	   has no more than 1 dentry.  If "hybrid" objects ever appear,
 *	   we'd better make sure that there's no link(2) for them.
 *	d) conversion from fhandle to dentry may come at the wrong moment - when
 *	   we are removing the target. Solution: we will have to grab ->i_mutex
 *	   in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
 *	   ->i_mutex on parents, which works but leads to some truly excessive
 *	   locking].
 */
75c96f85
AB
3467static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
3468 struct inode *new_dir, struct dentry *new_dentry)
1da177e4
LT
3469{
3470 int error = 0;
9055cba7 3471 struct inode *target = new_dentry->d_inode;
8de52778 3472 unsigned max_links = new_dir->i_sb->s_max_links;
1da177e4
LT
3473
3474 /*
3475 * If we are going to change the parent - check write permissions,
3476 * we'll need to flip '..'.
3477 */
3478 if (new_dir != old_dir) {
f419a2e3 3479 error = inode_permission(old_dentry->d_inode, MAY_WRITE);
1da177e4
LT
3480 if (error)
3481 return error;
3482 }
3483
3484 error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
3485 if (error)
3486 return error;
3487
1d2ef590 3488 dget(new_dentry);
d83c49f3 3489 if (target)
1b1dcc1b 3490 mutex_lock(&target->i_mutex);
9055cba7
SW
3491
3492 error = -EBUSY;
3493 if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
3494 goto out;
3495
8de52778
AV
3496 error = -EMLINK;
3497 if (max_links && !target && new_dir != old_dir &&
3498 new_dir->i_nlink >= max_links)
3499 goto out;
3500
3cebde24
SW
3501 if (target)
3502 shrink_dcache_parent(new_dentry);
9055cba7
SW
3503 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
3504 if (error)
3505 goto out;
3506
1da177e4 3507 if (target) {
9055cba7
SW
3508 target->i_flags |= S_DEAD;
3509 dont_mount(new_dentry);
1da177e4 3510 }
9055cba7
SW
3511out:
3512 if (target)
3513 mutex_unlock(&target->i_mutex);
1d2ef590 3514 dput(new_dentry);
e31e14ec 3515 if (!error)
349457cc
MF
3516 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
3517 d_move(old_dentry,new_dentry);
1da177e4
LT
3518 return error;
3519}
3520
75c96f85
AB
3521static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
3522 struct inode *new_dir, struct dentry *new_dentry)
1da177e4 3523{
51892bbb 3524 struct inode *target = new_dentry->d_inode;
1da177e4
LT
3525 int error;
3526
3527 error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
3528 if (error)
3529 return error;
3530
3531 dget(new_dentry);
1da177e4 3532 if (target)
1b1dcc1b 3533 mutex_lock(&target->i_mutex);
51892bbb
SW
3534
3535 error = -EBUSY;
1da177e4 3536 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
51892bbb
SW
3537 goto out;
3538
3539 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
3540 if (error)
3541 goto out;
3542
3543 if (target)
3544 dont_mount(new_dentry);
3545 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
3546 d_move(old_dentry, new_dentry);
3547out:
1da177e4 3548 if (target)
1b1dcc1b 3549 mutex_unlock(&target->i_mutex);
1da177e4
LT
3550 dput(new_dentry);
3551 return error;
3552}
3553
3554int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
3555 struct inode *new_dir, struct dentry *new_dentry)
3556{
3557 int error;
3558 int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
59b0df21 3559 const unsigned char *old_name;
1da177e4
LT
3560
3561 if (old_dentry->d_inode == new_dentry->d_inode)
3562 return 0;
3563
3564 error = may_delete(old_dir, old_dentry, is_dir);
3565 if (error)
3566 return error;
3567
3568 if (!new_dentry->d_inode)
a95164d9 3569 error = may_create(new_dir, new_dentry);
1da177e4
LT
3570 else
3571 error = may_delete(new_dir, new_dentry, is_dir);
3572 if (error)
3573 return error;
3574
acfa4380 3575 if (!old_dir->i_op->rename)
1da177e4
LT
3576 return -EPERM;
3577
0eeca283
RL
3578 old_name = fsnotify_oldname_init(old_dentry->d_name.name);
3579
1da177e4
LT
3580 if (is_dir)
3581 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
3582 else
3583 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
123df294
AV
3584 if (!error)
3585 fsnotify_move(old_dir, new_dir, old_name, is_dir,
5a190ae6 3586 new_dentry->d_inode, old_dentry);
0eeca283
RL
3587 fsnotify_oldname_free(old_name);
3588
1da177e4
LT
3589 return error;
3590}
3591
2e4d0924
HC
3592SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
3593 int, newdfd, const char __user *, newname)
1da177e4 3594{
2ad94ae6
AV
3595 struct dentry *old_dir, *new_dir;
3596 struct dentry *old_dentry, *new_dentry;
3597 struct dentry *trap;
1da177e4 3598 struct nameidata oldnd, newnd;
2ad94ae6
AV
3599 char *from;
3600 char *to;
3601 int error;
1da177e4 3602
2ad94ae6 3603 error = user_path_parent(olddfd, oldname, &oldnd, &from);
1da177e4
LT
3604 if (error)
3605 goto exit;
3606
2ad94ae6 3607 error = user_path_parent(newdfd, newname, &newnd, &to);
1da177e4
LT
3608 if (error)
3609 goto exit1;
3610
3611 error = -EXDEV;
4ac91378 3612 if (oldnd.path.mnt != newnd.path.mnt)
1da177e4
LT
3613 goto exit2;
3614
4ac91378 3615 old_dir = oldnd.path.dentry;
1da177e4
LT
3616 error = -EBUSY;
3617 if (oldnd.last_type != LAST_NORM)
3618 goto exit2;
3619
4ac91378 3620 new_dir = newnd.path.dentry;
1da177e4
LT
3621 if (newnd.last_type != LAST_NORM)
3622 goto exit2;
3623
0612d9fb
OH
3624 oldnd.flags &= ~LOOKUP_PARENT;
3625 newnd.flags &= ~LOOKUP_PARENT;
4e9ed2f8 3626 newnd.flags |= LOOKUP_RENAME_TARGET;
0612d9fb 3627
1da177e4
LT
3628 trap = lock_rename(new_dir, old_dir);
3629
49705b77 3630 old_dentry = lookup_hash(&oldnd);
1da177e4
LT
3631 error = PTR_ERR(old_dentry);
3632 if (IS_ERR(old_dentry))
3633 goto exit3;
3634 /* source must exist */
3635 error = -ENOENT;
3636 if (!old_dentry->d_inode)
3637 goto exit4;
3638 /* unless the source is a directory trailing slashes give -ENOTDIR */
3639 if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
3640 error = -ENOTDIR;
3641 if (oldnd.last.name[oldnd.last.len])
3642 goto exit4;
3643 if (newnd.last.name[newnd.last.len])
3644 goto exit4;
3645 }
3646 /* source should not be ancestor of target */
3647 error = -EINVAL;
3648 if (old_dentry == trap)
3649 goto exit4;
49705b77 3650 new_dentry = lookup_hash(&newnd);
1da177e4
LT
3651 error = PTR_ERR(new_dentry);
3652 if (IS_ERR(new_dentry))
3653 goto exit4;
3654 /* target should not be an ancestor of source */
3655 error = -ENOTEMPTY;
3656 if (new_dentry == trap)
3657 goto exit5;
3658
9079b1eb
DH
3659 error = mnt_want_write(oldnd.path.mnt);
3660 if (error)
3661 goto exit5;
be6d3e56
KT
3662 error = security_path_rename(&oldnd.path, old_dentry,
3663 &newnd.path, new_dentry);
3664 if (error)
3665 goto exit6;
1da177e4
LT
3666 error = vfs_rename(old_dir->d_inode, old_dentry,
3667 new_dir->d_inode, new_dentry);
be6d3e56 3668exit6:
9079b1eb 3669 mnt_drop_write(oldnd.path.mnt);
1da177e4
LT
3670exit5:
3671 dput(new_dentry);
3672exit4:
3673 dput(old_dentry);
3674exit3:
3675 unlock_rename(new_dir, old_dir);
3676exit2:
1d957f9b 3677 path_put(&newnd.path);
2ad94ae6 3678 putname(to);
1da177e4 3679exit1:
1d957f9b 3680 path_put(&oldnd.path);
1da177e4 3681 putname(from);
2ad94ae6 3682exit:
1da177e4
LT
3683 return error;
3684}
3685
a26eab24 3686SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname)
5590ff0d
UD
3687{
3688 return sys_renameat(AT_FDCWD, oldname, AT_FDCWD, newname);
3689}
3690
1da177e4
LT
3691int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link)
3692{
3693 int len;
3694
3695 len = PTR_ERR(link);
3696 if (IS_ERR(link))
3697 goto out;
3698
3699 len = strlen(link);
3700 if (len > (unsigned) buflen)
3701 len = buflen;
3702 if (copy_to_user(buffer, link, len))
3703 len = -EFAULT;
3704out:
3705 return len;
3706}
3707
3708/*
3709 * A helper for ->readlink(). This should be used *ONLY* for symlinks that
3710 * have ->follow_link() touching nd only in nd_set_link(). Using (or not
3711 * using) it for any given inode is up to filesystem.
3712 */
3713int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
3714{
3715 struct nameidata nd;
cc314eef 3716 void *cookie;
694a1764 3717 int res;
cc314eef 3718
1da177e4 3719 nd.depth = 0;
cc314eef 3720 cookie = dentry->d_inode->i_op->follow_link(dentry, &nd);
694a1764
MS
3721 if (IS_ERR(cookie))
3722 return PTR_ERR(cookie);
3723
3724 res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd));
3725 if (dentry->d_inode->i_op->put_link)
3726 dentry->d_inode->i_op->put_link(dentry, &nd, cookie);
3727 return res;
1da177e4
LT
3728}
3729
/* Exported entry point that simply forwards to __vfs_follow_link(). */
int vfs_follow_link(struct nameidata *nd, const char *link)
{
	return __vfs_follow_link(nd, link);
}
3734
3735/* get the link contents into pagecache */
3736static char *page_getlink(struct dentry * dentry, struct page **ppage)
3737{
ebd09abb
DG
3738 char *kaddr;
3739 struct page *page;
1da177e4 3740 struct address_space *mapping = dentry->d_inode->i_mapping;
090d2b18 3741 page = read_mapping_page(mapping, 0, NULL);
1da177e4 3742 if (IS_ERR(page))
6fe6900e 3743 return (char*)page;
1da177e4 3744 *ppage = page;
ebd09abb
DG
3745 kaddr = kmap(page);
3746 nd_terminate_link(kaddr, dentry->d_inode->i_size, PAGE_SIZE - 1);
3747 return kaddr;
1da177e4
LT
3748}
3749
3750int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
3751{
3752 struct page *page = NULL;
3753 char *s = page_getlink(dentry, &page);
3754 int res = vfs_readlink(dentry,buffer,buflen,s);
3755 if (page) {
3756 kunmap(page);
3757 page_cache_release(page);
3758 }
3759 return res;
3760}
3761
cc314eef 3762void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd)
1da177e4 3763{
cc314eef 3764 struct page *page = NULL;
1da177e4 3765 nd_set_link(nd, page_getlink(dentry, &page));
cc314eef 3766 return page;
1da177e4
LT
3767}
3768
/*
 * ->put_link() counterpart of page_follow_link_light(): @cookie is the page
 * returned there (possibly NULL); unmap it and drop the reference.
 */
void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
{
	struct page *page = cookie;

	if (!page)
		return;

	kunmap(page);
	page_cache_release(page);
}
3778
54566b2c
NP
3779/*
3780 * The nofs argument instructs pagecache_write_begin to pass AOP_FLAG_NOFS
3781 */
3782int __page_symlink(struct inode *inode, const char *symname, int len, int nofs)
1da177e4
LT
3783{
3784 struct address_space *mapping = inode->i_mapping;
0adb25d2 3785 struct page *page;
afddba49 3786 void *fsdata;
beb497ab 3787 int err;
1da177e4 3788 char *kaddr;
54566b2c
NP
3789 unsigned int flags = AOP_FLAG_UNINTERRUPTIBLE;
3790 if (nofs)
3791 flags |= AOP_FLAG_NOFS;
1da177e4 3792
7e53cac4 3793retry:
afddba49 3794 err = pagecache_write_begin(NULL, mapping, 0, len-1,
54566b2c 3795 flags, &page, &fsdata);
1da177e4 3796 if (err)
afddba49
NP
3797 goto fail;
3798
e8e3c3d6 3799 kaddr = kmap_atomic(page);
1da177e4 3800 memcpy(kaddr, symname, len-1);
e8e3c3d6 3801 kunmap_atomic(kaddr);
afddba49
NP
3802
3803 err = pagecache_write_end(NULL, mapping, 0, len-1, len-1,
3804 page, fsdata);
1da177e4
LT
3805 if (err < 0)
3806 goto fail;
afddba49
NP
3807 if (err < len-1)
3808 goto retry;
3809
1da177e4
LT
3810 mark_inode_dirty(inode);
3811 return 0;
1da177e4
LT
3812fail:
3813 return err;
3814}
3815
0adb25d2
KK
3816int page_symlink(struct inode *inode, const char *symname, int len)
3817{
3818 return __page_symlink(inode, symname, len,
54566b2c 3819 !(mapping_gfp_mask(inode->i_mapping) & __GFP_FS));
0adb25d2
KK
3820}
3821
92e1d5be 3822const struct inode_operations page_symlink_inode_operations = {
1da177e4
LT
3823 .readlink = generic_readlink,
3824 .follow_link = page_follow_link_light,
3825 .put_link = page_put_link,
3826};
3827
/* Symbols from this file made available to modules. */
EXPORT_SYMBOL(user_path_at);
EXPORT_SYMBOL(follow_down_one);
EXPORT_SYMBOL(follow_down);
EXPORT_SYMBOL(follow_up);
EXPORT_SYMBOL(get_write_access); /* binfmt_aout */
EXPORT_SYMBOL(getname);
EXPORT_SYMBOL(lock_rename);
EXPORT_SYMBOL(lookup_one_len);
EXPORT_SYMBOL(page_follow_link_light);
EXPORT_SYMBOL(page_put_link);
EXPORT_SYMBOL(page_readlink);
EXPORT_SYMBOL(__page_symlink);
EXPORT_SYMBOL(page_symlink);
EXPORT_SYMBOL(page_symlink_inode_operations);
EXPORT_SYMBOL(kern_path);
EXPORT_SYMBOL(vfs_path_lookup);
EXPORT_SYMBOL(inode_permission);
EXPORT_SYMBOL(unlock_rename);
EXPORT_SYMBOL(vfs_create);
EXPORT_SYMBOL(vfs_follow_link);
EXPORT_SYMBOL(vfs_link);
EXPORT_SYMBOL(vfs_mkdir);
EXPORT_SYMBOL(vfs_mknod);
EXPORT_SYMBOL(generic_permission);
EXPORT_SYMBOL(vfs_readlink);
EXPORT_SYMBOL(vfs_rename);
EXPORT_SYMBOL(vfs_rmdir);
EXPORT_SYMBOL(vfs_symlink);
EXPORT_SYMBOL(vfs_unlink);
EXPORT_SYMBOL(dentry_unhash);
EXPORT_SYMBOL(generic_readlink);